/usr/include/unicode/colldata.h is in libicu-dev 4.8.1.1-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/*
******************************************************************************
* Copyright (C) 1996-2010, International Business Machines *
* Corporation and others. All Rights Reserved. *
******************************************************************************
*/
/**
* \file
* \brief C++ API: Collation data used to compute minLengthInChars.
* \internal
*/
#ifndef COLL_DATA_H
#define COLL_DATA_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_COLLATION
#include "unicode/uobject.h"
#include "unicode/ucol.h"
U_NAMESPACE_BEGIN
/**
* \def KEY_BUFFER_SIZE
* The size of the internal buffer for the Collator's short description string.
* In this header it dimensions the <code>keyBuffer</code> member of
* <code>CollData</code>.
* @internal ICU 4.0.1 technology preview
*/
#define KEY_BUFFER_SIZE 64
/**
* \def CELIST_BUFFER_SIZE
* The size of the internal CE buffer in a <code>CEList</code> object.
* In this header it dimensions the <code>ceBuffer</code> member of
* <code>CEList</code>.
* @internal ICU 4.0.1 technology preview
*/
#define CELIST_BUFFER_SIZE 4
/**
* \def INSTRUMENT_CELIST
* Define this to enable the <code>CEList</code> objects to collect
* statistics. When defined, <code>CEList</code> gains the static
* <code>_active</code> and <code>_histogram</code> members.
* @internal ICU 4.0.1 technology preview
*/
//#define INSTRUMENT_CELIST
/**
* \def STRING_LIST_BUFFER_SIZE
* The size of the initial list in a <code>StringList</code> object.
* @internal ICU 4.0.1 technology preview
*/
#define STRING_LIST_BUFFER_SIZE 16
/**
* \def INSTRUMENT_STRING_LIST
* Define this to enable the <code>StringList</code> objects to
* collect statistics. When defined, <code>StringList</code> gains the static
* <code>_lists</code>, <code>_strings</code> and <code>_histogram</code> members.
* @internal ICU 4.0.1 technology preview
*/
//#define INSTRUMENT_STRING_LIST
/**
* CEList
*
* This object holds a list of CEs (collation elements) generated from a
* particular <code>UnicodeString</code> by a given collator.
*
* @internal ICU 4.0.1 technology preview
*/
class U_I18N_API CEList : public UObject
{
public:
/**
* Construct a <code>CEList</code> object.
*
* @param coll - the Collator used to collect the CEs.
* @param string - the string for which to collect the CEs.
* @param status - will be set if any errors occur.
*
* Note: if on return, status is set to an error code,
* the only safe thing to do with this object is to call
* the destructor.
*
* @internal ICU 4.0.1 technology preview
*/
CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status);
/**
* The destructor.
* @internal ICU 4.0.1 technology preview
*/
~CEList();
/**
* Return the number of CEs in the list.
*
* @return the number of CEs in the list.
*
* @internal ICU 4.0.1 technology preview
*/
int32_t size() const;
/**
* Get a particular CE from the list.
*
* @param index - the index of the CE to return
*
* @return the CE, or <code>0</code> if <code>index</code> is out of range
*
* @internal ICU 4.0.1 technology preview
*/
uint32_t get(int32_t index) const;
/**
* Check if the CEs in another <code>CEList</code> match the
* suffix of this list starting at a given offset.
*
* @param offset - the offset of the suffix
* @param other - the other <code>CEList</code>
*
* @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise.
*
* @internal ICU 4.0.1 technology preview
*/
UBool matchesAt(int32_t offset, const CEList *other) const;
/**
* The index operator.
*
* Note that this is a <code>const</code> method that nevertheless returns
* a non-<code>const</code> reference, so callers are able to modify the
* CE through it. Unlike <code>get</code>, no out-of-range behavior is
* documented for this operator.
*
* @param index - the index
*
* @return a reference to the given CE in the list
*
* @internal ICU 4.0.1 technology preview
*/
uint32_t &operator[](int32_t index) const;
/**
* UObject glue...
* @internal ICU 4.0.1 technology preview
*/
virtual UClassID getDynamicClassID() const;
/**
* UObject glue...
* @internal ICU 4.0.1 technology preview
*/
static UClassID getStaticClassID();
private:
// Internal helper: add one CE to the list; status reports any error.
void add(uint32_t ce, UErrorCode &status);
// Fixed-size internal CE buffer of CELIST_BUFFER_SIZE entries.
uint32_t ceBuffer[CELIST_BUFFER_SIZE];
// Pointer to CE storage.
// NOTE(review): presumably points at ceBuffer until more room is needed - confirm in the implementation.
uint32_t *ces;
// NOTE(review): presumably the current capacity of the storage behind ces - confirm in the implementation.
int32_t listMax;
// NOTE(review): presumably the number of CEs currently stored (what size() reports) - confirm in the implementation.
int32_t listSize;
#ifdef INSTRUMENT_CELIST
// Statistics members, present only when INSTRUMENT_CELIST is defined.
static int32_t _active;
static int32_t _histogram[10];
#endif
};
/**
* StringList
*
* This object holds a list of <code>UnicodeString</code> objects.
*
* @internal ICU 4.0.1 technology preview
*/
class U_I18N_API StringList : public UObject
{
public:
/**
* Construct an empty <code>StringList</code>
*
* @param status - will be set if any errors occur.
*
* Note: if on return, status is set to an error code,
* the only safe thing to do with this object is to call
* the destructor.
*
* @internal ICU 4.0.1 technology preview
*/
StringList(UErrorCode &status);
/**
* The destructor.
*
* @internal ICU 4.0.1 technology preview
*/
~StringList();
/**
* Add a string to the list.
*
* @param string - the string to add
* @param status - will be set if any errors occur.
*
* @internal ICU 4.0.1 technology preview
*/
void add(const UnicodeString *string, UErrorCode &status);
/**
* Add an array of Unicode code points to the list.
*
* @param chars - the address of the array of code points
* @param count - the number of code points in the array
* @param status - will be set if any errors occur.
*
* @internal ICU 4.0.1 technology preview
*/
void add(const UChar *chars, int32_t count, UErrorCode &status);
/**
* Get a particular string from the list.
*
* @param index - the index of the string
*
* @return a pointer to the <code>UnicodeString</code> or <code>NULL</code>
* if <code>index</code> is out of bounds.
*
* @internal ICU 4.0.1 technology preview
*/
const UnicodeString *get(int32_t index) const;
/**
* Get the number of strings in the list.
*
* @return the number of strings in the list.
*
* @internal ICU 4.0.1 technology preview
*/
int32_t size() const;
/**
* the UObject glue...
* @internal ICU 4.0.1 technology preview
*/
virtual UClassID getDynamicClassID() const;
/**
* the UObject glue...
* @internal ICU 4.0.1 technology preview
*/
static UClassID getStaticClassID();
private:
// Storage for the strings held by this list.
UnicodeString *strings;
// NOTE(review): presumably the current capacity of the strings array - confirm in the implementation.
int32_t listMax;
// NOTE(review): presumably the number of strings currently stored (what size() reports) - confirm in the implementation.
int32_t listSize;
#ifdef INSTRUMENT_STRING_LIST
// Statistics members, present only when INSTRUMENT_STRING_LIST is defined.
static int32_t _lists;
static int32_t _strings;
static int32_t _histogram[101];
#endif
};
/*
* Forward declarations of internal classes. This header only uses
* pointers to them, so their full definitions are not needed here.
*/
class StringToCEsMap;
class CEToStringsMap;
class CollDataCache;
/**
* CollData
*
* This class holds the Collator-specific data needed to
* compute the length of the shortest string that can
* generate a particular list of CEs.
*
* <code>CollData</code> objects are quite expensive to compute. Because
* of this, they are cached. When you call <code>CollData::open</code> it
* returns a reference counted cached object. When you call <code>CollData::close</code>
* the reference count on the object is decremented but the object is not deleted.
*
* If you do not need to reuse any unreferenced objects in the cache, you can call
* <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code>
* objects, you can call <code>CollData::freeCollDataCache</code>
*
* @internal ICU 4.0.1 technology preview
*/
class U_I18N_API CollData : public UObject
{
public:
/**
* Construct a <code>CollData</code> object.
*
* @param collator - the collator
* @param status - will be set if any errors occur.
*
* @return the <code>CollData</code> object. You must call
* <code>close</code> when you are done using the object.
*
* Note: if on return, status is set to an error code,
* the only safe thing to do with this object is to call
* <code>CollData::close</code>.
*
* @internal ICU 4.0.1 technology preview
*/
static CollData *open(UCollator *collator, UErrorCode &status);
/**
* Release a <code>CollData</code> object.
*
* @param collData - the object
*
* @internal ICU 4.0.1 technology preview
*/
static void close(CollData *collData);
/**
* Get the <code>UCollator</code> object used to create this object.
* The object returned may not be the exact object that was used to
* create this object, but it will have the same behavior.
*
* @return the <code>UCollator</code> object.
*
* @internal ICU 4.0.1 technology preview
*/
UCollator *getCollator() const;
/**
* Get a list of all the strings which generate a list
* of CEs starting with a given CE.
*
* NOTE(review): <code>ce</code> is declared as <code>int32_t</code> here,
* while <code>CEList</code> exposes CEs as <code>uint32_t</code>; callers
* passing a CE taken from a <code>CEList</code> rely on an implicit conversion.
*
* @param ce - the CE
*
* @return a <code>StringList</code> object containing all
* the strings, or <code>NULL</code> if there are
* no such strings.
*
* @internal ICU 4.0.1 technology preview.
*/
const StringList *getStringList(int32_t ce) const;
/**
* Get a list of the CEs generated by a particular string.
*
* @param string - the string
*
* @return a <code>CEList</code> object containing the CEs. You
* must call <code>freeCEList</code> when you are finished
* using the <code>CEList</code>.
*
* @internal ICU 4.0.1 technology preview.
*/
const CEList *getCEList(const UnicodeString *string) const;
/**
* Release a <code>CEList</code> returned by <code>getCEList</code>.
*
* @param list - the <code>CEList</code> to free.
*
* @internal ICU 4.0.1 technology preview
*/
void freeCEList(const CEList *list);
/**
* Return the length of the shortest string that will generate
* the given list of CEs.
*
* @param ces - the CEs
* @param offset - the offset of the first CE in the list to use.
*
* @return the length of the shortest string.
*
* @internal ICU 4.0.1 technology preview
*/
int32_t minLengthInChars(const CEList *ces, int32_t offset) const;
/**
* Return the length of the shortest string that will generate
* the given list of CEs.
*
* Note: the algorithm used to do this computation is recursive. To
* limit the amount of recursion, a "history" list is used to record
* the best answer starting at a particular offset in the list of CEs.
* If the same offset is visited again during the recursion, the answer
* in the history list is used.
*
* @param ces - the CEs
* @param offset - the offset of the first CE in the list to use.
* @param history - the history list. Must be at least as long as
* the number of CEs in the <code>CEList</code>
*
* @return the length of the shortest string.
*
* @internal ICU 4.0.1 technology preview
*/
int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const;
/**
* UObject glue...
* @internal ICU 4.0.1 technology preview
*/
virtual UClassID getDynamicClassID() const;
/**
* UObject glue...
* @internal ICU 4.0.1 technology preview
*/
static UClassID getStaticClassID();
/**
* <code>CollData</code> objects are expensive to compute, and so
* may be cached. This routine will free the cached objects and delete
* the cache.
*
* WARNING: Don't call this until you have called <code>close</code>
* for each <code>CollData</code> object that you have used. Also,
* DO NOT call this if another thread may be calling <code>flushCollDataCache</code>
* at the same time.
*
* @internal 4.0.1 technology preview
*/
static void freeCollDataCache();
/**
* <code>CollData</code> objects are expensive to compute, and so
* may be cached. This routine will remove any unused <code>CollData</code>
* objects from the cache.
*
* @internal 4.0.1 technology preview
*/
static void flushCollDataCache();
private:
// The cache classes are friends so that they can reach the private
// constructors and destructor declared below.
friend class CollDataCache;
friend class CollDataCacheEntry;
// Constructors and destructor are private: client code obtains and releases
// objects through open() and close() instead of creating them directly.
CollData(UCollator *collator, char *cacheKey, int32_t cachekeyLength, UErrorCode &status);
~CollData();
CollData();
// NOTE(review): presumably writes the collator's cache key into buffer
// (at most bufferLength chars) and returns the key - confirm in the implementation.
static char *getCollatorKey(UCollator *collator, char *buffer, int32_t bufferLength);
// Accessor for the shared CollDataCache.
static CollDataCache *getCollDataCache();
// The collator associated with this object (see getCollator()).
UCollator *coll;
// NOTE(review): judging by the type names, maps strings to their CE lists - confirm.
StringToCEsMap *charsToCEList;
// NOTE(review): judging by the type names, maps a CE to the strings whose CE lists start with it - confirm.
CEToStringsMap *ceToCharsStartingWith;
// Fixed-size key storage of KEY_BUFFER_SIZE chars.
char keyBuffer[KEY_BUFFER_SIZE];
// NOTE(review): presumably points at keyBuffer unless the key needs more room - confirm.
char *key;
// The cache shared by all CollData objects (static member).
static CollDataCache *collDataCache;
// NOTE(review): presumably CE range limits for Han characters - confirm in the implementation.
uint32_t minHan;
uint32_t maxHan;
// NOTE(review): presumably CE limits for Hangul Jamo - confirm in the implementation.
uint32_t jamoLimits[4];
};
U_NAMESPACE_END
#endif // #if !UCONFIG_NO_COLLATION
#endif // #ifndef COLL_DATA_H
|