17 #ifndef __UCHARSTRIE_H__ 18 #define __UCHARSTRIE_H__ 34 class UCharsTrieBuilder;
67 : ownedArray_(
NULL), uchars_(trieUChars),
68 pos_(uchars_), remainingMatchLength_(-1) {}
83 : ownedArray_(
NULL), uchars_(other.uchars_),
84 pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
93 remainingMatchLength_=-1;
112 const char16_t *uchars;
114 int32_t remainingMatchLength;
125 state.uchars=uchars_;
127 state.remainingMatchLength=remainingMatchLength_;
142 if(uchars_==state.uchars && uchars_!=
NULL) {
144 remainingMatchLength_=state.remainingMatchLength;
165 remainingMatchLength_=-1;
166 return nextImpl(uchars_, uchar);
223 const char16_t *pos=pos_;
224 int32_t leadUnit=*pos++;
226 return leadUnit&kValueIsFinal ?
227 readValue(pos, leadUnit&0x7fff) : readNodeValue(pos, leadUnit);
240 const char16_t *pos=pos_;
242 return pos!=
NULL && findUniqueValue(pos+remainingMatchLength_+1,
FALSE, uniqueValue);
303 UBool hasNext()
const;
333 UBool truncateAndStop() {
339 const char16_t *branchNext(
const char16_t *pos, int32_t length,
UErrorCode &errorCode);
341 const char16_t *uchars_;
342 const char16_t *pos_;
343 const char16_t *initialPos_;
344 int32_t remainingMatchLength_;
345 int32_t initialRemainingMatchLength_;
371 UCharsTrie(char16_t *adoptUChars,
const char16_t *trieUChars)
372 : ownedArray_(adoptUChars), uchars_(trieUChars),
373 pos_(uchars_), remainingMatchLength_(-1) {}
384 static inline int32_t readValue(
const char16_t *pos, int32_t leadUnit) {
386 if(leadUnit<kMinTwoUnitValueLead) {
388 }
else if(leadUnit<kThreeUnitValueLead) {
389 value=((leadUnit-kMinTwoUnitValueLead)<<16)|*pos;
391 value=(pos[0]<<16)|pos[1];
395 static inline const char16_t *skipValue(
const char16_t *pos, int32_t leadUnit) {
396 if(leadUnit>=kMinTwoUnitValueLead) {
397 if(leadUnit<kThreeUnitValueLead) {
405 static inline const char16_t *skipValue(
const char16_t *pos) {
406 int32_t leadUnit=*pos++;
407 return skipValue(pos, leadUnit&0x7fff);
410 static inline int32_t readNodeValue(
const char16_t *pos, int32_t leadUnit) {
413 if(leadUnit<kMinTwoUnitNodeValueLead) {
414 value=(leadUnit>>6)-1;
415 }
else if(leadUnit<kThreeUnitNodeValueLead) {
416 value=(((leadUnit&0x7fc0)-kMinTwoUnitNodeValueLead)<<10)|*pos;
418 value=(pos[0]<<16)|pos[1];
422 static inline const char16_t *skipNodeValue(
const char16_t *pos, int32_t leadUnit) {
424 if(leadUnit>=kMinTwoUnitNodeValueLead) {
425 if(leadUnit<kThreeUnitNodeValueLead) {
434 static inline const char16_t *jumpByDelta(
const char16_t *pos) {
435 int32_t delta=*pos++;
436 if(delta>=kMinTwoUnitDeltaLead) {
437 if(delta==kThreeUnitDeltaLead) {
438 delta=(pos[0]<<16)|pos[1];
441 delta=((delta-kMinTwoUnitDeltaLead)<<16)|*pos++;
447 static const char16_t *skipDelta(
const char16_t *pos) {
448 int32_t delta=*pos++;
449 if(delta>=kMinTwoUnitDeltaLead) {
450 if(delta==kThreeUnitDeltaLead) {
464 UStringTrieResult branchNext(
const char16_t *pos, int32_t length, int32_t uchar);
472 static const char16_t *findUniqueValueFromBranch(
const char16_t *pos, int32_t length,
473 UBool haveUniqueValue, int32_t &uniqueValue);
476 static UBool findUniqueValue(
const char16_t *pos,
UBool haveUniqueValue, int32_t &uniqueValue);
480 static void getNextBranchUChars(
const char16_t *pos, int32_t length,
Appendable &out);
525 static const int32_t kMaxBranchLinearSubNodeLength=5;
528 static const int32_t kMinLinearMatch=0x30;
529 static const int32_t kMaxLinearMatchLength=0x10;
534 static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;
535 static const int32_t kNodeTypeMask=kMinValueLead-1;
538 static const int32_t kValueIsFinal=0x8000;
541 static const int32_t kMaxOneUnitValue=0x3fff;
543 static const int32_t kMinTwoUnitValueLead=kMaxOneUnitValue+1;
544 static const int32_t kThreeUnitValueLead=0x7fff;
546 static const int32_t kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1;
549 static const int32_t kMaxOneUnitNodeValue=0xff;
550 static const int32_t kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6);
551 static const int32_t kThreeUnitNodeValueLead=0x7fc0;
553 static const int32_t kMaxTwoUnitNodeValue=
554 ((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1;
557 static const int32_t kMaxOneUnitDelta=0xfbff;
558 static const int32_t kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1;
559 static const int32_t kThreeUnitDeltaLead=0xffff;
561 static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1;
563 char16_t *ownedArray_;
566 const char16_t *uchars_;
571 const char16_t *pos_;
573 int32_t remainingMatchLength_;
578 #endif // __UCHARSTRIE_H__ UCharsTrie & resetToState(const State &state)
Resets this trie to the saved state.
int32_t getValue() const
Returns a matching string's value if called immediately after current()/first()/next() returned USTRI...
UStringTrieResult
Return values for BytesTrie::next(), UCharsTrie::next() and similar methods.
UCharsTrie(const UCharsTrie &other)
Copy constructor, copies the other trie reader object and its state, but not the char16_t array which...
UBool hasUniqueValue(int32_t &uniqueValue) const
Determines whether all strings reachable from the current state map to the same value.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
const UnicodeString & getString() const
UCharsTrie & reset()
Resets this trie to its initial state.
Iterator for all of the (string, value) pairs in a UCharsTrie.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
#define NULL
Define NULL if necessary, to nullptr for C++ and to ((void *)0) for C.
Builder class for UCharsTrie.
#define TRUE
The TRUE value of a UBool.
const UCharsTrie & saveState(State &state) const
Saves the state of this trie.
C++ API: Common ICU base class UObject.
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
State()
Constructs an empty State.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
UCharsTrie state object, for saving a trie's current state and resetting the trie back to this state ...
const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types...
C API: Helper definitions for dictionary trie APIs.
UCharsTrie(ConstChar16Ptr trieUChars)
Constructs a UCharsTrie reader instance.
Basic definitions for ICU, for both C and C++ APIs.
#define FALSE
The FALSE value of a UBool.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside...
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
The input unit(s) continued a matching string and there is a value for the string so far...
UStringTrieResult first(int32_t uchar)
Traverses the trie from the initial state for this input char16_t.
UMemory is the common ICU base class.
Light-weight, non-const reader class for a UCharsTrie.
int8_t UBool
The ICU boolean type.
Base class for objects to which Unicode characters and strings can be appended.