ICU 64.2
tblcoll.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 * Copyright (C) 1996-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ******************************************************************************
8 */
9 
62 #ifndef TBLCOLL_H
63 #define TBLCOLL_H
64 
65 #include "unicode/utypes.h"
66 
67 #if !UCONFIG_NO_COLLATION
68 
69 #include "unicode/coll.h"
70 #include "unicode/locid.h"
71 #include "unicode/uiter.h"
72 #include "unicode/ucol.h"
73 
75 
// Forward declarations only: none of these types is defined in this listing.
// The four structs are collation-implementation types held by pointer further down.
76 struct CollationCacheEntry;
77 struct CollationData;
78 struct CollationSettings;
79 struct CollationTailoring;
83 class StringSearch;
// Iterator class (coleitr.h); returned by createCollationElementIterator().
87 class CollationElementIterator;
// Collation keys (sortkey.h) are generated by the Collator class.
88 class CollationKey;
89 class SortKeyByteSink;
// A mutable set of Unicode characters and multicharacter strings (uniset.h).
90 class UnicodeSet;
// String class that stores Unicode characters directly (unistr.h).
91 class UnicodeString;
92 class UVector64;
93 
// The RuleBasedCollator class provides the implementation of Collator, using
// data-driven tables.
// NOTE(review): the class-head line (113) was absent from this listing although the
// closing "};" at line 872 is present; it is restored here.
113 class U_I18N_API RuleBasedCollator : public Collator {
114 public:
// Constructs a collator from a rule string. status is the UErrorCode that reports
// failure (ICU uses error codes in place of exceptions).
123  RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
124 
// Same, additionally taking a collation strength.
134  RuleBasedCollator(const UnicodeString& rules,
135  ECollationStrength collationStrength,
136  UErrorCode& status);
137 
// Same, additionally taking a decomposition mode (a UColAttributeValue).
147  RuleBasedCollator(const UnicodeString& rules,
148  UColAttributeValue decompositionMode,
149  UErrorCode& status);
150 
// Same, taking both a collation strength and a decomposition mode.
161  RuleBasedCollator(const UnicodeString& rules,
162  ECollationStrength collationStrength,
163  UColAttributeValue decompositionMode,
164  UErrorCode& status);
165 
166 #ifndef U_HIDE_INTERNAL_API
167 
// Internal-API constructor from a rule string. parseError (a UParseError, the struct
// ICU uses to return detailed information about parsing errors) and reason are
// non-const references — presumably filled in when the rules fail to parse; TODO confirm.
171  RuleBasedCollator(const UnicodeString &rules,
172  UParseError &parseError, UnicodeString &reason,
173  UErrorCode &errorCode);
174 #endif /* U_HIDE_INTERNAL_API */
175 
// Copy constructor.
181  RuleBasedCollator(const RuleBasedCollator& other);
182 
183 
// Constructs from a binary image (bin, length bytes) together with a base collator.
// NOTE(review): presumably the format written by cloneBinary() — confirm.
201  RuleBasedCollator(const uint8_t *bin, int32_t length,
202  const RuleBasedCollator *base,
203  UErrorCode &status);
204 
// Virtual destructor.
209  virtual ~RuleBasedCollator();
210 
// Copy assignment.
216  RuleBasedCollator& operator=(const RuleBasedCollator& other);
217 
// Returns TRUE if "other" is the same as "this".
224  virtual UBool operator==(const Collator& other) const;
225 
// Makes a copy of this object.
231  virtual Collator* clone(void) const;
232 
// Creates a CollationElementIterator for a UnicodeString source.
// NOTE(review): returns a raw pointer; ownership is not visible here — presumably the caller's.
243  virtual CollationElementIterator* createCollationElementIterator(
244  const UnicodeString& source) const;
245 
// Overload taking the source text as a CharacterIterator (the abstract API for
// iteration on text objects).
255  virtual CollationElementIterator* createCollationElementIterator(
256  const CharacterIterator& source) const;
257 
258  // Make deprecated versions of Collator::compare() visible.
259  using Collator::compare;
260 
// Compares two UnicodeStrings. The UCollationResult reports the ordering
// (e.g. UCOL_LESS when source compares less than target).
273  virtual UCollationResult compare(const UnicodeString& source,
274  const UnicodeString& target,
275  UErrorCode &status) const;
276 
// Overload with an extra length argument.
// NOTE(review): presumably the maximum number of characters compared — confirm.
290  virtual UCollationResult compare(const UnicodeString& source,
291  const UnicodeString& target,
292  int32_t length,
293  UErrorCode &status) const;
294 
// Overload for char16_t buffers, each passed with its own length.
311  virtual UCollationResult compare(const char16_t* source, int32_t sourceLength,
312  const char16_t* target, int32_t targetLength,
313  UErrorCode &status) const;
314 
// Overload reading both inputs through UCharIterator (the C API for code unit iteration).
326  virtual UCollationResult compare(UCharIterator &sIter,
327  UCharIterator &tIter,
328  UErrorCode &status) const;
329 
// Compares two UTF-8 strings using the Collator.
343  virtual UCollationResult compareUTF8(const StringPiece &source,
344  const StringPiece &target,
345  UErrorCode &status) const;
346 
// Transforms the string into a series of characters that can be compared with
// CollationKey::compareTo.
// NOTE(review): the returned reference is presumably the key argument — confirm.
361  virtual CollationKey& getCollationKey(const UnicodeString& source,
362  CollationKey& key,
363  UErrorCode& status) const;
364 
// Overload taking the source as a char16_t buffer plus length.
380  virtual CollationKey& getCollationKey(const char16_t *source,
381  int32_t sourceLength,
382  CollationKey& key,
383  UErrorCode& status) const;
384 
// Generates the hash code for the collation object.
390  virtual int32_t hashCode() const;
391 
// Gets the locale of the Collator; type selects which locale to report.
402  virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
403 
// Returns a const reference to a UnicodeString — by its name, the collation rules.
409  const UnicodeString& getRules() const;
410 
// Gets the version information for a Collator. UVersionInfo is an array of 4 uint8_t.
416  virtual void getVersion(UVersionInfo info) const;
417 
418 #ifndef U_HIDE_DEPRECATED_API
419 
// Deprecated API (compiled out when U_HIDE_DEPRECATED_API is defined).
// NOTE(review): by its name, returns the maximum expansion length for a collation
// element order — confirm.
435  int32_t getMaxExpansion(int32_t order) const;
436 #endif /* U_HIDE_DEPRECATED_API */
437 
// Returns a unique class ID POLYMORPHICALLY. UClassID identifies classes without
// using the compiler's RTTI.
448  virtual UClassID getDynamicClassID(void) const;
449 
// Static counterpart of getDynamicClassID(): callable without an instance.
461  static UClassID U_EXPORT2 getStaticClassID(void);
462 
463 #ifndef U_HIDE_DEPRECATED_API
464 
// Deprecated API (compiled out when U_HIDE_DEPRECATED_API is defined).
// Returns a byte buffer and reports its size through length.
// NOTE(review): ownership of the returned buffer is not visible here — confirm.
474  uint8_t *cloneRuleData(int32_t &length, UErrorCode &status) const;
475 #endif /* U_HIDE_DEPRECATED_API */
476 
// Writes a binary form of this collator into buffer, which holds capacity bytes.
// NOTE(review): the int32_t result is presumably the size of the binary image — confirm.
487  int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) const;
488 
// Retrieves the rule string into buffer; delta is the UColRuleOption selecting
// how the rule string is retrieved.
500  void getRules(UColRuleOption delta, UnicodeString &buffer) const;
501 
// Universal attribute setter.
509  virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
510  UErrorCode &status);
511 
// Universal attribute getter.
// NOTE(review): the first line of this declaration (519) was absent from the listing;
// it is restored here from the Collator::getAttribute signature.
519  virtual UColAttributeValue getAttribute(UColAttribute attr,
520  UErrorCode &status) const;
521 
// Sets the variable top to the top of the specified reordering group.
538  virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode);
539 
// Returns the maximum reordering group whose characters are affected by
// UCOL_ALTERNATE_HANDLING.
546  virtual UColReorderCode getMaxVariable() const;
547 
// Sets the variable top to the primary weight of the specified string
// (char16_t buffer plus length).
564  virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status);
565 
// Same, with the string passed as a UnicodeString.
581  virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status);
582 
// Overload taking the variable top directly as a uint32_t value.
594  virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
595 
// Gets the variable top value of a Collator.
603  virtual uint32_t getVariableTop(UErrorCode &status) const;
604 
// Get a UnicodeSet that contains all the characters and sequences tailored in this collator.
// NOTE(review): returns a raw pointer; ownership is not visible here — presumably the caller's.
614  virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
615 
// Get the sort key as an array of bytes from a UnicodeString; result/resultLength
// describe the output buffer.
630  virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
631  int32_t resultLength) const;
632 
// Overload taking the source as a char16_t buffer plus length.
649  virtual int32_t getSortKey(const char16_t *source, int32_t sourceLength,
650  uint8_t *result, int32_t resultLength) const;
651 
// Retrieves the reordering codes for this collator into dest (capacity destCapacity).
665  virtual int32_t getReorderCodes(int32_t *dest,
666  int32_t destCapacity,
667  UErrorCode& status) const;
668 
// Sets the ordering of scripts for this collator.
680  virtual void setReorderCodes(const int32_t* reorderCodes,
681  int32_t reorderCodesLength,
682  UErrorCode& status) ;
683 
// Implements ucol_strcollUTF8().
// NOTE(review): the head of this declaration (line 688) was absent from the listing;
// it is restored here from the member signature.
688  virtual UCollationResult internalCompareUTF8(
689  const char *left, int32_t leftLength,
690  const char *right, int32_t rightLength,
691  UErrorCode &errorCode) const;
692 
// Get the short definition string for a collator; written into buffer (capacity bytes).
716  virtual int32_t internalGetShortDefinitionString(const char *locale,
717  char *buffer,
718  int32_t capacity,
719  UErrorCode &status) const;
720 
// Implements ucol_nextSortKeyPart().
725  virtual int32_t internalNextSortKeyPart(
726  UCharIterator *iter, uint32_t state[2],
727  uint8_t *dest, int32_t count, UErrorCode &errorCode) const;
728 
729  // Do not enclose the default constructor with #ifndef U_HIDE_INTERNAL_API
// NOTE(review): the default-constructor declaration this comment refers to does not
// appear in this listing (numbering jumps from 729 to 735) — confirm against the header.
735 
736 #ifndef U_HIDE_INTERNAL_API
737 
// Internal API: returns a C-string locale ID for the requested locale type.
743  const char *internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const;
744 
// Internal API: by its name, collects this collator's contractions and expansions
// into the two UnicodeSets.
// NOTE(review): whether null set pointers are accepted, and the exact effect of
// addPrefixes, are not visible here.
757  void internalGetContractionsAndExpansions(
758  UnicodeSet *contractions, UnicodeSet *expansions,
759  UBool addPrefixes, UErrorCode &errorCode) const;
760 
// Internal API: by its name, adds contractions for code point c to set.
766  void internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const;
767 
// Internal API: builds the tailoring from a rule string with the given strength and
// decomposition mode. outParseError/outReason are pointer out-parameters —
// presumably optional; TODO confirm.
772  void internalBuildTailoring(
773  const UnicodeString &rules,
774  int32_t strength,
775  UColAttributeValue decompositionMode,
776  UParseError *outParseError, UnicodeString *outReason,
777  UErrorCode &errorCode);
778 
// Converts a C-API UCollator handle to a RuleBasedCollator pointer: maps it to a
// Collator via fromUCollator(), then dynamic_casts, so the result is null when the
// object is not a RuleBasedCollator.
// NOTE(review): the function head (line 780) was absent from the listing, leaving the
// body orphaned; it is restored here to mirror the const overload below.
780  static inline RuleBasedCollator *rbcFromUCollator(UCollator *uc) {
781  return dynamic_cast<RuleBasedCollator *>(fromUCollator(uc));
782  }
// Const overload: converts a const UCollator handle to a const RuleBasedCollator
// pointer via fromUCollator() and dynamic_cast; null when the object is not a
// RuleBasedCollator.
784  static inline const RuleBasedCollator *rbcFromUCollator(const UCollator *uc) {
785  return dynamic_cast<const RuleBasedCollator *>(fromUCollator(uc));
786  }
787 
// Internal API: by its name, produces the collation elements (CEs) for str into ces.
// NOTE(review): whether ces is cleared first or appended to is not visible here.
792  void internalGetCEs(const UnicodeString &str, UVector64 &ces, UErrorCode &errorCode) const;
793 #endif // U_HIDE_INTERNAL_API
794 
795 protected:
// Used internally by registration to define the requested and valid locales.
803  virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
804 
805 private:
// These two classes are granted access to the private members below.
806  friend class CollationElementIterator;
807  friend class Collator;
808 
// Private constructor from a collation cache entry.
809  RuleBasedCollator(const CollationCacheEntry *entry);
810 
// Private attribute indices that continue after the public UColAttribute range.
// UCOL_ATTRIBUTE_COUNT is one more than the highest normal UColAttribute value, so
// ATTR_VARIABLE_TOP takes the first free index and ATTR_LIMIT is one past it.
816  enum Attributes {
817  ATTR_VARIABLE_TOP = UCOL_ATTRIBUTE_COUNT,
818  ATTR_LIMIT
819  };
820 
// Takes a CollationTailoring pointer; by its name, this collator adopts it.
// NOTE(review): ownership transfer is not visible here — confirm.
821  void adoptTailoring(CollationTailoring *t, UErrorCode &errorCode);
822 
823  // Both lengths must be <0 or else both must be >=0.
// Comparison worker for 16-bit (char16_t) text.
824  UCollationResult doCompare(const char16_t *left, int32_t leftLength,
825  const char16_t *right, int32_t rightLength,
826  UErrorCode &errorCode) const;
// Comparison worker for 8-bit (uint8_t) text.
827  UCollationResult doCompare(const uint8_t *left, int32_t leftLength,
828  const uint8_t *right, int32_t rightLength,
829  UErrorCode &errorCode) const;
830 
// Writes the sort key for s (length code units) into sink.
831  void writeSortKey(const char16_t *s, int32_t length,
832  SortKeyByteSink &sink, UErrorCode &errorCode) const;
833 
// Writes the identical level for the range [s, limit) into sink.
834  void writeIdenticalLevel(const char16_t *s, const char16_t *limit,
835  SortKeyByteSink &sink, UErrorCode &errorCode) const;
836 
// Returns a reference to the default CollationSettings.
837  const CollationSettings &getDefaultSettings() const;
838 
// Clears the bit for this attribute index in explicitlySetAttributes, marking the
// attribute as not explicitly set.
839  void setAttributeDefault(int32_t attribute) {
840  explicitlySetAttributes = explicitlySetAttributes & ~(static_cast<uint32_t>(1) << attribute);
841  }
// Sets the bit for this attribute index in explicitlySetAttributes, marking the
// attribute as explicitly set.
842  void setAttributeExplicitly(int32_t attribute) {
843  explicitlySetAttributes = explicitlySetAttributes | (static_cast<uint32_t>(1) << attribute);
844  }
// Reports whether the bit for this attribute index is set in explicitlySetAttributes.
845  UBool attributeHasBeenSetExplicitly(int32_t attribute) const {
846  // assert(0 <= attribute < ATTR_LIMIT);
847  return static_cast<UBool>(((explicitlySetAttributes >> attribute) & static_cast<uint32_t>(1)) != 0);
848  }
849 
// Predicate on a code point.
// NOTE(review): the meaning of "unsafe" is not defined in this listing.
857  UBool isUnsafe(UChar32 c) const;
858 
// Static helper, declared with the U_CALLCONV callback qualifier, that computes
// max-expansion data for a tailoring.
859  static void U_CALLCONV computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode);
// NOTE(review): presumably triggers computeMaxExpansions() on first use — confirm.
860  UBool initMaxExpansions(UErrorCode &errorCode) const;
861 
// Updates the fast-Latin options on a settings object owned by the caller.
862  void setFastLatinOptions(CollationSettings &ownedSettings) const;
863 
// Collation data used by this collator (held by pointer; ownership not visible here).
864  const CollationData *data;
865  const CollationSettings *settings; // reference-counted
866  const CollationTailoring *tailoring; // alias of cacheEntry->tailoring
867  const CollationCacheEntry *cacheEntry; // reference-counted
868  Locale validLocale;
// Bit set with one bit per attribute index: a set bit means the attribute was set
// explicitly (see setAttributeDefault / setAttributeExplicitly above).
869  uint32_t explicitlySetAttributes;
870 
// By its name, true when the actual locale equals validLocale.
871  UBool actualLocaleIsSameAsValid;
872 };
873 
875 
876 #endif // !UCONFIG_NO_COLLATION
877 #endif // TBLCOLL_H
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]
The binary form of a version on ICU APIs is an array of 4 uint8_t.
Definition: uversion.h:59
virtual void setReorderCodes(const int32_t *reorderCodes, int32_t reorderCodesLength, UErrorCode &status)
Sets the ordering of scripts for this collator.
virtual int32_t hashCode(void) const =0
Generates the hash code for the collation object.
The Collator class performs locale-sensitive string comparison.
Definition: coll.h:165
virtual int32_t internalGetShortDefinitionString(const char *locale, char *buffer, int32_t capacity, UErrorCode &status) const
Get the short definition string for a collator.
virtual int32_t getReorderCodes(int32_t *dest, int32_t destCapacity, UErrorCode &status) const
Retrieves the reordering codes for this collator.
virtual Collator & setMaxVariable(UColReorderCode group, UErrorCode &errorCode)
Sets the variable top to the top of the specified reordering group.
UCollationResult
UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll(...
Definition: ucol.h:73
virtual uint32_t getVariableTop(UErrorCode &status) const =0
Gets the variable top value of a Collator.
virtual int32_t internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2], uint8_t *dest, int32_t count, UErrorCode &errorCode) const
Implements ucol_nextSortKeyPart().
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:840
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:93
C API for code unit iteration.
Definition: uiter.h:341
ECollationStrength
Base letter represents a primary difference.
Definition: coll.h:195
static const RuleBasedCollator * rbcFromUCollator(const UCollator *uc)
Definition: tblcoll.h:784
UColReorderCode
Enum containing the codes for reordering segments of the collation table that are not script codes...
Definition: ucol.h:146
UColAttribute
Attributes that collation service understands.
Definition: ucol.h:242
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside...
Definition: utypes.h:301
virtual UColAttributeValue getAttribute(UColAttribute attr, UErrorCode &status) const =0
Universal attribute getter.
virtual UCollationResult compareUTF8(const StringPiece &source, const StringPiece &target, UErrorCode &status) const
Compares two UTF-8 strings using the Collator.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:137
The RuleBasedCollator class provides the implementation of Collator, using data-driven tables...
Definition: tblcoll.h:113
C++ API: Collation Service.
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:358
virtual Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const =0
Gets the locale of the Collator.
virtual Collator * clone(void) const =0
Makes a copy of this object.
virtual UClassID getDynamicClassID(void) const =0
Returns a unique class ID POLYMORPHICALLY.
The CollationElementIterator class is used as an iterator to walk through each character of an intern...
Definition: coleitr.h:119
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:389
virtual void getVersion(UVersionInfo info) const =0
Gets the version information for a Collator.
virtual void setLocales(const Locale &requestedLocale, const Locale &validLocale, const Locale &actualLocale)
Used internally by registration to define the requested and valid locales.
C API: Collator.
Collation keys are generated by the Collator class.
Definition: sortkey.h:99
virtual int32_t getSortKey(const UnicodeString &source, uint8_t *result, int32_t resultLength) const =0
Get the sort key as an array of bytes from a UnicodeString.
virtual CollationKey & getCollationKey(const UnicodeString &source, CollationKey &key, UErrorCode &status) const =0
Transforms the string into a series of characters that can be compared with CollationKey::compareTo.
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:277
C API: Unicode Character Iteration.
virtual UBool operator==(const Collator &other) const
Returns TRUE if "other" is the same as "this".
virtual EComparisonResult compare(const UnicodeString &source, const UnicodeString &target) const
The comparison function compares the character data stored in two different strings.
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:138
virtual UColReorderCode getMaxVariable() const
Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:401
ULocDataLocaleType
Constants for *_getLocale(). Allow the user to select whether she wants information on requested...
Definition: uloc.h:338
static RuleBasedCollator * rbcFromUCollator(UCollator *uc)
Definition: tblcoll.h:780
C++ API: Locale ID object.
struct UCollator UCollator
structure representing a collator object instance
Definition: ucol.h:58
virtual void setAttribute(UColAttribute attr, UColAttributeValue value, UErrorCode &status)=0
Universal attribute setter.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
Basic definitions for ICU, for both C and C++ APIs.
static Collator * fromUCollator(UCollator *uc)
Definition: coll.h:1166
virtual UCollationResult internalCompareUTF8(const char *left, int32_t leftLength, const char *right, int32_t rightLength, UErrorCode &errorCode) const
Implements ucol_strcollUTF8().
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:289
UColAttributeValue
Enum containing attribute values for controlling collation behavior.
Definition: ucol.h:89
A string-like object that points to a sized piece of memory.
Definition: stringpiece.h:54
virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status)=0
Sets the variable top to the primary weight of the specified string.
UColRuleOption
Options for retrieving the rule string.
Definition: ucol.h:357
One more than the highest normal UColAttribute value.
Definition: ucol.h:351
int8_t UBool
The ICU boolean type.
Definition: umachine.h:225
virtual UnicodeSet * getTailoredSet(UErrorCode &status) const
Get a UnicodeSet that contains all the characters and sequences tailored in this collator.
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:192