ICU 64.2  64.2
normlzr.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  ********************************************************************
5  * COPYRIGHT:
6  * Copyright (c) 1996-2015, International Business Machines Corporation and
7  * others. All Rights Reserved.
8  ********************************************************************
9  */
10 
11 #ifndef NORMLZR_H
12 #define NORMLZR_H
13 
14 #include "unicode/utypes.h"
15 
21 #if !UCONFIG_NO_NORMALIZATION
22 
23 #include "unicode/chariter.h"
24 #include "unicode/normalizer2.h"
25 #include "unicode/unistr.h"
26 #include "unicode/unorm.h"
27 #include "unicode/uobject.h"
28 
134 class U_COMMON_API Normalizer : public UObject { // old normalization API class; derives from UObject
135 public:
136 #ifndef U_HIDE_DEPRECATED_API
137 
142  enum {
143  DONE=0xffff
144  };
145 
146  // Constructors
147 
158  Normalizer(const UnicodeString& str, UNormalizationMode mode);
159 
171  Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode);
172 
184 #endif /* U_HIDE_DEPRECATED_API */
185 
191  Normalizer(const Normalizer& copy);
192 
197  virtual ~Normalizer();
198 
199 
200  //-------------------------------------------------------------------------
201  // Static utility methods
202  //-------------------------------------------------------------------------
203 
204 #ifndef U_HIDE_DEPRECATED_API
205 
219  static void U_EXPORT2 normalize(const UnicodeString& source,
220  UNormalizationMode mode, int32_t options,
221  UnicodeString& result,
222  UErrorCode &status);
223 
241  static void U_EXPORT2 compose(const UnicodeString& source,
242  UBool compat, int32_t options,
243  UnicodeString& result,
244  UErrorCode &status);
245 
263  static void U_EXPORT2 decompose(const UnicodeString& source,
264  UBool compat, int32_t options,
265  UnicodeString& result,
266  UErrorCode &status);
267 
288  static inline UNormalizationCheckResult
289  quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
290 
304  static UNormalizationCheckResult // overload with explicit options; the 3-argument form forwards here with options = 0
305  quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
306 
327  static inline UBool
328  isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
329 
345  static UBool
346  isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
347 
377  static UnicodeString &
378  U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,
379  UnicodeString &result,
380  UNormalizationMode mode, int32_t options,
381  UErrorCode &errorCode);
382 #endif /* U_HIDE_DEPRECATED_API */
383 
448  static inline int32_t
449  compare(const UnicodeString &s1, const UnicodeString &s2,
450  uint32_t options,
451  UErrorCode &errorCode);
452 
453 #ifndef U_HIDE_DEPRECATED_API
454  //-------------------------------------------------------------------------
455  // Iteration API
456  //-------------------------------------------------------------------------
457 
466  UChar32 current(void);
467 
476  UChar32 first(void);
477 
486  UChar32 last(void);
487 
502  UChar32 next(void);
503 
518  UChar32 previous(void);
519 
529  void setIndexOnly(int32_t index);
530 
536  void reset(void);
537 
552  int32_t getIndex(void) const;
553 
562  int32_t startIndex(void) const;
563 
574  int32_t endIndex(void) const;
575 
584  UBool operator==(const Normalizer& that) const;
585 
594  inline UBool operator!=(const Normalizer& that) const;
595 
602  Normalizer* clone(void) const;
603 
610  int32_t hashCode(void) const;
611 
612  //-------------------------------------------------------------------------
613  // Property access methods
614  //-------------------------------------------------------------------------
615 
631  void setMode(UNormalizationMode newMode);
632 
643  UNormalizationMode getUMode(void) const;
644 
661  void setOption(int32_t option,
662  UBool value);
663 
674  UBool getOption(int32_t option) const;
675 
684  void setText(const UnicodeString& newText,
685  UErrorCode &status);
686 
695  void setText(const CharacterIterator& newText,
696  UErrorCode &status);
697 
707  void setText(ConstChar16Ptr newText,
708  int32_t length,
709  UErrorCode &status);
716  void getText(UnicodeString& result);
717 
723  static UClassID U_EXPORT2 getStaticClassID();
724 #endif /* U_HIDE_DEPRECATED_API */
725 
731  virtual UClassID getDynamicClassID() const;
732 
733 private:
734  //-------------------------------------------------------------------------
735  // Private functions
736  //-------------------------------------------------------------------------
737 
738  Normalizer(); // default constructor not implemented
739  Normalizer &operator=(const Normalizer &that); // assignment operator not implemented
740 
741  // Private utility methods for iteration
742  // For documentation, see the source code
743  UBool nextNormalize();
744  UBool previousNormalize();
745 
746  void init();
747  void clearBuffer(void);
748 
749  //-------------------------------------------------------------------------
750  // Private data
751  //-------------------------------------------------------------------------
752 
753  FilteredNormalizer2*fFilteredNorm2; // owned if not NULL
754  const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2
755  UNormalizationMode fUMode; // deprecated
756  int32_t fOptions;
757 
758  // The input text and our position in it
759  CharacterIterator *text;
760 
761  // The normalization buffer is the result of normalization
762  // of the source in [currentIndex..nextIndex[ .
763  int32_t currentIndex, nextIndex;
764 
765  // A buffer for holding intermediate results
766  UnicodeString buffer;
767  int32_t bufferPos;
768 };
769 
770 //-------------------------------------------------------------------------
771 // Inline implementations
772 //-------------------------------------------------------------------------
773 
774 #ifndef U_HIDE_DEPRECATED_API
775 inline UBool
776 Normalizer::operator!= (const Normalizer& other) const // inequality is the logical negation of operator==
777 { return ! operator==(other); }
778 
779 inline UNormalizationCheckResult
780 Normalizer::quickCheck(const UnicodeString& source, // convenience overload without an options argument
781  UNormalizationMode mode,
782  UErrorCode &status) {
783  return quickCheck(source, mode, 0, status); // forwards to the 4-argument overload with options = 0
784 }
785 
786 inline UBool
787 Normalizer::isNormalized(const UnicodeString& source, // convenience overload without an options argument
788  UNormalizationMode mode,
789  UErrorCode &status) {
790  return isNormalized(source, mode, 0, status); // forwards to the 4-argument overload with options = 0
791 }
792 #endif /* U_HIDE_DEPRECATED_API */
793 
794 inline int32_t
795 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2, // thin wrapper over the C function unorm_compare
796  uint32_t options,
797  UErrorCode &errorCode) {
798  // all argument checking is done in unorm_compare
799  return unorm_compare(toUCharPtr(s1.getBuffer()), s1.length(),
800  toUCharPtr(s2.getBuffer()), s2.length(),
801  options,
802  &errorCode); // the C API takes the error code by pointer
803 }
804 
806 
807 #endif /* #if !UCONFIG_NO_NORMALIZATION */
808 
809 #endif // NORMLZR_H
static UBool isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode)
Test if a string is in a given normalization form.
Definition: normlzr.h:787
static UNormalizationCheckResult quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status)
Performing quick check on a string, to quickly determine if the string is in a particular normalizati...
Definition: normlzr.h:780
int32_t unorm_compare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
Compares two strings for canonical equivalence.
static int32_t compare(const UnicodeString &s1, const UnicodeString &s2, uint32_t options, UErrorCode &errorCode)
Compare two strings for canonical equivalence.
Definition: normlzr.h:795
UNormalizationMode
Constants for normalization modes.
Definition: unorm.h:138
C++ API: Unicode String.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:93
C++ API: New API for Unicode Normalization.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:137
Unicode normalization functionality for standard Unicode normalization or for using custom mapping ta...
Definition: normalizer2.h:83
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:358
UBool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
Definition: stringpiece.h:218
C API: Unicode Normalization.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:389
virtual UClassID getDynamicClassID() const
ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
const UChar * toUCharPtr(const char16_t *p)
Converts from const char16_t * to const UChar *.
Definition: char16ptr.h:257
Old Unicode normalization API.
Definition: normlzr.h:134
C++ API: Common ICU base class UObject.
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:138
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:401
int32_t length(void) const
Return the length of the UnicodeString object.
Definition: unistr.h:3886
const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types...
Definition: char16ptr.h:146
char16_t * getBuffer(int32_t minCapacity)
Get a read/write pointer to the internal buffer.
Basic definitions for ICU, for both C and C++ APIs.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside...
Definition: utypes.h:300
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:289
C++ API: Character Iterator.
UBool operator!=(const Normalizer &that) const
Returns FALSE when both iterators refer to the same character in the same input text.
Definition: normlzr.h:776
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
Normalization filtered by a UnicodeSet.
Definition: normalizer2.h:501
UNormalizationCheckResult
Result values for normalization quick check functions.
Definition: unorm2.h:94
int8_t UBool
The ICU boolean type.
Definition: umachine.h:225