ICU 64.2  64.2
translit.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1999-2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * Date Name Description
9 * 11/17/99 aliu Creation.
10 **********************************************************************
11 */
12 #ifndef TRANSLIT_H
13 #define TRANSLIT_H
14 
15 #include "unicode/utypes.h"
16 
22 #if !UCONFIG_NO_TRANSLITERATION
23 
24 #include "unicode/uobject.h"
25 #include "unicode/unistr.h"
26 #include "unicode/parseerr.h"
27 #include "unicode/utrans.h" // UTransPosition, UTransDirection
28 #include "unicode/strenum.h"
29 
31 
32 class UnicodeFilter;
33 class UnicodeSet;
34 class TransliteratorParser;
35 class NormalizationTransliterator;
36 class TransliteratorIDParser;
37 
489 
490 private:
491 
495  UnicodeString ID;
496 
503  UnicodeFilter* filter;
504 
505  int32_t maximumContextLength;
506 
507  public:
508 
514  union Token { // A context integer or pointer for a factory function, passed by value.
519  int32_t integer; // This token, interpreted as a 32-bit integer.
524  void* pointer; // This token, interpreted as a native pointer.
525  };
526 
527 #ifndef U_HIDE_INTERNAL_API
528 
533  inline static Token integerToken(int32_t);
534 
540  inline static Token pointerToken(void*);
541 #endif /* U_HIDE_INTERNAL_API */
542 
558  typedef Transliterator* (U_EXPORT2 *Factory)(const UnicodeString& ID, Token context); // Pointer to a function that creates and returns a Transliterator, given an ID and a Token context.
559 
560 protected:
561 
571  Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter);
572 
578 
583  Transliterator& operator=(const Transliterator&);
584 
596  static Transliterator* createBasicInstance(const UnicodeString& id,
597  const UnicodeString* canon);
598 
599  friend class TransliteratorParser; // for parseID()
600  friend class TransliteratorIDParser; // for createBasicInstance()
601  friend class TransliteratorAlias; // for setID()
602 
603 public:
604 
609  virtual ~Transliterator();
610 
625  virtual Transliterator* clone() const;
626 
642  virtual int32_t transliterate(Replaceable& text,
643  int32_t start, int32_t limit) const;
644 
650  virtual void transliterate(Replaceable& text) const;
651 
716  virtual void transliterate(Replaceable& text, UTransPosition& index,
717  const UnicodeString& insertion,
718  UErrorCode& status) const;
719 
735  virtual void transliterate(Replaceable& text, UTransPosition& index,
736  UChar32 insertion,
737  UErrorCode& status) const;
738 
752  virtual void transliterate(Replaceable& text, UTransPosition& index,
753  UErrorCode& status) const;
754 
766  virtual void finishTransliteration(Replaceable& text,
767  UTransPosition& index) const;
768 
769 private:
770 
786  void _transliterate(Replaceable& text,
787  UTransPosition& index,
788  const UnicodeString* insertion,
789  UErrorCode &status) const;
790 
791 protected:
792 
872  virtual void handleTransliterate(Replaceable& text,
873  UTransPosition& pos,
874  UBool incremental) const = 0;
875 
876 public:
888  virtual void filteredTransliterate(Replaceable& text,
889  UTransPosition& index,
890  UBool incremental) const;
891 
892 private:
893 
921  virtual void filteredTransliterate(Replaceable& text,
922  UTransPosition& index,
923  UBool incremental,
924  UBool rollback) const;
925 
926 public:
927 
941  int32_t getMaximumContextLength(void) const;
942 
943 protected:
944 
951  void setMaximumContextLength(int32_t maxContextLength);
952 
953 public:
954 
965  virtual const UnicodeString& getID(void) const;
966 
976  static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
977  UnicodeString& result);
978 
1000  static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
1001  const Locale& inLocale,
1002  UnicodeString& result);
1003 
1011  const UnicodeFilter* getFilter(void) const;
1012 
1022  UnicodeFilter* orphanFilter(void);
1023 
1034  void adoptFilter(UnicodeFilter* adoptedFilter);
1035 
1055  Transliterator* createInverse(UErrorCode& status) const;
1056 
1073  static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
1074  UTransDirection dir,
1075  UParseError& parseError,
1076  UErrorCode& status);
1077 
1088  static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
1089  UTransDirection dir,
1090  UErrorCode& status);
1091 
1109  static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID,
1110  const UnicodeString& rules,
1111  UTransDirection dir,
1112  UParseError& parseError,
1113  UErrorCode& status);
1114 
1126  virtual UnicodeString& toRules(UnicodeString& result,
1127  UBool escapeUnprintable) const;
1128 
1141  int32_t countElements() const;
1142 
1162  const Transliterator& getElement(int32_t index, UErrorCode& ec) const;
1163 
1179  UnicodeSet& getSourceSet(UnicodeSet& result) const;
1180 
1195  virtual void handleGetSourceSet(UnicodeSet& result) const;
1196 
1210  virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;
1211 
1212 public:
1213 
1230  static void U_EXPORT2 registerFactory(const UnicodeString& id,
1231  Factory factory,
1232  Token context);
1233 
1255  static void U_EXPORT2 registerInstance(Transliterator* adoptedObj);
1256 
1271  static void U_EXPORT2 registerAlias(const UnicodeString& aliasID,
1272  const UnicodeString& realID);
1273 
1274 protected:
1275 
1276 #ifndef U_HIDE_INTERNAL_API
1277 
1286  static void _registerFactory(const UnicodeString& id,
1287  Factory factory,
1288  Token context);
1289 
1293  static void _registerInstance(Transliterator* adoptedObj);
1294 
1298  static void _registerAlias(const UnicodeString& aliasID, const UnicodeString& realID);
1299 
1333  static void _registerSpecialInverse(const UnicodeString& target,
1334  const UnicodeString& inverseTarget,
1335  UBool bidirectional);
1336 #endif /* U_HIDE_INTERNAL_API */
1337 
1338 public:
1339 
1357  static void U_EXPORT2 unregister(const UnicodeString& ID);
1358 
1359 public:
1360 
1370  static StringEnumeration* U_EXPORT2 getAvailableIDs(UErrorCode& ec);
1371 
1377  static int32_t U_EXPORT2 countAvailableSources(void);
1378 
1388  static UnicodeString& U_EXPORT2 getAvailableSource(int32_t index,
1389  UnicodeString& result);
1390 
1399  static int32_t U_EXPORT2 countAvailableTargets(const UnicodeString& source);
1400 
1412  static UnicodeString& U_EXPORT2 getAvailableTarget(int32_t index,
1413  const UnicodeString& source,
1414  UnicodeString& result);
1415 
1423  static int32_t U_EXPORT2 countAvailableVariants(const UnicodeString& source,
1424  const UnicodeString& target);
1425 
1439  static UnicodeString& U_EXPORT2 getAvailableVariant(int32_t index,
1440  const UnicodeString& source,
1441  const UnicodeString& target,
1442  UnicodeString& result);
1443 
1444 protected:
1445 
1446 #ifndef U_HIDE_INTERNAL_API
1447 
1451  static int32_t _countAvailableSources(void);
1452 
1457  static UnicodeString& _getAvailableSource(int32_t index,
1458  UnicodeString& result);
1459 
1464  static int32_t _countAvailableTargets(const UnicodeString& source);
1465 
1470  static UnicodeString& _getAvailableTarget(int32_t index,
1471  const UnicodeString& source,
1472  UnicodeString& result);
1473 
1478  static int32_t _countAvailableVariants(const UnicodeString& source,
1479  const UnicodeString& target);
1480 
1485  static UnicodeString& _getAvailableVariant(int32_t index,
1486  const UnicodeString& source,
1487  const UnicodeString& target,
1488  UnicodeString& result);
1489 #endif /* U_HIDE_INTERNAL_API */
1490 
1491 protected:
1492 
1499  void setID(const UnicodeString& id);
1500 
1501 public:
1502 
1513  static UClassID U_EXPORT2 getStaticClassID(void);
1514 
1530  virtual UClassID getDynamicClassID(void) const = 0;
1531 
1532 private:
1533  static UBool initializeRegistry(UErrorCode &status);
1534 
1535 public:
1536 #ifndef U_HIDE_OBSOLETE_API
1537 
1544  static int32_t U_EXPORT2 countAvailableIDs(void);
1545 
1558  static const UnicodeString& U_EXPORT2 getAvailableID(int32_t index);
1559 #endif /* U_HIDE_OBSOLETE_API */
1560 };
1561 
1562 inline int32_t Transliterator::getMaximumContextLength(void) const { // Accessor for the private maximumContextLength field; does not modify the object.
1563  return maximumContextLength; // value is returned as stored, with no adjustment
1564 }
1565 
1566 inline void Transliterator::setID(const UnicodeString& id) { // Replaces this object's ID member with the contents of id.
1567  ID = id; // assign the caller's string to the member
1568  // NUL-terminate the ID string, which is a non-aliased copy.
1569  ID.append((char16_t)0); // step 1: append a single zero code unit (length grows by one)
1570  ID.truncate(ID.length()-1); // step 2: truncate back to the previous length; presumably the zero stays in the buffer just past the end -- confirm against UnicodeString
1571 }
1572 
1573 #ifndef U_HIDE_INTERNAL_API
1574 inline Transliterator::Token Transliterator::integerToken(int32_t i) { // Builds a Token whose integer member holds i.
1575  Token t; // union is not otherwise initialized
1576  t.integer = i; // only the integer member is written
1577  return t; // returned by value
1578 }
1579 
1580 inline Transliterator::Token Transliterator::pointerToken(void* p) { // Builds a Token whose pointer member holds p.
1581  Token t; // union is not otherwise initialized
1582  t.pointer = p; // only the pointer member is written; p is stored as-is
1583  return t; // returned by value
1584 }
1585 #endif /* U_HIDE_INTERNAL_API */
1586 
1588 
1589 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
1590 
1591 #endif
Transliterator *(* Factory)(const UnicodeString &ID, Token context)
A function that creates and returns a Transliterator.
Definition: translit.h:558
Position structure for utrans_transIncremental() incremental transliteration.
Definition: utrans.h:122
C++ API: Unicode String.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:93
Base class for 'pure' C++ implementations of uenum api.
Definition: strenum.h:57
Transliterator is an abstract class that transliterates text from one format to another.
Definition: translit.h:488
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition: utypes.h:301
A context integer or pointer for a factory function, passed by value.
Definition: translit.h:514
Replaceable is an abstract base class representing a string of characters that supports the replacement of a range of itself with a new string of characters.
Definition: rep.h:73
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:137
UBool truncate(int32_t targetLength)
Truncate this UnicodeString to the targetLength.
Definition: unistr.h:4726
UnicodeFilter defines a protocol for selecting a subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
Definition: unifilt.h:61
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:389
C API: Transliterator.
UnicodeString & append(const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
Append the characters in srcText in the range [srcStart, srcStart + srcLength) to the UnicodeString o...
Definition: unistr.h:4618
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:277
C++ API: Common ICU base class UObject.
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:138
C API: Parse Error Information.
int32_t integer
This token, interpreted as a 32-bit integer.
Definition: translit.h:519
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers, and to use the same mechanism for C and C++.
Definition: utypes.h:401
C++ API: String Enumeration.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
Basic definitions for ICU, for both C and C++ APIs.
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer/StringBuilder classes.
Definition: unistr.h:289
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
UTransDirection
Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules of a RuleBasedTransliterator.
Definition: utrans.h:80
int8_t UBool
The ICU boolean type.
Definition: umachine.h:225
void * pointer
This token, interpreted as a native pointer.
Definition: translit.h:524
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:192