ICU 64.2  64.2
utext.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2004-2012, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: utext.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2004oct06
16 * created by: Markus W. Scherer
17 */
18 
19 #ifndef __UTEXT_H__
20 #define __UTEXT_H__
21 
140 #include "unicode/utypes.h"
141 #include "unicode/uchar.h"
142 #if U_SHOW_CPLUSPLUS_API
143 #include "unicode/localpointer.h"
144 #include "unicode/rep.h"
145 #include "unicode/unistr.h"
146 #include "unicode/chariter.h"
147 #endif
148 
149 
151 
/* Forward declaration and typedef for the UText struct, so that the function
 * prototypes that follow can take UText pointers before the struct body,
 * which is defined later in this header. */
152 struct UText;
153 typedef struct UText UText;
156 /***************************************************************************************
157  *
158  * C Functions for creating UText wrappers around various kinds of text strings.
159  *
160  ****************************************************************************************/
161 
162 
/* Close function for UText instances. */
183 U_STABLE UText * U_EXPORT2
184 utext_close(UText *ut);
185 
186 #if U_SHOW_CPLUSPLUS_API
187 
189 
200 
202 
203 #endif
204 
/* Open a read-only UText implementation for UTF-8 strings.
 * s and length describe the UTF-8 input; status points to a UErrorCode. */
226 U_STABLE UText * U_EXPORT2
227 utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status);
228 
229 
/* Open a read-only UText for a UChar * string.
 * s and length describe the UChar input; status points to a UErrorCode. */
244 U_STABLE UText * U_EXPORT2
245 utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
246 
247 
248 #if U_SHOW_CPLUSPLUS_API
249 
261 U_STABLE UText * U_EXPORT2
263 
264 
277 U_STABLE UText * U_EXPORT2
279 
280 
293 U_STABLE UText * U_EXPORT2
295 
308 U_STABLE UText * U_EXPORT2
310 
311 #endif
312 
313 
/* Clone a UText.
 * NOTE(review): the exact meaning of the deep and readOnly flags is not
 * shown in this listing -- confirm against the ICU API reference. */
371 U_STABLE UText * U_EXPORT2
372 utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status);
373 
374 
/* Compare two UText objects for equality. */
386 U_STABLE UBool U_EXPORT2
387 utext_equals(const UText *a, const UText *b);
388 
389 
390 /*****************************************************************************
391  *
392  * Functions to work with the text represented by a UText wrapper
393  *
394  *****************************************************************************/
395 
407 U_STABLE int64_t U_EXPORT2
409 
/* Return TRUE if calculating the length of the text could be expensive. */
423 U_STABLE UBool U_EXPORT2
424 utext_isLengthExpensive(const UText *ut);
425 
/* Returns the code point at the requested index, or U_SENTINEL (-1) if it
 * is out of bounds. */
451 U_STABLE UChar32 U_EXPORT2
452 utext_char32At(UText *ut, int64_t nativeIndex);
453 
454 
465 U_STABLE UChar32 U_EXPORT2
467 
468 
/* Get the code point at the current iteration position of the UText, and
 * advance the iteration position. */
487 U_STABLE UChar32 U_EXPORT2
488 utext_next32(UText *ut);
489 
490 
508 U_STABLE UChar32 U_EXPORT2
510 
511 
/* Set the iteration index and return the code point at that index. */
530 U_STABLE UChar32 U_EXPORT2
531 utext_next32From(UText *ut, int64_t nativeIndex);
532 
533 
534 
/* Set the iteration index, and return the code point preceding the one
 * specified by the initial index. */
550 U_STABLE UChar32 U_EXPORT2
551 utext_previous32From(UText *ut, int64_t nativeIndex);
552 
/* Get the current iterator position, which can range from 0 to the length
 * of the text. */
565 U_STABLE int64_t U_EXPORT2
566 utext_getNativeIndex(const UText *ut);
567 
/* Set the current iteration position to the nearest code point boundary at
 * or preceding the specified index. */
591 U_STABLE void U_EXPORT2
592 utext_setNativeIndex(UText *ut, int64_t nativeIndex);
593 
/* Move the iterator position by delta code points. */
610 U_STABLE UBool U_EXPORT2
611 utext_moveIndex32(UText *ut, int32_t delta);
612 
635 U_STABLE int64_t U_EXPORT2
637 
638 
/* Extract text from a UText into a UChar buffer.
 * nativeStart/nativeLimit select the source range; dest/destCapacity
 * describe the output buffer; status points to a UErrorCode. */
673 U_STABLE int32_t U_EXPORT2
674 utext_extract(UText *ut,
675  int64_t nativeStart, int64_t nativeLimit,
676  UChar *dest, int32_t destCapacity,
677  UErrorCode *status);
678 
679 
680 
681 /************************************************************************************
682  *
683  * #define inline versions of selected performance-critical text access functions
684  * Caution: do not use auto increment++ or decrement-- expressions
685  * as parameters to these macros.
686  *
687  * For most use, where there is no extreme performance constraint, the
688  * normal, non-inline functions are a better choice. The resulting code
689  * will be smaller, and, if the need ever arises, easier to debug.
690  *
691  * These are implemented as #defines rather than real functions
692  * because there is no fully portable way to do inline functions in plain C.
693  *
694  ************************************************************************************/
695 
696 #ifndef U_HIDE_INTERNAL_API
697 
/* Inline form of utext_current32().
 * Fast path: when chunkOffset is inside the current chunk and the UTF-16
 * code unit at that offset is below 0xd800, the macro yields that code unit
 * directly from chunkContents. In every other case it calls
 * utext_current32(ut). The iteration position is not changed by the fast
 * path.
 * The argument is evaluated several times; do not pass an expression with
 * side effects. */
706 #define UTEXT_CURRENT32(ut) \
707  ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
708  ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut))
709 #endif /* U_HIDE_INTERNAL_API */
710 
/* Inline form of utext_next32().
 * Fast path: when chunkOffset is inside the current chunk and the UTF-16
 * code unit at that offset is below 0xd800, the macro yields that code unit
 * and post-increments chunkOffset. In every other case it calls
 * utext_next32(ut).
 * The argument is evaluated several times; do not pass an expression with
 * side effects. */
722 #define UTEXT_NEXT32(ut) \
723  ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
724  ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut))
725 
/* Inline form of utext_previous32().
 * Fast path: when chunkOffset is greater than 0 and the UTF-16 code unit
 * just before it is below 0xd800, the macro pre-decrements chunkOffset and
 * yields that code unit. In every other case it calls utext_previous32(ut).
 * The argument is evaluated several times; do not pass an expression with
 * side effects. */
736 #define UTEXT_PREVIOUS32(ut) \
737  ((ut)->chunkOffset > 0 && \
738  (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \
739  (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut))
740 
/* Inline computation of the current native index.
 * When chunkOffset does not exceed nativeIndexingLimit, the result is simply
 * chunkNativeStart + chunkOffset. Otherwise the provider's
 * mapOffsetToNative function is called through the pFuncs dispatch table.
 * The argument is evaluated several times; do not pass an expression with
 * side effects. */
753 #define UTEXT_GETNATIVEINDEX(ut) \
754  ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \
755  (ut)->chunkNativeStart+(ut)->chunkOffset : \
756  (ut)->pFuncs->mapOffsetToNative(ut))
757 
/* Inline form of utext_setNativeIndex().
 * Computes the offset of ix relative to chunkNativeStart. If that offset is
 * non-negative, below nativeIndexingLimit, and the UTF-16 code unit at that
 * offset is below 0xdc00, chunkOffset is set directly. Otherwise
 * utext_setNativeIndex(ut, ix) is called.
 *
 * Usage notes:
 *  - Both arguments are evaluated more than once; do not pass expressions
 *    with side effects.
 *  - The expansion is a bare { ... } block, not an expression and not a
 *    do { } while (0) statement. Writing "if (c) UTEXT_SETNATIVEINDEX(ut, i);
 *    else ..." therefore does not compile; wrap the call in braces.
 *  - The expansion declares a local named __offset, which would shadow any
 *    identifier of that name used in the ix argument. */
769 #define UTEXT_SETNATIVEINDEX(ut, ix) \
770  { int64_t __offset = (ix) - (ut)->chunkNativeStart; \
771  if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \
772  (ut)->chunkOffset=(int32_t)__offset; \
773  } else { \
774  utext_setNativeIndex((ut), (ix)); } }
775 
776 
777 
778 /************************************************************************************
779  *
780  * Functions related to writing or modifying the text.
781  * These will work only with modifiable UTexts. Attempting to
782  * modify a read-only UText will return an error status.
783  *
784  ************************************************************************************/
785 
786 
/* Return TRUE if the text can be written (modified) with utext_replace() or
 * utext_copy(). */
805 U_STABLE UBool U_EXPORT2
806 utext_isWritable(const UText *ut);
807 
808 
/* Test whether there is meta data associated with the text. */
817 U_STABLE UBool U_EXPORT2
818 utext_hasMetaData(const UText *ut);
819 
820 
/* Replace a range of the original text with a replacement text.
 * nativeStart/nativeLimit select the range to replace;
 * replacementText/replacementLength describe the new text; status points to
 * a UErrorCode. Works only with modifiable UTexts: attempting to modify a
 * read-only UText returns an error status. */
848 U_STABLE int32_t U_EXPORT2
849 utext_replace(UText *ut,
850  int64_t nativeStart, int64_t nativeLimit,
851  const UChar *replacementText, int32_t replacementLength,
852  UErrorCode *status);
853 
854 
855 
/* Copy or move a substring from one position to another within the text.
 * nativeStart/nativeLimit select the source range and destIndex the
 * destination; status points to a UErrorCode. Works only with modifiable
 * UTexts: attempting to modify a read-only UText returns an error status.
 * NOTE(review): the move flag presumably selects move rather than copy --
 * confirm against the ICU API reference. */
888 U_STABLE void U_EXPORT2
889 utext_copy(UText *ut,
890  int64_t nativeStart, int64_t nativeLimit,
891  int64_t destIndex,
892  UBool move,
893  UErrorCode *status);
894 
895 
/* NOTE(review): no description of utext_freeze survives in this listing.
 * Presumably it makes the UText read-only -- confirm against the ICU API
 * reference before relying on that. */
917 U_STABLE void U_EXPORT2
918 utext_freeze(UText *ut);
919 
920 
927 enum {
961 };
962 
/* Function type declaration for UText.clone(); this is the type of the
 * clone slot in the UTextFuncs dispatch table. */
1000 typedef UText * U_CALLCONV
1001 UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
1002 
1003 
1012 typedef int64_t U_CALLCONV
1014 
/* Function type declaration for UText.access(); this is the type of the
 * access slot in the UTextFuncs dispatch table. */
1040 typedef UBool U_CALLCONV
1041 UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);
1042 
1070 typedef int32_t U_CALLCONV
1072  int64_t nativeStart, int64_t nativeLimit,
1073  UChar *dest, int32_t destCapacity,
1074  UErrorCode *status);
1075 
1105 typedef int32_t U_CALLCONV
1107  int64_t nativeStart, int64_t nativeLimit,
1108  const UChar *replacementText, int32_t replacmentLength,
1109  UErrorCode *status);
1110 
1139 typedef void U_CALLCONV
1141  int64_t nativeStart, int64_t nativeLimit,
1142  int64_t nativeDest,
1143  UBool move,
1144  UErrorCode *status);
1145 
1159 typedef int64_t U_CALLCONV
1161 
/* Function type declaration for the mapNativeIndexToUTF16 slot in the
 * UTextFuncs dispatch table. */
1177 typedef int32_t U_CALLCONV
1178 UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);
1179 
1180 
1198 typedef void U_CALLCONV
1200 
1201 
1211 struct UTextFuncs {
1226  int32_t tableSize;
1227 
1234 
1235 
1243 
1252 
1260 
1268 
1276 
1284 
1292 
1300 
1308 
1314 
1320 
1326 
1327 };
1332 typedef struct UTextFuncs UTextFuncs;
1333 
1345 struct UText {
1358  uint32_t magic;
1359 
1360 
1366  int32_t flags;
1367 
1368 
1375 
1382  int32_t sizeOfStruct;
1383 
1384  /* ------ 16 byte alignment boundary ----------- */
1385 
1386 
1393 
1398  int32_t extraSize;
1399 
1408 
1409  /* ---- 16 byte alignment boundary------ */
1410 
1416 
1422  int32_t chunkOffset;
1423 
1428  int32_t chunkLength;
1429 
1430  /* ---- 16 byte alignment boundary-- */
1431 
1432 
1440 
1446 
1452  void *pExtra;
1453 
1460  const void *context;
1461 
1462  /* --- 16 byte alignment boundary--- */
1463 
1469  const void *p;
1475  const void *q;
1481  const void *r;
1482 
1488  void *privP;
1489 
1490 
1491  /* --- 16 byte alignment boundary--- */
1492 
1493 
1499  int64_t a;
1500 
1506  int32_t b;
1507 
1513  int32_t c;
1514 
1515  /* ---- 16 byte alignment boundary---- */
1516 
1517 
1523  int64_t privA;
1529  int32_t privB;
1535  int32_t privC;
1536 };
1537 
1538 
/* Common function for use by Text Provider implementations to allocate
 * and/or initialize a new UText.
 * NOTE(review): extraSpace presumably is the number of bytes of provider
 * extra storage (see the pExtra / extraSize fields) -- confirm. */
1555 U_STABLE UText * U_EXPORT2
1556 utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
1557 
1558 // do not use #ifndef U_HIDE_INTERNAL_API around the following!
/* Value that UTEXT_INITIALIZER stores in the magic field of a UText.
 * Must stay visible even when internal API is hidden, because the public
 * UTEXT_INITIALIZER macro expands to it. */
1564 enum {
1565  UTEXT_MAGIC = 0x345ad82c
1566 };
1567 
/* Aggregate initializer for a UText variable, e.g.
 *     UText ut = UTEXT_INITIALIZER;
 * Sets magic to UTEXT_MAGIC and sizeOfStruct to sizeof(UText); every other
 * field is set to 0 or NULL. The initializers are positional, so their order
 * must match the field order of struct UText exactly. */
1575 #define UTEXT_INITIALIZER { \
1576  UTEXT_MAGIC, /* magic */ \
1577  0, /* flags */ \
1578  0, /* providerProps */ \
1579  sizeof(UText), /* sizeOfStruct */ \
1580  0, /* chunkNativeLimit */ \
1581  0, /* extraSize */ \
1582  0, /* nativeIndexingLimit */ \
1583  0, /* chunkNativeStart */ \
1584  0, /* chunkOffset */ \
1585  0, /* chunkLength */ \
1586  NULL, /* chunkContents */ \
1587  NULL, /* pFuncs */ \
1588  NULL, /* pExtra */ \
1589  NULL, /* context */ \
1590  NULL, NULL, NULL, /* p, q, r */ \
1591  NULL, /* privP */ \
1592  0, 0, 0, /* a, b, c */ \
1593  0, 0, 0 /* privA,B,C, */ \
1594  }
1595 
1596 
1598 
1599 
1600 
1601 #endif
int32_t UTextExtract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
Function type declaration for UText.extract().
Definition: utext.h:1071
int32_t c
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1513
int64_t utext_nativeLength(UText *ut)
Get the length of the text.
UChar32 utext_previous32(UText *ut)
Move the iterator position to the character (code point) whose index precedes the current position...
UTextClose * spare3
(private) Spare function pointer
Definition: utext.h:1325
int32_t nativeIndexingLimit
(protected) The highest chunk offset where native indexing and chunk (UTF-16) indexing correspond...
Definition: utext.h:1407
int64_t chunkNativeStart
(protected) Native index of the first character in the text chunk.
Definition: utext.h:1415
UBool utext_isWritable(const UText *ut)
Return TRUE if the text can be written (modified) with utext_replace() or utext_copy().
void UTextClose(UText *ut)
Function type declaration for UText.close().
Definition: utext.h:1199
int32_t providerProperties
Text provider properties.
Definition: utext.h:1374
void * pExtra
(protected) Pointer to additional space requested by the text provider during the utext_open operatio...
Definition: utext.h:1452
int64_t a
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1499
UChar32 utext_previous32From(UText *ut, int64_t nativeIndex)
Set the iteration index, and return the code point preceding the one specified by the initial index...
int32_t chunkLength
(protected) Length of the text chunk (UTF-16 buffer), in UChars.
Definition: utext.h:1428
C++ API: Unicode String.
void UTextCopy(UText *ut, int64_t nativeStart, int64_t nativeLimit, int64_t nativeDest, UBool move, UErrorCode *status)
Function type declaration for UText.copy().
Definition: utext.h:1140
UTextMapNativeIndexToUTF16 * mapNativeIndexToUTF16
(public) Function pointer for UTextMapNativeIndexToUTF16.
Definition: utext.h:1299
UText * utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status)
Open a read-only UText implementation for UTF-8 strings.
UText * UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status)
Function type declaration for UText.clone().
Definition: utext.h:1001
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:840
int32_t reserved1
(private) Alignment padding.
Definition: utext.h:1233
void utext_freeze(UText *ut)
UTextExtract * extract
(public) Function pointer for UTextExtract.
Definition: utext.h:1267
int64_t UTextNativeLength(UText *ut)
Function type declaration for UText.nativeLength().
Definition: utext.h:1013
void * privP
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1488
UTextClose * close
(public) Function pointer for UTextClose.
Definition: utext.h:1307
int32_t flags
(private) Flags for managing the allocation and freeing of memory associated with this UText...
Definition: utext.h:1366
int32_t privC
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1535
UTextClone * clone
(public) Function pointer for UTextClone
Definition: utext.h:1242
UTextNativeLength * nativeLength
(public) Function pointer for UTextNativeLength. May be expensive to compute!
Definition: utext.h:1251
const void * q
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1475
(public) Function dispatch table for UText.
Definition: utext.h:1211
UChar32 utext_next32(UText *ut)
Get the code point at the current iteration position of the UText, and advance the position to the fi...
The provider supports modifying the text via the replace() and copy() functions.
Definition: utext.h:946
UTextAccess * access
(public) Function pointer for UTextAccess.
Definition: utext.h:1259
UChar32 utext_char32At(UText *ut, int64_t nativeIndex)
Returns the code point at the requested index, or U_SENTINEL (-1) if it is out of bounds...
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:84
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition: rep.h:73
Text provider owns the text storage.
Definition: utext.h:960
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:137
const void * p
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1469
void utext_copy(UText *ut, int64_t nativeStart, int64_t nativeLimit, int64_t destIndex, UBool move, UErrorCode *status)
Copy or move a substring from one position to another within the text, while retaining any metadata a...
UTextCopy * copy
(public) Function pointer for UTextCopy.
Definition: utext.h:1283
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:562
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:358
UBool utext_equals(const UText *a, const UText *b)
Compare two UText objects for equality.
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
UChar32 utext_current32(UText *ut)
Get the code point at the current iteration position, or U_SENTINEL (-1) if the iteration has reached...
const UChar * chunkContents
(protected) pointer to a chunk of text in UTF-16 format.
Definition: utext.h:1439
int32_t reserved3
Definition: utext.h:1233
UText * utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status)
Open a UText implementation over an ICU CharacterIterator.
int32_t tableSize
(public) Function table size, sizeof(UTextFuncs). Intended for use should the table grow to accommodat...
Definition: utext.h:1226
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:389
int32_t privB
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1529
UBool UTextAccess(UText *ut, int64_t nativeIndex, UBool forward)
Function type declaration for UText.access().
Definition: utext.h:1041
int32_t chunkOffset
(protected) Current iteration position within the text chunk (UTF-16 buffer).
Definition: utext.h:1422
int32_t extraSize
(protected) Size in bytes of the extra space (pExtra).
Definition: utext.h:1398
UChar32 utext_next32From(UText *ut, int64_t nativeIndex)
Set the iteration index and return the code point at that index.
int64_t UTextMapOffsetToNative(const UText *ut)
Function type declaration for UText.mapOffsetToNative().
Definition: utext.h:1160
int64_t utext_getPreviousNativeIndex(UText *ut)
Get the native index of the character preceding the current position.
int32_t utext_extract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
Extract text from a UText into a UChar buffer.
C API: Unicode Properties.
It is potentially time consuming for the provider to determine the length of the text.
Definition: utext.h:932
int32_t UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex)
Function type declaration for UText.mapNativeIndexToUTF16().
Definition: utext.h:1178
void utext_setNativeIndex(UText *ut, int64_t nativeIndex)
Set the current iteration position to the nearest code point boundary at or preceding the specified i...
uint16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:342
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:85
int64_t privA
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1523
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:138
There is meta data associated with the text.
Definition: utext.h:952
const void * r
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1481
UBool utext_moveIndex32(UText *ut, int32_t delta)
Move the iterator position by delta code points.
UText * utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status)
Clone a UText.
int64_t chunkNativeLimit
(protected) Native index of the first character position following the current chunk.
Definition: utext.h:1392
int32_t sizeOfStruct
(public) sizeOfStruct=sizeof(UText). Allows possible backward-compatible extension.
Definition: utext.h:1382
UTextClose * spare2
(private) Spare function pointer
Definition: utext.h:1319
int32_t b
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1506
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:401
"Smart pointer" class, closes a UText via utext_close().
int64_t utext_getNativeIndex(const UText *ut)
Get the current iterator position, which can range from 0 to the length of the text.
const UTextFuncs * pFuncs
(public) Pointer to Dispatch table for accessing functions for this UText.
Definition: utext.h:1445
UText * utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status)
Open a writable UText implementation for an ICU Replaceable object.
UTextClose * spare1
(private) Spare function pointer
Definition: utext.h:1313
uint32_t magic
(private) Magic.
Definition: utext.h:1358
UText * utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status)
Open a writable UText for a non-const UnicodeString.
int32_t UTextReplace(UText *ut, int64_t nativeStart, int64_t nativeLimit, const UChar *replacementText, int32_t replacmentLength, UErrorCode *status)
Function type declaration for UText.replace().
Definition: utext.h:1106
UText struct.
Definition: utext.h:1345
UTextReplace * replace
(public) Function pointer for UTextReplace.
Definition: utext.h:1275
Basic definitions for ICU, for both C and C++ APIs.
UBool utext_isLengthExpensive(const UText *ut)
Return TRUE if calculating the length of the text could be expensive.
UText * utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status)
Open a UText for a const UnicodeString.
int32_t utext_replace(UText *ut, int64_t nativeStart, int64_t nativeLimit, const UChar *replacementText, int32_t replacementLength, UErrorCode *status)
Replace a range of the original text with a replacement text.
Text chunks remain valid and usable until the text object is modified or deleted, not just until the ...
Definition: utext.h:939
const void * context
(protected) Pointer to string or text-containing object or similar.
Definition: utext.h:1460
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:289
UText * utext_close(UText *ut)
Close function for UText instances.
C++ API: Character Iterator.
UTextMapOffsetToNative * mapOffsetToNative
(public) Function pointer for UTextMapOffsetToNative.
Definition: utext.h:1291
int32_t reserved2
Definition: utext.h:1233
UText * utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status)
Common function for use by Text Provider implementations to allocate and/or initialize a new UText st...
UBool utext_hasMetaData(const UText *ut)
Test whether there is meta data associated with the text.
UText * utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status)
Open a read-only UText for a UChar * string.
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:111
int8_t UBool
The ICU boolean type.
Definition: umachine.h:225
C++ API: Replaceable String.