ICU 64.2  64.2
uset.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2002-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: uset.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2002mar07
16 * created by: Markus W. Scherer
17 *
18 * C version of UnicodeSet.
19 */
20 
21 
29 #ifndef __USET_H__
30 #define __USET_H__
31 
32 #include "unicode/utypes.h"
33 #include "unicode/uchar.h"
34 #include "unicode/localpointer.h"
35 
36 #ifndef USET_DEFINED
37 
38 #ifndef U_IN_DOXYGEN
39 #define USET_DEFINED
40 #endif
41 
47 typedef struct USet USet;
48 #endif
49 
55 enum {
61 
89 
99 };
100 
156 typedef enum USetSpanCondition {
205 #ifndef U_HIDE_DEPRECATED_API
206 
211 #endif // U_HIDE_DEPRECATED_API
213 
214 enum {
222 };
223 
229 typedef struct USerializedSet {
234  const uint16_t *array;
239  int32_t bmpLength;
244  int32_t length;
249  uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
250 } USerializedSet;
251 
252 /*********************************************************************
253  * USet API
254  *********************************************************************/
255 
263 U_STABLE USet* U_EXPORT2
264 uset_openEmpty(void);
265 
276 U_STABLE USet* U_EXPORT2
277 uset_open(UChar32 start, UChar32 end);
278 
288 U_STABLE USet* U_EXPORT2
289 uset_openPattern(const UChar* pattern, int32_t patternLength,
290  UErrorCode* ec);
291 
303 U_STABLE USet* U_EXPORT2
304 uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
305  uint32_t options,
306  UErrorCode* ec);
307 
314 U_STABLE void U_EXPORT2
315 uset_close(USet* set);
316 
317 #if U_SHOW_CPLUSPLUS_API
318 
320 
331 
333 
334 #endif
335 
345 U_STABLE USet * U_EXPORT2
346 uset_clone(const USet *set);
347 
357 U_STABLE UBool U_EXPORT2
358 uset_isFrozen(const USet *set);
359 
374 U_STABLE void U_EXPORT2
375 uset_freeze(USet *set);
376 
387 U_STABLE USet * U_EXPORT2
388 uset_cloneAsThawed(const USet *set);
389 
399 U_STABLE void U_EXPORT2
400 uset_set(USet* set,
401  UChar32 start, UChar32 end);
402 
424 U_STABLE int32_t U_EXPORT2
425 uset_applyPattern(USet *set,
426  const UChar *pattern, int32_t patternLength,
427  uint32_t options,
428  UErrorCode *status);
429 
452 U_STABLE void U_EXPORT2
453 uset_applyIntPropertyValue(USet* set,
454  UProperty prop, int32_t value, UErrorCode* ec);
455 
491 U_STABLE void U_EXPORT2
492 uset_applyPropertyAlias(USet* set,
493  const UChar *prop, int32_t propLength,
494  const UChar *value, int32_t valueLength,
495  UErrorCode* ec);
496 
506 U_STABLE UBool U_EXPORT2
507 uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
508  int32_t pos);
509 
525 U_STABLE int32_t U_EXPORT2
526 uset_toPattern(const USet* set,
527  UChar* result, int32_t resultCapacity,
528  UBool escapeUnprintable,
529  UErrorCode* ec);
530 
539 U_STABLE void U_EXPORT2
540 uset_add(USet* set, UChar32 c);
541 
554 U_STABLE void U_EXPORT2
555 uset_addAll(USet* set, const USet *additionalSet);
556 
566 U_STABLE void U_EXPORT2
567 uset_addRange(USet* set, UChar32 start, UChar32 end);
568 
578 U_STABLE void U_EXPORT2
579 uset_addString(USet* set, const UChar* str, int32_t strLen);
580 
590 U_STABLE void U_EXPORT2
591 uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
592 
601 U_STABLE void U_EXPORT2
602 uset_remove(USet* set, UChar32 c);
603 
613 U_STABLE void U_EXPORT2
614 uset_removeRange(USet* set, UChar32 start, UChar32 end);
615 
625 U_STABLE void U_EXPORT2
626 uset_removeString(USet* set, const UChar* str, int32_t strLen);
627 
639 U_STABLE void U_EXPORT2
640 uset_removeAll(USet* set, const USet* removeSet);
641 
656 U_STABLE void U_EXPORT2
657 uset_retain(USet* set, UChar32 start, UChar32 end);
658 
671 U_STABLE void U_EXPORT2
672 uset_retainAll(USet* set, const USet* retain);
673 
682 U_STABLE void U_EXPORT2
683 uset_compact(USet* set);
684 
693 U_STABLE void U_EXPORT2
694 uset_complement(USet* set);
695 
707 U_STABLE void U_EXPORT2
708 uset_complementAll(USet* set, const USet* complement);
709 
717 U_STABLE void U_EXPORT2
718 uset_clear(USet* set);
719 
746 U_STABLE void U_EXPORT2
747 uset_closeOver(USet* set, int32_t attributes);
748 
755 U_STABLE void U_EXPORT2
756 uset_removeAllStrings(USet* set);
757 
765 U_STABLE UBool U_EXPORT2
766 uset_isEmpty(const USet* set);
767 
776 U_STABLE UBool U_EXPORT2
777 uset_contains(const USet* set, UChar32 c);
778 
788 U_STABLE UBool U_EXPORT2
789 uset_containsRange(const USet* set, UChar32 start, UChar32 end);
790 
799 U_STABLE UBool U_EXPORT2
800 uset_containsString(const USet* set, const UChar* str, int32_t strLen);
801 
812 U_STABLE int32_t U_EXPORT2
813 uset_indexOf(const USet* set, UChar32 c);
814 
825 U_STABLE UChar32 U_EXPORT2
826 uset_charAt(const USet* set, int32_t charIndex);
827 
836 U_STABLE int32_t U_EXPORT2
837 uset_size(const USet* set);
838 
847 U_STABLE int32_t U_EXPORT2
848 uset_getItemCount(const USet* set);
849 
868 U_STABLE int32_t U_EXPORT2
869 uset_getItem(const USet* set, int32_t itemIndex,
870  UChar32* start, UChar32* end,
871  UChar* str, int32_t strCapacity,
872  UErrorCode* ec);
873 
882 U_STABLE UBool U_EXPORT2
883 uset_containsAll(const USet* set1, const USet* set2);
884 
895 U_STABLE UBool U_EXPORT2
896 uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
897 
906 U_STABLE UBool U_EXPORT2
907 uset_containsNone(const USet* set1, const USet* set2);
908 
917 U_STABLE UBool U_EXPORT2
918 uset_containsSome(const USet* set1, const USet* set2);
919 
939 U_STABLE int32_t U_EXPORT2
940 uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
941 
960 U_STABLE int32_t U_EXPORT2
961 uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
962 
982 U_STABLE int32_t U_EXPORT2
983 uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
984 
1003 U_STABLE int32_t U_EXPORT2
1004 uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1005 
1014 U_STABLE UBool U_EXPORT2
1015 uset_equals(const USet* set1, const USet* set2);
1016 
1017 /*********************************************************************
1018  * Serialized set API
1019  *********************************************************************/
1020 
1070 U_STABLE int32_t U_EXPORT2
1071 uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1072 
1081 U_STABLE UBool U_EXPORT2
1082 uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1083 
1091 U_STABLE void U_EXPORT2
1092 uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
1093 
1102 U_STABLE UBool U_EXPORT2
1103 uset_serializedContains(const USerializedSet* set, UChar32 c);
1104 
1114 U_STABLE int32_t U_EXPORT2
1115 uset_getSerializedRangeCount(const USerializedSet* set);
1116 
1130 U_STABLE UBool U_EXPORT2
1131 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1132  UChar32* pStart, UChar32* pEnd);
1133 
1134 #endif
UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns TRUE if the given USet contains all characters c where start <= c && c <= end...
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition: uset.h:249
void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
void uset_clear(USet *set)
Removes all of the elements from this set.
UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
USet * uset_clone(const USet *set)
Returns a copy of this object.
int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
Spans the longest substring that is a concatenation of set elements (characters or strings)...
Definition: uset.h:184
UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet patt...
One more than the last span condition.
Definition: uset.h:210
void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
const uint16_t * array
The serialized Unicode Set.
Definition: uset.h:234
int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in the given USet.
USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
"Smart pointer" class, closes a USet via uset_close().
void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code ...
UBool uset_contains(const USet *set, UChar32 c)
Returns TRUE if the given USet contains the given character.
UBool uset_isEmpty(const USet *set)
Returns TRUE if the given USet contains no characters and no strings.
int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
void uset_removeAllStrings(USet *set)
Remove all strings from this set.
void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing thi...
void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property...
void uset_freeze(USet *set)
Freeze the set (make it immutable).
USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
Ignore white space within patterns unless quoted or escaped.
Definition: uset.h:60
struct USerializedSet USerializedSet
A serialized form of a Unicode set.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:137
USet * uset_openEmpty(void)
Create an empty USet object.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:562
UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:389
void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns TRUE if the given USet contains the given string.
int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enum...
void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set...
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition: uset.h:156
C API: Unicode Properties.
int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
uint16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:342
void uset_complement(USet *set)
Inverts this set.
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:138
UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
int32_t length
The total length of the array.
Definition: uset.h:244
void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
UProperty
Selection constants for Unicode properties.
Definition: uchar.h:195
UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns TRUE if the given USerializedSet contains the given character.
int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.
USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:401
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition: uset.h:47
UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
USET_SERIALIZED_STATIC_ARRAY_CAPACITY
Capacity of USerializedSet::staticArray.
Definition: uset.h:221
Enable case insensitive matching.
Definition: uset.h:88
int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code ...
Enable case insensitive matching.
Definition: uset.h:98
Basic definitions for ICU, for both C and C++ APIs.
int32_t bmpLength
The length of the array that contains BMP characters.
Definition: uset.h:239
A serialized form of a Unicode set.
Definition: uset.h:229
Continues a span() while there is a set element at the current position.
Definition: uset.h:204
void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
Continues a span() while there is no set element at the current position.
Definition: uset.h:169
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:111
int8_t UBool
The ICU boolean type.
Definition: umachine.h:225
void uset_close(USet *set)
Disposes of the storage used by a USet object.