/*
 * Obsolete UTF-8 / UTF-16 / UTF-32 helper macros, wrapped in the
 * __UTF_OLD_H__ include guard.
 *
 * NOTE(review): this text reads like a rendered source listing that was
 * flattened onto a few physical lines. The bare integers in front of each
 * fragment (145, 146, 159, ...) appear to be listing line numbers rather than
 * code, the numbering has gaps, and several macro bodies below are visibly
 * missing statements and closing braces. Restore the text from the original
 * header before relying on any macro here.
 *
 * First physical line:
 *  - include guard (__UTF_OLD_H__);
 *  - U_HIDE_OBSOLETE_UTF_OLD_H is defined as 0 when nothing defined it earlier;
 *  - everything that follows is only active when U_HIDE_DEPRECATED_API is
 *    undefined and U_HIDE_OBSOLETE_UTF_OLD_H evaluates to 0;
 *  - UTextOffset is a typedef for int32_t; it follows an
 *    #ifdef U_USE_UTF_DEPRECATES whose #endif is not visible here.
 */
145 #ifndef __UTF_OLD_H__ 146 #define __UTF_OLD_H__ 159 #ifndef U_HIDE_OBSOLETE_UTF_OLD_H 160 # define U_HIDE_OBSOLETE_UTF_OLD_H 0 163 #if !defined(U_HIDE_DEPRECATED_API) && !U_HIDE_OBSOLETE_UTF_OLD_H 171 #ifdef U_USE_UTF_DEPRECATES 179 typedef int32_t UTextOffset;
/*
 * Error sentinels, code point classification, and the first UTF-8 macros.
 *
 *  - UTF8_ERROR_VALUE_1 (0x15), UTF8_ERROR_VALUE_2 (0x9f) and UTF_ERROR_VALUE
 *    (0xffff) are sentinel values. Further down, UTF8_ERROR_VALUE_1 is
 *    assigned to c in UTF8_NEXT_CHAR_SAFE / UTF8_PREV_CHAR_SAFE, and
 *    UTF_ERROR_VALUE in the UTF16_ and UTF32_ safe macros.
 *  - UTF_IS_ERROR(c): true when (c & 0xfffe) == 0xfffe (low 16 bits are 0xfffe
 *    or 0xffff, higher bits ignored) or c equals either UTF-8 error value.
 *  - UTF_IS_VALID(c): UTF_IS_UNICODE_CHAR(c) and c is neither UTF-8 error value.
 *  - UTF_IS_SURROGATE(uchar): (uchar & 0xfffff800) == 0xd800, i.e. 0xd800..0xdfff.
 *  - UTF_IS_UNICODE_NONCHAR(c): the visible replacement text has one more ')'
 *    than '(' and listing line 258 is absent, so the first part of the
 *    condition is missing from this view.
 *  - UTF_IS_UNICODE_CHAR(c): c < 0xd800, or 0xdfff < c <= 0x10ffff and not
 *    UTF_IS_UNICODE_NONCHAR(c).
 *  - The #elif on U_STATIC_IMPLEMENTATION / U_COMMON_IMPLEMENTATION has no
 *    visible matching #if.
 *  - UTF8_COUNT_TRAIL_BYTES(leadByte): indexes utf8_countTrailBytes[] with the
 *    argument cast to uint8_t.
 *    NOTE(review): leadByte is not parenthesized inside the cast, so an
 *    argument such as a+b would have only its first operand cast.
 *  - UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes): modifies leadByte in
 *    place (&=), keeping its low (6 - countTrailBytes) bits.
 *  - UTF8_IS_SINGLE: bit 0x80 clear. UTF8_IS_LEAD: (uint8_t)(uchar - 0xc0) is
 *    below 0x3e. UTF8_IS_TRAIL: (uchar & 0xc0) == 0x80.
 *  - UTF8_NEED_MULTIPLE_UCHAR(c): c > 0x7f as uint32_t.
 *  - UTF8_CHAR_LENGTH(c): two alternative definitions are visible; the
 *    conditional that selects between them is not. The first yields 1, 2,
 *    4 (for 0x10000..0x10ffff) or 3 (everything else). The second yields
 *    1..6 by thresholds 0x7f, 0x7ff, 0xffff, 0x10ffff, 0x3ffffff, 0x7fffffff
 *    and falls back to 3 above that. Closing parentheses are not all visible.
 */
211 #define UTF8_ERROR_VALUE_1 0x15 218 #define UTF8_ERROR_VALUE_2 0x9f 226 #define UTF_ERROR_VALUE 0xffff 234 #define UTF_IS_ERROR(c) \ 235 (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2) 242 #define UTF_IS_VALID(c) \ 243 (UTF_IS_UNICODE_CHAR(c) && \ 244 (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2) 250 #define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800) 257 #define UTF_IS_UNICODE_NONCHAR(c) \ 259 ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \ 260 (uint32_t)(c)<=0x10ffff) 277 #define UTF_IS_UNICODE_CHAR(c) \ 278 ((uint32_t)(c)<0xd800 || \ 279 ((uint32_t)(c)>0xdfff && \ 280 (uint32_t)(c)<=0x10ffff && \ 281 !UTF_IS_UNICODE_NONCHAR(c))) 298 #elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) 308 #define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte]) 314 #define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) 317 #define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0) 319 #define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e) 321 #define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80) 324 #define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f) 340 # define UTF8_CHAR_LENGTH(c) \ 341 ((uint32_t)(c)<=0x7f ? 1 : \ 342 ((uint32_t)(c)<=0x7ff ? 2 : \ 343 ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \ 347 # define UTF8_CHAR_LENGTH(c) \ 348 ((uint32_t)(c)<=0x7f ? 1 : \ 349 ((uint32_t)(c)<=0x7ff ? 2 : \ 350 ((uint32_t)(c)<=0xffff ? 3 : \ 351 ((uint32_t)(c)<=0x10ffff ? 4 : \ 352 ((uint32_t)(c)<=0x3ffffff ? 5 : \ 353 ((uint32_t)(c)<=0x7fffffff ? 
/*
 * (The text below first finishes the second UTF8_CHAR_LENGTH definition.)
 *
 * UTF-8 sizing and the unsafe / safe forward macros.
 *
 *  - UTF8_MAX_CHAR_LENGTH is 4; UTF8_ARRAY_SIZE(size) is (5*size)/2.
 *  - UTF8_GET_CHAR_UNSAFE / UTF8_GET_CHAR_SAFE: copy i into a local int32_t
 *    index, move that copy to the character start, then read with the
 *    matching NEXT_CHAR macro, so the caller's i is not written by the
 *    visible statements.
 *  - UTF8_NEXT_CHAR_UNSAFE: the visible part tests (uint8_t)(c - 0xc0) < 0x35,
 *    looks up the trail byte count, masks the lead byte and folds trail bytes
 *    in six bits at a time. The initial read of c and the dispatch on __count
 *    are not visible.
 *  - UTF8_APPEND_CHAR_UNSAFE: writes into s at i, post-incrementing i, choosing
 *    the form by the thresholds 0x7f, 0x7ff and 0xffff. No capacity check
 *    appears in the visible text.
 *  - UTF8_FWD_1_UNSAFE: i += 1 + UTF8_COUNT_TRAIL_BYTES(s[i]).
 *    UTF8_FWD_N_UNSAFE invokes it; its loop header is not visible.
 *  - UTF8_SET_CHAR_START_UNSAFE: decrements i while s[i] is a trail byte.
 *  - UTF8_NEXT_CHAR_SAFE: for a lead byte, c is replaced by the result of
 *    utf8_nextCharSafeBody(s, &i, length, c, strict); a UTF8_ERROR_VALUE_1
 *    assignment is also visible, but the condition guarding it is not.
 *  - UTF8_APPEND_CHAR_SAFE: c <= 0x7f is stored directly; a visible statement
 *    assigns i from utf8_appendCharSafeBody(s, i, length, c, NULL).
 *
 * NOTE(review): macro-local names such as __count begin with two underscores,
 * which the C standard reserves for the implementation.
 */
6 : 3) \ 362 #define UTF8_MAX_CHAR_LENGTH 4 365 #define UTF8_ARRAY_SIZE(size) ((5*(size))/2) 368 #define UTF8_GET_CHAR_UNSAFE(s, i, c) { \ 369 int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \ 370 UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \ 371 UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \ 375 #define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ 376 int32_t _utf8_get_char_safe_index=(int32_t)(i); \ 377 UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \ 378 UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \ 382 #define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \ 384 if((uint8_t)((c)-0xc0)<0x35) { \ 385 uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \ 386 UTF8_MASK_LEAD_BYTE(c, __count); \ 390 (c)=((c)<<6)|((s)[(i)++]&0x3f); \ 392 (c)=((c)<<6)|((s)[(i)++]&0x3f); \ 394 (c)=((c)<<6)|((s)[(i)++]&0x3f); \ 402 #define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \ 403 if((uint32_t)(c)<=0x7f) { \ 404 (s)[(i)++]=(uint8_t)(c); \ 406 if((uint32_t)(c)<=0x7ff) { \ 407 (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ 409 if((uint32_t)(c)<=0xffff) { \ 410 (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ 412 (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \ 413 (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \ 415 (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ 417 (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ 422 #define UTF8_FWD_1_UNSAFE(s, i) { \ 423 (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \ 427 #define UTF8_FWD_N_UNSAFE(s, i, n) { \ 430 UTF8_FWD_1_UNSAFE(s, i); \ 436 #define UTF8_SET_CHAR_START_UNSAFE(s, i) { \ 437 while(UTF8_IS_TRAIL((s)[i])) { --(i); } \ 441 #define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ 444 if(UTF8_IS_LEAD(c)) { \ 445 (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \ 447 (c)=UTF8_ERROR_VALUE_1; \ 453 #define UTF8_APPEND_CHAR_SAFE(s, i, length, c) { \ 454 if((uint32_t)(c)<=0x7f) { \ 455 (s)[(i)++]=(uint8_t)(c); \ 457 (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \ 462 #define 
/*
 * UTF-8 backward macros and the UTF-16 surrogate helpers.
 *
 *  - UTF8_FWD_1_SAFE, UTF8_FWD_N_SAFE, UTF8_SET_CHAR_START_SAFE,
 *    UTF8_BACK_1_SAFE, UTF8_BACK_N_SAFE and UTF8_SET_CHAR_LIMIT_SAFE are plain
 *    aliases for the U8_ macros named on their right-hand side (defined
 *    outside this view).
 *  - UTF8_PREV_CHAR_UNSAFE: only fragments are visible (trail-byte test, lead
 *    byte masking, shifting bytes into c); the loop that reads bytes is not.
 *  - UTF8_BACK_1_UNSAFE: pre-decrements i until s[i] is not a trail byte.
 *  - UTF8_SET_CHAR_LIMIT_UNSAFE: UTF8_BACK_1_UNSAFE followed by
 *    UTF8_FWD_1_UNSAFE.
 *  - UTF8_PREV_CHAR_SAFE: visible statements assign c from
 *    utf8_prevCharSafeBody(s, start, &i, c, strict) or UTF8_ERROR_VALUE_1; the
 *    guarding conditions are not visible.
 *  - UTF_IS_FIRST_SURROGATE: (uchar & 0xfffffc00) == 0xd800.
 *    UTF_IS_SECOND_SURROGATE: (uchar & 0xfffffc00) == 0xdc00.
 *  - UTF_IS_SURROGATE_FIRST(c): only tests that bit 0x400 is clear; the macros
 *    below use it inside an if(UTF_IS_SURROGATE(c)) block.
 *  - UTF16_GET_PAIR_VALUE(first, second): (first << 10) + second -
 *    UTF_SURROGATE_OFFSET.
 *  - UTF_FIRST_SURROGATE / UTF_SECOND_SURROGATE: (supplementary >> 10) + 0xd7c0
 *    and (supplementary & 0x3ff) | 0xdc00, each cast to UChar. UTF16_LEAD and
 *    UTF16_TRAIL are aliases for them.
 *  - UTF16_IS_LEAD / UTF16_IS_TRAIL alias the first/second surrogate tests.
 *    NOTE(review): UTF16_IS_SINGLE expands to !UTF_IS_SURROGATE(uchar) with no
 *    enclosing parentheses around the whole expansion.
 */
UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length) 465 #define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n) 468 #define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i) 471 #define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \ 473 if(UTF8_IS_TRAIL(c)) { \ 474 uint8_t __b, __count=1, __shift=6; \ 481 UTF8_MASK_LEAD_BYTE(__b, __count); \ 482 (c)|=(UChar32)__b<<__shift; \ 485 (c)|=(UChar32)(__b&0x3f)<<__shift; \ 494 #define UTF8_BACK_1_UNSAFE(s, i) { \ 495 while(UTF8_IS_TRAIL((s)[--(i)])) {} \ 499 #define UTF8_BACK_N_UNSAFE(s, i, n) { \ 502 UTF8_BACK_1_UNSAFE(s, i); \ 508 #define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \ 509 UTF8_BACK_1_UNSAFE(s, i); \ 510 UTF8_FWD_1_UNSAFE(s, i); \ 514 #define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \ 518 (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \ 520 (c)=UTF8_ERROR_VALUE_1; \ 526 #define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i) 529 #define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n) 532 #define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length) 537 #define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800) 540 #define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00) 543 #define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0) 546 #define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) 549 #define UTF16_GET_PAIR_VALUE(first, second) \ 550 (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET) 553 #define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) 556 #define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) 559 #define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary) 562 #define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary) 565 #define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar) 568 #define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar) 571 #define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar) 574 #define 
/*
 * UTF-16 sizing, random access and forward iteration.
 *
 *  - UTF16_NEED_MULTIPLE_UCHAR(c): c > 0xffff. UTF16_CHAR_LENGTH(c): 1 up to
 *    0xffff, otherwise 2. UTF16_MAX_CHAR_LENGTH is 2. UTF16_ARRAY_SIZE(size)
 *    is size unchanged.
 *  - UTF16_GET_CHAR_UNSAFE: for a surrogate c, combines it with s[i+1] when
 *    UTF_IS_SURROGATE_FIRST(c) holds, and a visible statement combines
 *    s[i-1] with c. The initial read of c is not visible.
 *  - UTF16_GET_CHAR_SAFE: same idea with bounds checks (i+1 < length for the
 *    following unit, i-1 >= start for the preceding one) and a check that the
 *    neighbour is the matching surrogate half. UTF_ERROR_VALUE assignments are
 *    visible; the conditions guarding the surrogate ones are not, while the
 *    last is under (strict) && !UTF_IS_UNICODE_CHAR(c).
 *  - UTF16_NEXT_CHAR_UNSAFE: when c is a first surrogate, combines it with
 *    s[i++].
 *  - UTF16_APPEND_CHAR_UNSAFE: c <= 0xffff is stored as one unit; two visible
 *    statements store (c >> 10) + 0xd7c0 and (c & 0x3ff) | 0xdc00.
 *  - UTF16_FWD_1_UNSAFE / UTF16_SET_CHAR_START_UNSAFE: only the surrogate test
 *    is visible; the adjustment of i inside the if is not.
 *  - UTF16_NEXT_CHAR_SAFE: pairs a first surrogate with s[i] only when
 *    i < length and s[i] is a second surrogate.
 *  - UTF16_APPEND_CHAR_SAFE starts here and continues in the text below.
 */
UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff) 577 #define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) 580 #define UTF16_MAX_CHAR_LENGTH 2 583 #define UTF16_ARRAY_SIZE(size) (size) 596 #define UTF16_GET_CHAR_UNSAFE(s, i, c) { \ 598 if(UTF_IS_SURROGATE(c)) { \ 599 if(UTF_IS_SURROGATE_FIRST(c)) { \ 600 (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \ 602 (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \ 608 #define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ 610 if(UTF_IS_SURROGATE(c)) { \ 612 if(UTF_IS_SURROGATE_FIRST(c)) { \ 613 if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \ 614 (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ 618 (c)=UTF_ERROR_VALUE; \ 621 if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ 622 (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ 626 (c)=UTF_ERROR_VALUE; \ 629 } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ 630 (c)=UTF_ERROR_VALUE; \ 635 #define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \ 637 if(UTF_IS_FIRST_SURROGATE(c)) { \ 638 (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \ 643 #define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \ 644 if((uint32_t)(c)<=0xffff) { \ 645 (s)[(i)++]=(uint16_t)(c); \ 647 (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ 648 (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ 653 #define UTF16_FWD_1_UNSAFE(s, i) { \ 654 if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \ 660 #define UTF16_FWD_N_UNSAFE(s, i, n) { \ 663 UTF16_FWD_1_UNSAFE(s, i); \ 669 #define UTF16_SET_CHAR_START_UNSAFE(s, i) { \ 670 if(UTF_IS_SECOND_SURROGATE((s)[i])) { \ 676 #define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ 678 if(UTF_IS_FIRST_SURROGATE(c)) { \ 680 if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \ 682 (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ 686 (c)=UTF_ERROR_VALUE; \ 688 } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ 690 (c)=UTF_ERROR_VALUE; \ 695 #define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \ 696 if((uint32_t)(c)<=0xffff) { \ 697 (s)[(i)++]=(uint16_t)(c); \ 698 } else if((uint32_t)(c)<=0x10ffff) { \ 
/*
 * (The text below first finishes UTF16_APPEND_CHAR_SAFE: a code point in
 * 0x10000..0x10ffff is written as two units only when i+1 < length. Two
 * statements storing UTF_ERROR_VALUE are visible; the else lines that guard
 * them are not. The c <= 0xffff store has no visible check against length.)
 *
 * UTF-16 backward macros and the UTF-32 macros.
 *
 *  - UTF16_FWD_1_SAFE, UTF16_FWD_N_SAFE, UTF16_SET_CHAR_START_SAFE,
 *    UTF16_BACK_1_SAFE, UTF16_BACK_N_SAFE and UTF16_SET_CHAR_LIMIT_SAFE are
 *    aliases for the U16_ macros named on their right-hand side.
 *  - UTF16_PREV_CHAR_UNSAFE: when c is a second surrogate, combines s[--i]
 *    with it. UTF16_BACK_1_UNSAFE pre-decrements i and tests for a second
 *    surrogate; the body of that if is not visible.
 *  - UTF16_SET_CHAR_LIMIT_UNSAFE: tests whether s[i-1] is a first surrogate;
 *    the body is not visible.
 *  - UTF16_PREV_CHAR_SAFE: pairs a second surrogate with s[i-1] only when
 *    i > start and s[i-1] is a first surrogate.
 *  - UTF32_IS_SAFE(c, strict): a conditional expression choosing between
 *    c <= 0x10ffff and UTF_IS_UNICODE_CHAR(c); the selecting condition
 *    (listing line 797) is not visible.
 *  - UTF32_IS_SINGLE is 1, UTF32_IS_LEAD and UTF32_IS_TRAIL are 0,
 *    UTF32_NEED_MULTIPLE_UCHAR is 0, UTF32_CHAR_LENGTH and
 *    UTF32_MAX_CHAR_LENGTH are 1, UTF32_ARRAY_SIZE(size) is size.
 *  - UTF32_GET_CHAR_SAFE: replaces c with UTF_ERROR_VALUE when
 *    UTF32_IS_SAFE(c, strict) is false. Most other UTF32_ macro bodies are
 *    not visible in this view.
 */
699 if((i)+1<(length)) { \ 700 (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ 701 (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ 703 (s)[(i)++]=UTF_ERROR_VALUE; \ 706 (s)[(i)++]=UTF_ERROR_VALUE; \ 711 #define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length) 714 #define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n) 717 #define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i) 720 #define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \ 722 if(UTF_IS_SECOND_SURROGATE(c)) { \ 723 (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \ 728 #define UTF16_BACK_1_UNSAFE(s, i) { \ 729 if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \ 735 #define UTF16_BACK_N_UNSAFE(s, i, n) { \ 738 UTF16_BACK_1_UNSAFE(s, i); \ 744 #define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \ 745 if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \ 751 #define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \ 753 if(UTF_IS_SECOND_SURROGATE(c)) { \ 755 if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ 757 (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ 761 (c)=UTF_ERROR_VALUE; \ 763 } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ 765 (c)=UTF_ERROR_VALUE; \ 770 #define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i) 773 #define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n) 776 #define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) 796 #define UTF32_IS_SAFE(c, strict) \ 798 (uint32_t)(c)<=0x10ffff : \ 799 UTF_IS_UNICODE_CHAR(c)) 812 #define UTF32_IS_SINGLE(uchar) 1 814 #define UTF32_IS_LEAD(uchar) 0 816 #define UTF32_IS_TRAIL(uchar) 0 821 #define UTF32_NEED_MULTIPLE_UCHAR(c) 0 823 #define UTF32_CHAR_LENGTH(c) 1 825 #define UTF32_MAX_CHAR_LENGTH 1 830 #define UTF32_ARRAY_SIZE(size) (size) 833 #define UTF32_GET_CHAR_UNSAFE(s, i, c) { \ 838 #define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ 840 if(!UTF32_IS_SAFE(c, strict)) { \ 841 (c)=UTF_ERROR_VALUE; \ 848 #define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \ 853 #define UTF32_APPEND_CHAR_UNSAFE(s, 
/*
 * Remaining UTF-32 macros and the start of the generic UTF_ aliases.
 *
 *  - For most UTF32_ macros here only the #define line is visible.
 *    UTF32_NEXT_CHAR_SAFE and UTF32_PREV_CHAR_SAFE replace c with
 *    UTF_ERROR_VALUE when UTF32_IS_SAFE(c, strict) is false.
 *    UTF32_APPEND_CHAR_SAFE tests c <= 0x10ffff. UTF32_FWD_N_SAFE adds n to i
 *    and compares the result with length; what happens on overflow is not
 *    visible.
 *  - NOTE(review): UTF32_SET_CHAR_LIMIT_SAFE takes (s, i, length), whereas the
 *    UTF8_ and UTF16_ macros of the same name take (s, start, i, length).
 *  - UTF_ARRAY_SIZE and the UTF_xxx_UNSAFE / UTF_xxx_SAFE macros are aliases
 *    that forward every argument to the UTF16_ macro of the same name.
 */
i, c) { \ 858 #define UTF32_FWD_1_UNSAFE(s, i) { \ 863 #define UTF32_FWD_N_UNSAFE(s, i, n) { \ 868 #define UTF32_SET_CHAR_START_UNSAFE(s, i) { \ 872 #define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ 874 if(!UTF32_IS_SAFE(c, strict)) { \ 875 (c)=UTF_ERROR_VALUE; \ 880 #define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \ 881 if((uint32_t)(c)<=0x10ffff) { \ 889 #define UTF32_FWD_1_SAFE(s, i, length) { \ 894 #define UTF32_FWD_N_SAFE(s, i, length, n) { \ 895 if(((i)+=(n))>(length)) { \ 901 #define UTF32_SET_CHAR_START_SAFE(s, start, i) { \ 907 #define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \ 912 #define UTF32_BACK_1_UNSAFE(s, i) { \ 917 #define UTF32_BACK_N_UNSAFE(s, i, n) { \ 922 #define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \ 926 #define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \ 928 if(!UTF32_IS_SAFE(c, strict)) { \ 929 (c)=UTF_ERROR_VALUE; \ 934 #define UTF32_BACK_1_SAFE(s, start, i) { \ 939 #define UTF32_BACK_N_SAFE(s, start, i, n) { \ 947 #define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \ 957 #define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size) 960 #define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c) 963 #define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) 967 #define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c) 970 #define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) 974 #define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c) 977 #define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) 981 #define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i) 984 #define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length) 988 #define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n) 991 #define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n) 995 #define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i) 998 #define UTF_SET_CHAR_START_SAFE(s, start, 
/*
 * Remaining generic UTF_ aliases.
 *
 *  - The UTF_xxx_UNSAFE / UTF_xxx_SAFE macros keep forwarding to the UTF16_
 *    macro of the same name.
 *  - UTF_IS_SINGLE, UTF_IS_LEAD, UTF_IS_TRAIL, UTF_CHAR_LENGTH,
 *    UTF_MAX_CHAR_LENGTH, UTF_GET_CHAR, UTF_NEXT_CHAR, UTF_FWD_1, UTF_FWD_N,
 *    UTF_SET_CHAR_START, UTF_PREV_CHAR, UTF_BACK_1, UTF_BACK_N and
 *    UTF_SET_CHAR_LIMIT forward to the U16_ macros named on their right-hand
 *    side (defined outside this view).
 *  - UTF_NEED_MULTIPLE_UCHAR and UTF_APPEND_CHAR are the two exceptions: they
 *    forward to UTF16_NEED_MULTIPLE_UCHAR and UTF16_APPEND_CHAR_SAFE.
 *  - The final #endif closes the U_HIDE_DEPRECATED_API /
 *    U_HIDE_OBSOLETE_UTF_OLD_H conditional; the #endif for the
 *    __UTF_OLD_H__ include guard is not visible in this view.
 */
i) UTF16_SET_CHAR_START_SAFE(s, start, i) 1002 #define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c) 1005 #define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) 1009 #define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i) 1012 #define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i) 1016 #define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n) 1019 #define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n) 1023 #define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) 1026 #define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) 1035 #define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar) 1042 #define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar) 1049 #define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar) 1056 #define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c) 1063 #define UTF_CHAR_LENGTH(c) U16_LENGTH(c) 1070 #define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH 1081 #define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c) 1094 #define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c) 1107 #define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) 1118 #define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length) 1129 #define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n) 1145 #define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i) 1158 #define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c) 1171 #define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i) 1184 #define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n) 1200 #define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) 1202 #endif // !U_HIDE_DEPRECATED_API && !U_HIDE_OBSOLETE_UTF_OLD_H
C API: 8-bit Unicode handling macros.
U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]
Internal array with numbers of trail bytes for any given byte used in lead byte position.
C API: Code point macros.
#define U_CFUNC
This is used in a declaration of a library private ICU C function.
C API: 16-bit Unicode handling macros.