ICU 64.2  64.2
messagepattern.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2011-2013, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: messagepattern.h
9 * encoding: UTF-8
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2011mar14
14 * created by: Markus W. Scherer
15 */
16 
17 #ifndef __MESSAGEPATTERN_H__
18 #define __MESSAGEPATTERN_H__
19 
25 #include "unicode/utypes.h"
26 
27 #if !UCONFIG_NO_FORMATTING
28 
29 #include "unicode/parseerr.h"
30 #include "unicode/unistr.h"
31 
90 };
95 
205 };
210 
258 };
263 
/**
 * Evaluates to true when argType equals UMSGPAT_ARG_TYPE_PLURAL or
 * UMSGPAT_ARG_TYPE_SELECTORDINAL, and to false for any other value.
 *
 * The macro argument appears twice in the expansion, so pass an expression
 * without side effects.
 */
270 #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
271  ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
272 
273 enum {
280 
289 };
290 
/**
 * Sentinel constant: the double value -123456789.
 * NOTE(review): presumably the value reported for a Part that carries no
 * numeric value (see getNumericValue()) -- confirm against the implementation.
 */
297 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
298 
300 
301 class MessagePatternDoubleList;
302 class MessagePatternPartsList;
303 
361 public:
370  MessagePattern(UErrorCode &errorCode);
371 
382 
401  MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
402 
408  MessagePattern(const MessagePattern &other);
409 
416  MessagePattern &operator=(const MessagePattern &other);
417 
422  virtual ~MessagePattern();
423 
441  MessagePattern &parse(const UnicodeString &pattern,
442  UParseError *parseError, UErrorCode &errorCode);
443 
461  MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
462  UParseError *parseError, UErrorCode &errorCode);
463 
481  MessagePattern &parsePluralStyle(const UnicodeString &pattern,
482  UParseError *parseError, UErrorCode &errorCode);
483 
501  MessagePattern &parseSelectStyle(const UnicodeString &pattern,
502  UParseError *parseError, UErrorCode &errorCode);
503 
509  void clear();
510 
518  clear();
519  aposMode=mode;
520  }
521 
527  UBool operator==(const MessagePattern &other) const;
528 
/**
 * Inequality comparison, defined as the logical negation of operator==.
 * @param other the MessagePattern to compare with
 * @return true when operator==(other) reports false, and vice versa
 */
534  inline UBool operator!=(const MessagePattern &other) const {
535  return !operator==(other);
536  }
537 
542  int32_t hashCode() const;
543 
549  return aposMode;
550  }
551 
552  // Java has package-private jdkAposMode() here.
553  // In C++, this is declared in the MessageImpl class.
554 
560  return msg;
561  }
562 
569  return hasArgNames;
570  }
571 
578  return hasArgNumbers;
579  }
580 
592  static int32_t validateArgumentName(const UnicodeString &name);
593 
604  UnicodeString autoQuoteApostropheDeep() const;
605 
606  class Part;
607 
/**
 * Returns the number of "parts" created by parsing the pattern string.
 * @return the current value of partsLength
 */
614  int32_t countParts() const {
615  return partsLength;
616  }
617 
/**
 * Gets the i-th pattern "part".
 * @param i index into the parts array
 * @return a const reference to parts[i]
 */
624  const Part &getPart(int32_t i) const {
// No range check is performed here; i is used to index the array directly.
625  return parts[i];
626  }
627 
636  return getPart(i).type;
637  }
638 
/**
 * Returns the pattern index of the specified pattern "part".
 * @param partIndex index of the part, forwarded unchanged to getPart()
 * @return the index field of that part
 */
646  int32_t getPatternIndex(int32_t partIndex) const {
647  return getPart(partIndex).index;
648  }
649 
/**
 * Returns the substring of the pattern string indicated by the Part.
 * @param part supplies the start index and length of the range within msg
 * @return the result of msg.tempSubString(part.index, part.length)
 *
 * NOTE(review): tempSubString() is described as creating a *temporary*
 * substring; confirm what lifetime it requires relative to this
 * MessagePattern before storing the result.
 */
657  UnicodeString getSubstring(const Part &part) const {
658  return msg.tempSubString(part.index, part.length);
659  }
660 
/**
 * Compares the part's substring with the input string s.
 * @param part supplies the start index and length of the range within msg
 * @param s the string to compare against
 * @return true when msg.compare(part.index, part.length, s) yields 0
 */
668  UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
669  return 0==msg.compare(part.index, part.length, s);
670  }
671 
678  double getNumericValue(const Part &part) const;
679 
686  double getPluralOffset(int32_t pluralStart) const;
687 
/**
 * Returns the index of the ARG|MSG_LIMIT part corresponding to the
 * ARG|MSG_START part at start.
 * @param start index of the starting part, forwarded to getPart()
 * @return the limitPartIndex stored in that part, or start itself when the
 *         stored value is smaller than start
 */
696  int32_t getLimitPartIndex(int32_t start) const {
697  int32_t limit=getPart(start).limitPartIndex;
// A stored limit smaller than start is not returned; start is returned instead.
698  if(limit<start) {
699  return start;
700  }
701  return limit;
702  }
703 
711  class Part : public UMemory {
712  public:
/**
 * Default constructor, do not use.
 * The body is empty and there is no initializer list, so this constructor
 * does not assign any of the data members.
 */
717  Part() {}
718 
725  return type;
726  }
727 
/**
 * Returns the pattern string index associated with this Part.
 * @return the index field
 */
733  int32_t getIndex() const {
734  return index;
735  }
736 
/**
 * Returns the length of the pattern substring associated with this Part.
 * @return the length field (stored as uint16_t) widened to int32_t
 */
743  int32_t getLength() const {
744  return length;
745  }
746 
/**
 * Returns the pattern string limit (exclusive-end) index associated with
 * this Part.
 * @return index+length
 */
753  int32_t getLimit() const {
754  return index+length;
755  }
756 
/**
 * Returns a value associated with this part.
 * @return the value field (stored as int16_t) widened to int32_t
 */
763  int32_t getValue() const {
764  return value;
765  }
766 
774  UMessagePatternPartType msgType=getType();
775  if(msgType ==UMSGPAT_PART_TYPE_ARG_START || msgType ==UMSGPAT_PART_TYPE_ARG_LIMIT) {
776  return (UMessagePatternArgType)value;
777  } else {
778  return UMSGPAT_ARG_TYPE_NONE;
779  }
780  }
781 
791  }
792 
798  UBool operator==(const Part &other) const;
799 
/**
 * Inequality comparison, defined as the logical negation of operator==.
 * @param other the Part to compare with
 * @return true when operator==(other) reports false, and vice versa
 */
805  inline UBool operator!=(const Part &other) const {
806  return !operator==(other);
807  }
808 
/**
 * Computes a hash code from the type, index, length and value fields by
 * repeatedly multiplying the running result by 37 and adding the next field.
 * @return the combined hash value
 *
 * NOTE(review): the arithmetic appears to be carried out in a signed integer
 * type; for large index values the multiplications could exceed the int32_t
 * range -- confirm whether that can occur for supported pattern lengths.
 */
813  int32_t hashCode() const {
814  return ((type*37+index)*37+length)*37+value;
815  }
816 
817  private:
818  friend class MessagePattern;
819 
// 0xffff is the largest value a uint16_t can hold and 0x7fff the largest for an
// int16_t; these match the declared types of the length and value fields.
// NOTE(review): presumably enforced when parts are added -- confirm in addPart().
820  static const int32_t MAX_LENGTH=0xffff;
821  static const int32_t MAX_VALUE=0x7fff;
822 
823  // Some fields are not final because they are modified during pattern parsing.
824  // After pattern parsing, the parts are effectively immutable.
826  int32_t index;
827  uint16_t length;
828  int16_t value;
829  int32_t limitPartIndex;
830  };
831 
832 private:
833  void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
834 
835  void postParse();
836 
837  int32_t parseMessage(int32_t index, int32_t msgStartLength,
838  int32_t nestingLevel, UMessagePatternArgType parentType,
839  UParseError *parseError, UErrorCode &errorCode);
840 
841  int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
842  UParseError *parseError, UErrorCode &errorCode);
843 
844  int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
845 
846  int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
847  UParseError *parseError, UErrorCode &errorCode);
848 
849  int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
850  UParseError *parseError, UErrorCode &errorCode);
851 
860  static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
861 
/**
 * Convenience overload: forwards to the static
 * parseArgNumber(const UnicodeString &, int32_t, int32_t) using this
 * object's msg as the string.
 * @param start start index, passed through unchanged
 * @param limit limit index, passed through unchanged
 * @return whatever the static overload returns for (msg, start, limit)
 */
862  int32_t parseArgNumber(int32_t start, int32_t limit) {
863  return parseArgNumber(msg, start, limit);
864  }
865 
874  void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
875  UParseError *parseError, UErrorCode &errorCode);
876 
877  // Java has package-private appendReducedApostrophes() here.
878  // In C++, this is declared in the MessageImpl class.
879 
880  int32_t skipWhiteSpace(int32_t index);
881 
882  int32_t skipIdentifier(int32_t index);
883 
888  int32_t skipDouble(int32_t index);
889 
890  static UBool isArgTypeChar(UChar32 c);
891 
892  UBool isChoice(int32_t index);
893 
894  UBool isPlural(int32_t index);
895 
896  UBool isSelect(int32_t index);
897 
898  UBool isOrdinal(int32_t index);
899 
904  UBool inMessageFormatPattern(int32_t nestingLevel);
905 
910  UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
911 
912  void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
913  int32_t value, UErrorCode &errorCode);
914 
915  void addLimitPart(int32_t start,
916  UMessagePatternPartType type, int32_t index, int32_t length,
917  int32_t value, UErrorCode &errorCode);
918 
919  void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
920 
921  void setParseError(UParseError *parseError, int32_t index);
922 
923  UBool init(UErrorCode &errorCode);
924  UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
925 
927  UnicodeString msg;
928  // ArrayList<Part> parts=new ArrayList<Part>();
929  MessagePatternPartsList *partsList;
930  Part *parts;
931  int32_t partsLength;
932  // ArrayList<Double> numericValues;
933  MessagePatternDoubleList *numericValuesList;
934  double *numericValues;
935  int32_t numericValuesLength;
936  UBool hasArgNames;
937  UBool hasArgNumbers;
938  UBool needsAutoQuoting;
939 };
940 
942 
943 #endif // !UCONFIG_NO_FORMATTING
944 
945 #endif // __MESSAGEPATTERN_H__
The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
An integer value, for example the offset or an explicit selector value in a PluralFormat style...
const UnicodeString & getPatternString() const
A numeric value, for example the offset or an explicit selector value in a PluralFormat style...
A literal apostrophe is represented by either a single or a double apostrophe pattern character...
The argument name.
const Part & getPart(int32_t i) const
Gets the i-th pattern "part".
UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const
Create a temporary substring for the specified range.
C++ API: Unicode String.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
The argument has no specified type.
The argument style text.
static UBool hasNumericValue(UMessagePatternPartType type)
Indicates whether the Part type has a numeric value.
UMessagePatternPartType
MessagePattern::Part type constants.
The argument number, provided by the value.
int32_t getValue() const
Returns a value associated with this part.
The argument is an ordinal-number PluralFormat with the same style parts sequence and semantics as UM...
void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode)
Clears this MessagePattern and sets the UMessagePatternApostropheMode.
Parses and represents ICU MessageFormat patterns.
The argument is a ChoiceFormat with one or more ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:137
Return value from MessagePattern.validateArgumentName() for when the string is a valid "pattern ident...
int32_t getPatternIndex(int32_t partIndex) const
Returns the pattern index of the specified pattern "part".
A literal apostrophe must be represented by a double apostrophe pattern character.
UMessagePatternArgType getArgType() const
Returns the argument type if this part is of type ARG_START or ARG_LIMIT, otherwise UMSGPAT_ARG_TYPE_...
UMessagePatternApostropheMode getApostropheMode() const
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:389
UBool partSubstringMatches(const Part &part, const UnicodeString &s) const
Compares the part's substring with the input string s.
Indicates that a syntax character needs to be inserted for auto-quoting.
int32_t countParts() const
Returns the number of "parts" created by parsing the pattern string.
int32_t getLimit() const
Returns the pattern string limit (exclusive-end) index associated with this Part. ...
Return value from MessagePattern.validateArgumentName() for when the string is invalid.
UBool operator!=(const Part &other) const
UBool operator!=(const MessagePattern &other) const
Start of a message pattern (main or nested).
End of an argument.
UBool hasNumberedArguments() const
Does the parsed pattern have numbered arguments like {2}?
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:138
UMessagePatternPartType getType() const
Returns the type of this part.
C API: Parse Error Information.
End of a message pattern (main or nested).
A selector substring in a "complex" argument style.
Indicates a substring of the pattern string which is to be skipped when formatting.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:401
Part()
Default constructor, do not use.
A message pattern "part", representing a pattern parsing event.
Indicates a syntactic (non-escaped) # symbol in a plural variant.
UMessagePatternPartType getPartType(int32_t i) const
Returns the UMessagePatternPartType of the i-th pattern "part".
The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset (e...
int32_t getIndex() const
Returns the pattern string index associated with this Part.
The argument type.
UMessagePatternArgType
Argument type constants.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
Basic definitions for ICU, for both C and C++ APIs.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside...
Definition: utypes.h:300
int32_t getLimitPartIndex(int32_t start) const
Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start...
UBool hasNamedArguments() const
Does the parsed pattern have named arguments like {first_name}?
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:289
int32_t getLength() const
Returns the length of the pattern substring associated with this Part.
The argument has a "simple" type which is provided by the ARG_TYPE part.
Start of an argument.
UnicodeString getSubstring(const Part &part) const
Returns the substring of the pattern string indicated by the Part.
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
UMemory is the common ICU base class.
Definition: uobject.h:112
UMessagePatternApostropheMode
Mode for when an apostrophe starts quoted literal text for MessageFormat output.
int8_t UBool
The ICU boolean type.
Definition: umachine.h:225