libstdc++
regex.tcc
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2017 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex.tcc
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __detail
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36 
37  // Shared implementation behind regex_match and regex_search.
38  //
39  // __policy is either _S_auto (automatic dispatch) or _S_alternate (use
40  // the other executor when possible; intended for testing).
41  //
42  // __match_mode == true means regex_match; false means regex_search.
    //
    // Returns false immediately, without touching __m, when __re has no
    // automaton.  Otherwise __m is reset, one of two _Executor
    // instantiations is run over [__s, __e), and the sub-matches, prefix
    // and suffix of __m are filled in according to the outcome.  The
    // return value is the executor's result.
43  template<typename _BiIter, typename _Alloc,
44  typename _CharT, typename _TraitsT,
45  _RegexExecutorPolicy __policy,
46  bool __match_mode>
47  bool
48  __regex_algo_impl(_BiIter __s,
49  _BiIter __e,
50  match_results<_BiIter, _Alloc>& __m,
51  const basic_regex<_CharT, _TraitsT>& __re,
53  {
54  if (__re._M_automaton == nullptr)
55  return false;
56 
    // __res views __m through its _Base_type so that the stored sub_match
    // elements can be iterated over directly.
57  typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
58  __m._M_begin = __s;
59  __m._M_resize(__re._M_automaton->_M_sub_count());
    // Start with every element marked as unmatched.
60  for (auto& __it : __res)
61  __it.matched = false;
62 
63  bool __ret;
    // The _Executor<..., false> instantiation is used when the regex
    // carries the __polynomial flag, or when the alternate policy was
    // requested and the automaton has no back-references; every other
    // case uses _Executor<..., true>.
    // NOTE(review): the meaning of the trailing bool template argument is
    // defined by _Executor, which is not visible here -- TODO confirm.
64  if ((__re.flags() & regex_constants::__polynomial)
65  || (__policy == _RegexExecutorPolicy::_S_alternate
66  && !__re._M_automaton->_M_has_backref))
67  {
68  _Executor<_BiIter, _Alloc, _TraitsT, false>
69  __executor(__s, __e, __m, __re, __flags);
70  if (__match_mode)
71  __ret = __executor._M_match();
72  else
73  __ret = __executor._M_search();
74  }
75  else
76  {
77  _Executor<_BiIter, _Alloc, _TraitsT, true>
78  __executor(__s, __e, __m, __re, __flags);
79  if (__match_mode)
80  __ret = __executor._M_match();
81  else
82  __ret = __executor._M_search();
83  }
84  if (__ret)
85  {
    // Sub-expressions that did not take part in the match become empty
    // ranges positioned at __e.
86  for (auto& __it : __res)
87  if (!__it.matched)
88  __it.first = __it.second = __e;
89  auto& __pre = __m._M_prefix();
90  auto& __suf = __m._M_suffix();
91  if (__match_mode)
92  {
    // regex_match: prefix and suffix are empty, unmatched ranges at
    // __s and __e respectively.
93  __pre.matched = false;
94  __pre.first = __s;
95  __pre.second = __s;
96  __suf.matched = false;
97  __suf.first = __e;
98  __suf.second = __e;
99  }
100  else
101  {
    // regex_search: prefix is [__s, start of match) and suffix is
    // [end of match, __e); each counts as matched only if non-empty.
102  __pre.first = __s;
103  __pre.second = __res[0].first;
104  __pre.matched = (__pre.first != __pre.second);
105  __suf.first = __res[0].second;
106  __suf.second = __e;
107  __suf.matched = (__suf.first != __suf.second);
108  }
109  }
110  else
111  {
    // No match: resize __m with an argument of 0 and reset whatever
    // elements remain to empty, unmatched ranges at __e.
112  __m._M_resize(0);
113  for (auto& __it : __res)
114  {
115  __it.matched = false;
116  __it.first = __it.second = __e;
117  }
118  }
119  return __ret;
120  }
121 
122 _GLIBCXX_END_NAMESPACE_VERSION
123 }
124 
125 _GLIBCXX_BEGIN_NAMESPACE_VERSION
126 
// Maps a collating-element name, given as the character range
// [__first, __last), to the single character it names.  Returns a
// one-character string on success and an empty string_type when the
// name is not found in the table below.
127  template<typename _Ch_type>
128  template<typename _Fwd_iter>
129  typename regex_traits<_Ch_type>::string_type
131  lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
132  {
133  typedef std::ctype<char_type> __ctype_type;
134  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
135 
     // Names for character codes 0 through 127.  The position of a name
     // in this array is the code of the character it denotes (for example
     // index 32 is "space" and index 65 is "A"), which is what the
     // pointer subtraction in the lookup loop below relies on.
136  static const char* __collatenames[] =
137  {
138  "NUL",
139  "SOH",
140  "STX",
141  "ETX",
142  "EOT",
143  "ENQ",
144  "ACK",
145  "alert",
146  "backspace",
147  "tab",
148  "newline",
149  "vertical-tab",
150  "form-feed",
151  "carriage-return",
152  "SO",
153  "SI",
154  "DLE",
155  "DC1",
156  "DC2",
157  "DC3",
158  "DC4",
159  "NAK",
160  "SYN",
161  "ETB",
162  "CAN",
163  "EM",
164  "SUB",
165  "ESC",
166  "IS4",
167  "IS3",
168  "IS2",
169  "IS1",
170  "space",
171  "exclamation-mark",
172  "quotation-mark",
173  "number-sign",
174  "dollar-sign",
175  "percent-sign",
176  "ampersand",
177  "apostrophe",
178  "left-parenthesis",
179  "right-parenthesis",
180  "asterisk",
181  "plus-sign",
182  "comma",
183  "hyphen",
184  "period",
185  "slash",
186  "zero",
187  "one",
188  "two",
189  "three",
190  "four",
191  "five",
192  "six",
193  "seven",
194  "eight",
195  "nine",
196  "colon",
197  "semicolon",
198  "less-than-sign",
199  "equals-sign",
200  "greater-than-sign",
201  "question-mark",
202  "commercial-at",
203  "A",
204  "B",
205  "C",
206  "D",
207  "E",
208  "F",
209  "G",
210  "H",
211  "I",
212  "J",
213  "K",
214  "L",
215  "M",
216  "N",
217  "O",
218  "P",
219  "Q",
220  "R",
221  "S",
222  "T",
223  "U",
224  "V",
225  "W",
226  "X",
227  "Y",
228  "Z",
229  "left-square-bracket",
230  "backslash",
231  "right-square-bracket",
232  "circumflex",
233  "underscore",
234  "grave-accent",
235  "a",
236  "b",
237  "c",
238  "d",
239  "e",
240  "f",
241  "g",
242  "h",
243  "i",
244  "j",
245  "k",
246  "l",
247  "m",
248  "n",
249  "o",
250  "p",
251  "q",
252  "r",
253  "s",
254  "t",
255  "u",
256  "v",
257  "w",
258  "x",
259  "y",
260  "z",
261  "left-curly-bracket",
262  "vertical-line",
263  "right-curly-bracket",
264  "tilde",
265  "DEL",
266  };
267 
     // Build a narrow copy of the requested name; 0 is the fallback value
     // handed to narrow() for each character.
268  string __s;
269  for (; __first != __last; ++__first)
270  __s += __fctyp.narrow(*__first, 0);
271 
     // Linear search; on a hit the array index is converted to char,
     // widened to char_type and returned as a one-character string.
272  for (const auto& __it : __collatenames)
273  if (__s == __it)
274  return string_type(1, __fctyp.widen(
275  static_cast<char>(&__it - __collatenames)));
276 
277  // TODO Add digraph support:
278  // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
279 
     // Unknown name: signal failure with an empty string.
280  return string_type();
281  }
282 
// Maps a character-class name, given as the range [__first, __last),
// to its char_class_type mask.  The name is lower-cased before the
// lookup.  Returns 0 when the name is not in the table.  When __icase
// is true and the mask found contains the lower or upper bit,
// ctype_base::alpha is returned instead of that mask.
283  template<typename _Ch_type>
284  template<typename _Fwd_iter>
285  typename regex_traits<_Ch_type>::char_class_type
287  lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
288  {
289  typedef std::ctype<char_type> __ctype_type;
290  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
291 
292  // Mappings from class name to class mask.  "w" is the only entry
     // built from two values: ctype_base::alnum together with
     // _RegexMask::_S_under.
293  static const pair<const char*, char_class_type> __classnames[] =
294  {
295  {"d", ctype_base::digit},
296  {"w", {ctype_base::alnum, _RegexMask::_S_under}},
297  {"s", ctype_base::space},
298  {"alnum", ctype_base::alnum},
299  {"alpha", ctype_base::alpha},
300  {"blank", ctype_base::blank},
301  {"cntrl", ctype_base::cntrl},
302  {"digit", ctype_base::digit},
303  {"graph", ctype_base::graph},
304  {"lower", ctype_base::lower},
305  {"print", ctype_base::print},
306  {"punct", ctype_base::punct},
307  {"space", ctype_base::space},
308  {"upper", ctype_base::upper},
309  {"xdigit", ctype_base::xdigit},
310  };
311 
     // Lower-case each character, then narrow it (fallback value 0), so
     // that the comparison with the table keys ignores case.
312  string __s;
313  for (; __first != __last; ++__first)
314  __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
315 
316  for (const auto& __it : __classnames)
317  if (__s == __it.first)
318  {
     // Case-insensitive request for a class that involves lower or
     // upper: answer with alpha rather than the table's mask.
319  if (__icase
320  && ((__it.second
321  & (ctype_base::lower | ctype_base::upper)) != 0))
322  return ctype_base::alpha;
323  return __it.second;
324  }
     // Name not recognised.
325  return 0;
326  }
327 
// Reports whether character __c belongs to the class described by __f.
// The test succeeds if the locale's ctype facet classifies __c under
// __f._M_base, or if __f._M_extended has the _S_under bit set and __c
// is the widened underscore character.
328  template<typename _Ch_type>
329  bool
331  isctype(_Ch_type __c, char_class_type __f) const
332  {
333  typedef std::ctype<char_type> __ctype_type;
334  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
335 
336  return __fctyp.is(__f._M_base, __c)
337  // [[:w:]] -- the extended bit additionally admits '_'
338  || ((__f._M_extended & _RegexMask::_S_under)
339  && __c == __fctyp.widen('_'));
340  }
341 
// Converts a digit character to an int in the given radix by extracting
// a long from the stream __is; returns -1 when that extraction fails.
// NOTE(review): __is is not declared in the lines visible here; it is
// presumably an input string stream holding __ch -- TODO confirm.
342  template<typename _Ch_type>
343  int
345  value(_Ch_type __ch, int __radix) const
346  {
348  long __v;
     // Only radix 8 and radix 16 install a base manipulator; for any
     // other radix none is applied in this function.
349  if (__radix == 8)
350  __is >> std::oct;
351  else if (__radix == 16)
352  __is >> std::hex;
353  __is >> __v;
     // A failed extraction is reported as -1.
354  return __is.fail() ? -1 : __v;
355  }
356 
// Expands the format string [__fmt_first, __fmt_last) against this
// match and writes the result through __out, returning the advanced
// output iterator.  Asserts that ready() holds.
//
// With regex_constants::format_sed set in __flags:
//   &        the whole match (sub-match 0)
//   \d       sub-match d, for a single digit d
//   \        followed by anything else (or nothing): a literal backslash;
//            the following character is then handled on its own
// Otherwise:
//   $$       a literal '$'
//   $&       the whole match
//   $`       the prefix
//   $'       the suffix
//   $d, $dd  sub-match with that one- or two-digit index, emitted only
//            when the index is less than size()
//   $        at the very end, or followed by anything else: literal '$'
// In both modes all other characters are copied unchanged.
357  template<typename _Bi_iter, typename _Alloc>
358  template<typename _Out_iter>
360  format(_Out_iter __out,
361  const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
362  const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
363  match_flag_type __flags) const
364  {
365  __glibcxx_assert( ready() );
366  regex_traits<char_type> __traits;
367  typedef std::ctype<char_type> __ctype_type;
368  const __ctype_type&
369  __fctyp(use_facet<__ctype_type>(__traits.getloc()));
370 
     // Copies sub-match number __idx to the output if it took part in
     // the match; writes nothing otherwise.
371  auto __output = [&](size_t __idx)
372  {
373  auto& __sub = (*this)[__idx];
374  if (__sub.matched)
375  __out = std::copy(__sub.first, __sub.second, __out);
376  };
377 
378  if (__flags & regex_constants::format_sed)
379  {
380  for (; __fmt_first != __fmt_last;)
381  if (*__fmt_first == '&')
382  {
383  __output(0);
384  ++__fmt_first;
385  }
386  else if (*__fmt_first == '\\')
387  {
     // The backslash is consumed by the pre-increment; a digit right
     // after it selects a sub-match, anything else leaves the
     // backslash in the output.
388  if (++__fmt_first != __fmt_last
389  && __fctyp.is(__ctype_type::digit, *__fmt_first))
390  __output(__traits.value(*__fmt_first++, 10));
391  else
392  *__out++ = '\\';
393  }
394  else
395  *__out++ = *__fmt_first++;
396  }
397  else
398  {
399  while (1)
400  {
     // Locate the next '$'; when there is none the loop ends and the
     // remaining text is copied after it.
401  auto __next = std::find(__fmt_first, __fmt_last, '$');
402  if (__next == __fmt_last)
403  break;
404 
     // Literal text that precedes the '$'.
405  __out = std::copy(__fmt_first, __next, __out);
406 
     // Consumes the character at __next if it equals __ch and reports
     // whether it did so.
407  auto __eat = [&](char __ch) -> bool
408  {
409  if (*__next == __ch)
410  {
411  ++__next;
412  return true;
413  }
414  return false;
415  };
416 
     // Step past the '$' itself, then dispatch on what follows it.
417  if (++__next == __fmt_last)
418  *__out++ = '$';
419  else if (__eat('$'))
420  *__out++ = '$';
421  else if (__eat('&'))
422  __output(0);
423  else if (__eat('`'))
424  {
425  auto& __sub = _M_prefix();
426  if (__sub.matched)
427  __out = std::copy(__sub.first, __sub.second, __out);
428  }
429  else if (__eat('\''))
430  {
431  auto& __sub = _M_suffix();
432  if (__sub.matched)
433  __out = std::copy(__sub.first, __sub.second, __out);
434  }
435  else if (__fctyp.is(__ctype_type::digit, *__next))
436  {
     // One digit, optionally followed by a second, forms the index.
437  long __num = __traits.value(*__next, 10);
438  if (++__next != __fmt_last
439  && __fctyp.is(__ctype_type::digit, *__next))
440  {
441  __num *= 10;
442  __num += __traits.value(*__next++, 10);
443  }
     // Indices outside [0, size()) produce no output.
444  if (0 <= __num && __num < this->size())
445  __output(__num);
446  }
447  else
448  *__out++ = '$';
     // Resume scanning after whatever was consumed above.
449  __fmt_first = __next;
450  }
451  __out = std::copy(__fmt_first, __fmt_last, __out);
452  }
453  return __out;
454  }
455 
// Writes to __out a copy of [__first, __last) in which regex matches
// are replaced by the expansion of the null-terminated format string
// __fmt, and returns the advanced output iterator.  Text outside the
// matches is copied only when format_no_copy is absent from __flags.
// NOTE(review): __e, __flags and _IterT are introduced on lines that
// are not visible here; presumably the regex, the match flags and a
// regex_iterator type -- TODO confirm.
456  template<typename _Out_iter, typename _Bi_iter,
457  typename _Rx_traits, typename _Ch_type>
458  _Out_iter
459  regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
461  const _Ch_type* __fmt,
463  {
465  _IterT __i(__first, __last, __e, __flags);
466  _IterT __end;
     // No match at all: the input is passed through unchanged.
467  if (__i == __end)
468  {
469  if (!(__flags & regex_constants::format_no_copy))
470  __out = std::copy(__first, __last, __out);
471  }
472  else
473  {
     // Note: this local sub_match shadows the iterator parameter
     // __last for the rest of this branch.  It remembers the suffix of
     // the most recently processed match.
474  sub_match<_Bi_iter> __last;
475  auto __len = char_traits<_Ch_type>::length(__fmt);
476  for (; __i != __end; ++__i)
477  {
     // Unmatched text before this match, then the formatted
     // replacement for the match itself.
478  if (!(__flags & regex_constants::format_no_copy))
479  __out = std::copy(__i->prefix().first, __i->prefix().second,
480  __out);
481  __out = __i->format(__out, __fmt, __fmt + __len, __flags);
482  __last = __i->suffix();
     // NOTE(review): the condition guarding this break is not visible
     // here; presumably it tests format_first_only -- TODO confirm.
484  break;
485  }
     // Text following the last processed match.
486  if (!(__flags & regex_constants::format_no_copy))
487  __out = std::copy(__last.first, __last.second, __out);
488  }
489  return __out;
490  }
491 
// Equality for regex_iterator.  Two iterators whose _M_pregex is null
// always compare equal (operator++ in this file sets _M_pregex to null
// once no further match exists).  Otherwise they are equal only when
// they refer to the same regex object, the same range and flags, and
// their current whole matches (_M_match[0]) compare equal.
492  template<typename _Bi_iter,
493  typename _Ch_type,
494  typename _Rx_traits>
495  bool
497  operator==(const regex_iterator& __rhs) const
498  {
499  if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
500  return true;
501  return _M_pregex == __rhs._M_pregex
502  && _M_begin == __rhs._M_begin
503  && _M_end == __rhs._M_end
504  && _M_flags == __rhs._M_flags
505  && _M_match[0] == __rhs._M_match[0];
506  }
507 
// Pre-increment for regex_iterator: advances to the next match in
// [_M_begin, _M_end), or sets _M_pregex to null when there is none.
// Does nothing if the current match is not marked as matched.
// Returns *this.
508  template<typename _Bi_iter,
509  typename _Ch_type,
510  typename _Rx_traits>
514  {
515  // In all cases in which the call to regex_search returns true,
516  // match.prefix().first shall be equal to the previous value of
517  // match[0].second, and for each index i in the half-open range
518  // [0, match.size()) for which match[i].matched is true,
519  // match[i].position() shall return distance(begin, match[i].first).
520  // [28.12.1.4.5]
521  if (_M_match[0].matched)
522  {
      // The next search begins where the previous match ended; that
      // same position becomes the start of the new match's prefix.
523  auto __start = _M_match[0].second;
524  auto __prefix_first = _M_match[0].second;
      // Special handling when the previous match was empty.
525  if (_M_match[0].first == _M_match[0].second)
526  {
527  if (__start == _M_end)
528  {
      // Empty match at the very end: no further match is possible.
529  _M_pregex = nullptr;
530  return *this;
531  }
532  else
533  {
      // First retry at the same position.
      // NOTE(review): the remaining flag arguments of this call are
      // on lines not visible here; presumably they demand a non-empty
      // match anchored at __start -- TODO confirm.
534  if (regex_search(__start, _M_end, _M_match, *_M_pregex,
535  _M_flags
538  {
539  __glibcxx_assert(_M_match[0].matched);
540  auto& __prefix = _M_match._M_prefix();
541  __prefix.first = __prefix_first;
542  __prefix.matched = __prefix.first != __prefix.second;
543  // [28.12.1.4.5]
544  _M_match._M_begin = _M_begin;
545  return *this;
546  }
547  else
      // Nothing found there: move one position forward before the
      // general search below.
548  ++__start;
549  }
550  }
      // General case: search the remainder of the range.
552  if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
553  {
554  __glibcxx_assert(_M_match[0].matched);
      // Make the prefix begin at the end of the previous match and
      // restore _M_begin so positions are relative to the whole range.
555  auto& __prefix = _M_match._M_prefix();
556  __prefix.first = __prefix_first;
557  __prefix.matched = __prefix.first != __prefix.second;
558  // [28.12.1.4.5]
559  _M_match._M_begin = _M_begin;
560  }
561  else
      // No further match: a null regex pointer marks the iterator as
      // finished.
562  _M_pregex = nullptr;
563  }
564  return *this;
565  }
566 
// Copy assignment for regex_token_iterator: copies the position, the
// sub-match index list, the current index, the stored suffix and the
// _M_has_m1 flag from __rhs, then calls _M_normalize_result().
// Returns *this.
// NOTE(review): _M_normalize_result is defined elsewhere; presumably it
// re-points _M_result at this object's own state rather than at
// __rhs's -- TODO confirm.
567  template<typename _Bi_iter,
568  typename _Ch_type,
569  typename _Rx_traits>
573  {
574  _M_position = __rhs._M_position;
575  _M_subs = __rhs._M_subs;
576  _M_n = __rhs._M_n;
577  _M_suffix = __rhs._M_suffix;
578  _M_has_m1 = __rhs._M_has_m1;
579  _M_normalize_result();
580  return *this;
581  }
582 
// Equality for regex_token_iterator.  The checks run in this order:
//   1. both at end of sequence                          -> equal
//   2. both hold a matched suffix and the suffixes
//      compare equal                                    -> equal
//   3. either one is at end of sequence or holds a
//      matched suffix                                   -> not equal
//   4. otherwise equal iff position, current index and
//      sub-match index list all agree
583  template<typename _Bi_iter,
584  typename _Ch_type,
585  typename _Rx_traits>
586  bool
589  {
590  if (_M_end_of_seq() && __rhs._M_end_of_seq())
591  return true;
592  if (_M_suffix.matched && __rhs._M_suffix.matched
593  && _M_suffix == __rhs._M_suffix)
594  return true;
      // Reaching this point with any of these set means the two
      // iterators are in different states.
595  if (_M_end_of_seq() || _M_suffix.matched
596  || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
597  return false;
598  return _M_position == __rhs._M_position
599  && _M_n == __rhs._M_n
600  && _M_subs == __rhs._M_subs;
601  }
602 
// Pre-increment for regex_token_iterator.  Moves to the next requested
// sub-match of the current match; when those are used up, moves to the
// next match; when the matches are used up, yields the trailing suffix
// once (only if _M_has_m1 is set and that suffix is non-empty) and
// finally becomes a default-constructed iterator.  Returns *this.
603  template<typename _Bi_iter,
604  typename _Ch_type,
605  typename _Rx_traits>
609  {
      // Keep the current position so that its suffix is still reachable
      // after _M_position has been advanced.
610  _Position __prev = _M_position;
611  if (_M_suffix.matched)
      // The suffix token was the current element: nothing follows it.
612  *this = regex_token_iterator();
613  else if (_M_n + 1 < _M_subs.size())
614  {
      // More sub-match indices remain for the current match.
615  _M_n++;
616  _M_result = &_M_current_match();
617  }
618  else
619  {
      // Restart the index list and advance to the next match.
620  _M_n = 0;
621  ++_M_position;
622  if (_M_position != _Position())
623  _M_result = &_M_current_match();
624  else if (_M_has_m1 && __prev->suffix().length() != 0)
625  {
      // No further match: expose the text after the last match as a
      // final token.
626  _M_suffix.matched = true;
627  _M_suffix.first = __prev->suffix().first;
628  _M_suffix.second = __prev->suffix().second;
629  _M_result = &_M_suffix;
630  }
631  else
632  *this = regex_token_iterator();
633  }
634  return *this;
635  }
636 
// Establishes the initial state from the range [__a, __b).
// Sets _M_has_m1 to whether _M_subs contains the index -1, then picks
// the first result: the current match if _M_position is not a
// default-constructed _Position; otherwise, when _M_has_m1 is set, the
// whole range [__a, __b) stored in _M_suffix; otherwise no result
// (_M_result is null).
637  template<typename _Bi_iter,
638  typename _Ch_type,
639  typename _Rx_traits>
640  void
642  _M_init(_Bi_iter __a, _Bi_iter __b)
643  {
644  _M_has_m1 = false;
      // Stop at the first -1 found in the index list.
645  for (auto __it : _M_subs)
646  if (__it == -1)
647  {
648  _M_has_m1 = true;
649  break;
650  }
651  if (_M_position != _Position())
652  _M_result = &_M_current_match();
653  else if (_M_has_m1)
654  {
      // No match available but -1 was requested: the entire input
      // range becomes the one and only token.
655  _M_suffix.matched = true;
656  _M_suffix.first = __a;
657  _M_suffix.second = __b;
658  _M_result = &_M_suffix;
659  }
660  else
661  _M_result = nullptr;
662  }
663 
664 _GLIBCXX_END_NAMESPACE_VERSION
665 } // namespace
666 
bool fail() const
Fast error checking.
Definition: basic_ios.h:201
ISO C++ entities toplevel namespace is std.
_GLIBCXX17_INLINE constexpr match_flag_type format_sed
_Out_iter regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, const basic_regex< _Ch_type, _Rx_traits > &__e, const basic_string< _Ch_type, _St, _Sa > &__fmt, regex_constants::match_flag_type __flags=regex_constants::match_default)
Searches a range repeatedly for matches of a regular expression and replaces the matched parts by filling in a format string.
Definition: regex.h:2296
Controlling input for std::string.
Definition: iosfwd:100
bool operator==(const regex_iterator &__rhs) const
Tests the equivalence of two regex iterators.
Definition: regex.tcc:497
bool regex_search(_Bi_iter __s, _Bi_iter __e, match_results< _Bi_iter, _Alloc > &__m, const basic_regex< _Ch_type, _Rx_traits > &__re, regex_constants::match_flag_type __flags=regex_constants::match_default)
Definition: regex.h:2148
locale_type getloc() const
Gets a copy of the current locale in use by the regex_traits object.
Definition: regex.h:377
Managing sequences of characters and character-like objects.
string_type lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
Gets a collation element by name.
Definition: regex.tcc:131
_T1 first
first is a copy of the first object
Definition: stl_pair.h:203
regex_iterator & operator++()
Increments a regex_iterator.
Definition: regex.tcc:513
Describes aspects of a regular expression.
Definition: regex.h:87
char_class_type lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase=false) const
Maps one or more characters to a named character classification.
Definition: regex.tcc:287
_GLIBCXX17_INLINE constexpr match_flag_type match_prev_avail
ios_base & oct(ios_base &__base)
Calls base.setf(ios_base::oct, ios_base::basefield).
Definition: ios_base.h:1032
_GLIBCXX17_INLINE constexpr match_flag_type match_not_null
_GLIBCXX17_INLINE constexpr match_flag_type format_first_only
ios_base & hex(ios_base &__base)
Calls base.setf(ios_base::hex, ios_base::basefield).
Definition: ios_base.h:1024
Basis for explicit traits specializations.
Definition: char_traits.h:229
match_flag_type
This is a bitmask type indicating regex matching rules.
regex_token_iterator & operator++()
Increments a regex_token_iterator.
Definition: regex.tcc:608
bool operator==(const regex_token_iterator &__rhs) const
Compares a regex_token_iterator to another for equality.
Definition: regex.tcc:588
bool isctype(_Ch_type __c, char_class_type __f) const
Determines if c is a member of an identified class.
Definition: regex.tcc:331
_GLIBCXX17_INLINE constexpr match_flag_type match_continuous
int value(_Ch_type __ch, int __radix) const
Converts a digit to an int.
Definition: regex.tcc:345
_GLIBCXX17_INLINE constexpr match_flag_type format_no_copy
_Out_iter format(_Out_iter __out, const char_type *__fmt_first, const char_type *__fmt_last, match_flag_type __flags=regex_constants::format_default) const
_GLIBCXX17_INLINE constexpr syntax_option_type __polynomial
_T2 second
second is a copy of the second object
Definition: stl_pair.h:204
Struct holding two objects of arbitrary type.
Definition: stl_pair.h:198
regex_token_iterator & operator=(const regex_token_iterator &__rhs)
Assigns a regex_token_iterator to another.
Definition: regex.tcc:572
Primary class template ctype facet. This template class defines classification and conversion function...