libstdc++
regex_executor.h
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2017 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex_executor.h
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 // FIXME convert comments to doxygen format.
32 
33 namespace std _GLIBCXX_VISIBILITY(default)
34 {
35 namespace __detail
36 {
37 _GLIBCXX_BEGIN_NAMESPACE_VERSION
38 
39  /**
40  * @addtogroup regex-detail
41  * @{
42  */
43 
44  /**
45  * @brief Takes a regex and an input string and does the matching.
46  *
47  * The %_Executor class has two modes: DFS mode and BFS mode, controlled
48  * by the template parameter %__dfs_mode.
49  */
50  template<typename _BiIter, typename _Alloc, typename _TraitsT,
51  bool __dfs_mode>
52  class _Executor
53  {
54  using __search_mode = integral_constant<bool, __dfs_mode>;
55  using __dfs = true_type;
56  using __bfs = false_type;
57 
58  enum class _Match_mode : unsigned char { _Exact, _Prefix };
59 
60  public:
61  typedef typename iterator_traits<_BiIter>::value_type _CharT;
62  typedef basic_regex<_CharT, _TraitsT> _RegexT;
63  typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
64  typedef regex_constants::match_flag_type _FlagT;
65  typedef typename _TraitsT::char_class_type _ClassT;
66  typedef _NFA<_TraitsT> _NFAT;
67 
68  public:
69  _Executor(_BiIter __begin,
70  _BiIter __end,
71  _ResultsVec& __results,
72  const _RegexT& __re,
73  _FlagT __flags)
74  : _M_begin(__begin),
75  _M_end(__end),
76  _M_re(__re),
77  _M_nfa(*__re._M_automaton),
78  _M_results(__results),
79  _M_rep_count(_M_nfa.size()),
80  _M_states(_M_nfa._M_start(), _M_nfa.size()),
81  _M_flags((__flags & regex_constants::match_prev_avail)
82  ? (__flags
83  & ~regex_constants::match_not_bol
84  & ~regex_constants::match_not_bow)
85  : __flags)
86  { }
87 
88  // Set matched when string exactly matches the pattern.
89  bool
90  _M_match()
91  {
92  _M_current = _M_begin;
93  return _M_main(_Match_mode::_Exact);
94  }
95 
96  // Set matched when some prefix of the string matches the pattern.
97  bool
98  _M_search_from_first()
99  {
100  _M_current = _M_begin;
101  return _M_main(_Match_mode::_Prefix);
102  }
103 
104  bool
105  _M_search();
106 
107  private:
108  void
109  _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
110 
111  void
112  _M_handle_repeat(_Match_mode, _StateIdT);
113 
114  void
115  _M_handle_subexpr_begin(_Match_mode, _StateIdT);
116 
117  void
118  _M_handle_subexpr_end(_Match_mode, _StateIdT);
119 
120  void
121  _M_handle_line_begin_assertion(_Match_mode, _StateIdT);
122 
123  void
124  _M_handle_line_end_assertion(_Match_mode, _StateIdT);
125 
126  void
127  _M_handle_word_boundary(_Match_mode, _StateIdT);
128 
129  void
130  _M_handle_subexpr_lookahead(_Match_mode, _StateIdT);
131 
132  void
133  _M_handle_match(_Match_mode, _StateIdT);
134 
135  void
136  _M_handle_backref(_Match_mode, _StateIdT);
137 
138  void
139  _M_handle_accept(_Match_mode, _StateIdT);
140 
141  void
142  _M_handle_alternative(_Match_mode, _StateIdT);
143 
144  void
145  _M_dfs(_Match_mode __match_mode, _StateIdT __start);
146 
147  bool
148  _M_main(_Match_mode __match_mode)
149  { return _M_main_dispatch(__match_mode, __search_mode{}); }
150 
151  bool
152  _M_main_dispatch(_Match_mode __match_mode, __dfs);
153 
154  bool
155  _M_main_dispatch(_Match_mode __match_mode, __bfs);
156 
157  bool
158  _M_is_word(_CharT __ch) const
159  {
160  static const _CharT __s[2] = { 'w' };
161  return _M_re._M_automaton->_M_traits.isctype
162  (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1));
163  }
164 
165  bool
166  _M_at_begin() const
167  {
168  return _M_current == _M_begin
169  && !(_M_flags & (regex_constants::match_not_bol
171  }
172 
173  bool
174  _M_at_end() const
175  {
176  return _M_current == _M_end
177  && !(_M_flags & regex_constants::match_not_eol);
178  }
179 
180  bool
181  _M_word_boundary() const;
182 
183  bool
184  _M_lookahead(_StateIdT __next);
185 
186  // Holds additional information used in BFS-mode.
187  template<typename _SearchMode, typename _ResultsVec>
188  struct _State_info;
189 
190  template<typename _ResultsVec>
191  struct _State_info<__bfs, _ResultsVec>
192  {
193  explicit
194  _State_info(_StateIdT __start, size_t __n)
195  : _M_visited_states(new bool[__n]()), _M_start(__start)
196  { }
197 
198  bool _M_visited(_StateIdT __i)
199  {
200  if (_M_visited_states[__i])
201  return true;
202  _M_visited_states[__i] = true;
203  return false;
204  }
205 
206  void _M_queue(_StateIdT __i, const _ResultsVec& __res)
207  { _M_match_queue.emplace_back(__i, __res); }
208 
209  // Dummy implementations for BFS mode.
210  _BiIter* _M_get_sol_pos() { return nullptr; }
211 
212  // Saves states that need to be considered for the next character.
213  vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
214  // Indicates which states are already visited.
215  unique_ptr<bool[]> _M_visited_states;
216  // To record current solution.
217  _StateIdT _M_start;
218  };
219 
220  template<typename _ResultsVec>
221  struct _State_info<__dfs, _ResultsVec>
222  {
223  explicit
224  _State_info(_StateIdT __start, size_t) : _M_start(__start)
225  { }
226 
227  // Dummy implementations for DFS mode.
228  bool _M_visited(_StateIdT) const { return false; }
229  void _M_queue(_StateIdT, const _ResultsVec&) { }
230 
231  _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
232 
233  // To record current solution.
234  _StateIdT _M_start;
235  _BiIter _M_sol_pos;
236  };
237 
238  public:
239  _ResultsVec _M_cur_results;
240  _BiIter _M_current;
241  _BiIter _M_begin;
242  const _BiIter _M_end;
243  const _RegexT& _M_re;
244  const _NFAT& _M_nfa;
245  _ResultsVec& _M_results;
246  vector<pair<_BiIter, int>> _M_rep_count;
247  _State_info<__search_mode, _ResultsVec> _M_states;
248  _FlagT _M_flags;
249  // Do we have a solution so far?
250  bool _M_has_sol;
251  };
252 
253  //@} regex-detail
254 _GLIBCXX_END_NAMESPACE_VERSION
255 } // namespace __detail
256 } // namespace std
257 
258 #include <bits/regex_executor.tcc>
_GLIBCXX17_INLINE constexpr match_flag_type match_not_eol
ISO C++ entities toplevel namespace is std.
_GLIBCXX17_INLINE constexpr match_flag_type match_not_bow
A standard container which offers fixed time access to individual elements in any order...
Definition: stl_vector.h:216
_GLIBCXX17_INLINE constexpr match_flag_type match_not_bol
integral_constant< bool, true > true_type
The type used as a compile-time boolean with true value.
Definition: type_traits:87
_GLIBCXX17_INLINE constexpr match_flag_type match_prev_avail
match_flag_type
This is a bitmask type indicating regex matching rules.
integral_constant< bool, false > false_type
The type used as a compile-time boolean with false value.
Definition: type_traits:90