libstdc++
regex.tcc
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2022 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex.tcc
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
34 
35 namespace __detail
36 {
37  /// @cond undocumented
38 
39  // Result of merging regex_match and regex_search.
40  //
41  // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
42  // the other one if possible, for test purpose).
43  //
44  // That __match_mode is true means regex_match, else regex_search.
45  template<typename _BiIter, typename _Alloc,
46  typename _CharT, typename _TraitsT>
47  bool
48  __regex_algo_impl(_BiIter __s,
49  _BiIter __e,
50  match_results<_BiIter, _Alloc>& __m,
51  const basic_regex<_CharT, _TraitsT>& __re,
53  _RegexExecutorPolicy __policy,
54  bool __match_mode)
55  {
56  if (__re._M_automaton == nullptr)
57  return false;
58 
59  typename match_results<_BiIter, _Alloc>::_Unchecked& __res = __m;
60  __m._M_begin = __s;
61  __m._M_resize(__re._M_automaton->_M_sub_count());
62 
63  bool __ret;
64  if ((__re.flags() & regex_constants::__polynomial)
65  || (__policy == _RegexExecutorPolicy::_S_alternate
66  && !__re._M_automaton->_M_has_backref))
67  {
68  _Executor<_BiIter, _Alloc, _TraitsT, false>
69  __executor(__s, __e, __res, __re, __flags);
70  if (__match_mode)
71  __ret = __executor._M_match();
72  else
73  __ret = __executor._M_search();
74  }
75  else
76  {
77  _Executor<_BiIter, _Alloc, _TraitsT, true>
78  __executor(__s, __e, __res, __re, __flags);
79  if (__match_mode)
80  __ret = __executor._M_match();
81  else
82  __ret = __executor._M_search();
83  }
84  if (__ret)
85  {
86  for (auto& __it : __res)
87  if (!__it.matched)
88  __it.first = __it.second = __e;
89  auto& __pre = __m._M_prefix();
90  auto& __suf = __m._M_suffix();
91  if (__match_mode)
92  {
93  __pre.matched = false;
94  __pre.first = __s;
95  __pre.second = __s;
96  __suf.matched = false;
97  __suf.first = __e;
98  __suf.second = __e;
99  }
100  else
101  {
102  __pre.first = __s;
103  __pre.second = __res[0].first;
104  __pre.matched = (__pre.first != __pre.second);
105  __suf.first = __res[0].second;
106  __suf.second = __e;
107  __suf.matched = (__suf.first != __suf.second);
108  }
109  }
110  else
111  {
112  __m._M_establish_failed_match(__e);
113  }
114  return __ret;
115  }
116  /// @endcond
117 } // namespace __detail
118 
119  /// @cond
120 
121  template<typename _Ch_type>
122  template<typename _Fwd_iter>
123  typename regex_traits<_Ch_type>::string_type
125  lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
126  {
127  typedef std::ctype<char_type> __ctype_type;
128  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
129 
130  static const char* __collatenames[] =
131  {
132  "NUL",
133  "SOH",
134  "STX",
135  "ETX",
136  "EOT",
137  "ENQ",
138  "ACK",
139  "alert",
140  "backspace",
141  "tab",
142  "newline",
143  "vertical-tab",
144  "form-feed",
145  "carriage-return",
146  "SO",
147  "SI",
148  "DLE",
149  "DC1",
150  "DC2",
151  "DC3",
152  "DC4",
153  "NAK",
154  "SYN",
155  "ETB",
156  "CAN",
157  "EM",
158  "SUB",
159  "ESC",
160  "IS4",
161  "IS3",
162  "IS2",
163  "IS1",
164  "space",
165  "exclamation-mark",
166  "quotation-mark",
167  "number-sign",
168  "dollar-sign",
169  "percent-sign",
170  "ampersand",
171  "apostrophe",
172  "left-parenthesis",
173  "right-parenthesis",
174  "asterisk",
175  "plus-sign",
176  "comma",
177  "hyphen",
178  "period",
179  "slash",
180  "zero",
181  "one",
182  "two",
183  "three",
184  "four",
185  "five",
186  "six",
187  "seven",
188  "eight",
189  "nine",
190  "colon",
191  "semicolon",
192  "less-than-sign",
193  "equals-sign",
194  "greater-than-sign",
195  "question-mark",
196  "commercial-at",
197  "A",
198  "B",
199  "C",
200  "D",
201  "E",
202  "F",
203  "G",
204  "H",
205  "I",
206  "J",
207  "K",
208  "L",
209  "M",
210  "N",
211  "O",
212  "P",
213  "Q",
214  "R",
215  "S",
216  "T",
217  "U",
218  "V",
219  "W",
220  "X",
221  "Y",
222  "Z",
223  "left-square-bracket",
224  "backslash",
225  "right-square-bracket",
226  "circumflex",
227  "underscore",
228  "grave-accent",
229  "a",
230  "b",
231  "c",
232  "d",
233  "e",
234  "f",
235  "g",
236  "h",
237  "i",
238  "j",
239  "k",
240  "l",
241  "m",
242  "n",
243  "o",
244  "p",
245  "q",
246  "r",
247  "s",
248  "t",
249  "u",
250  "v",
251  "w",
252  "x",
253  "y",
254  "z",
255  "left-curly-bracket",
256  "vertical-line",
257  "right-curly-bracket",
258  "tilde",
259  "DEL",
260  };
261 
262  string __s;
263  for (; __first != __last; ++__first)
264  __s += __fctyp.narrow(*__first, 0);
265 
266  for (const auto& __it : __collatenames)
267  if (__s == __it)
268  return string_type(1, __fctyp.widen(
269  static_cast<char>(&__it - __collatenames)));
270 
271  // TODO Add digraph support:
272  // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
273 
274  return string_type();
275  }
276 
277  template<typename _Ch_type>
278  template<typename _Fwd_iter>
279  typename regex_traits<_Ch_type>::char_class_type
281  lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
282  {
283  typedef std::ctype<char_type> __ctype_type;
284  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
285 
286  // Mappings from class name to class mask.
287  static const pair<const char*, char_class_type> __classnames[] =
288  {
289  {"d", ctype_base::digit},
290  {"w", {ctype_base::alnum, _RegexMask::_S_under}},
291  {"s", ctype_base::space},
292  {"alnum", ctype_base::alnum},
293  {"alpha", ctype_base::alpha},
294  {"blank", ctype_base::blank},
295  {"cntrl", ctype_base::cntrl},
296  {"digit", ctype_base::digit},
297  {"graph", ctype_base::graph},
298  {"lower", ctype_base::lower},
299  {"print", ctype_base::print},
300  {"punct", ctype_base::punct},
301  {"space", ctype_base::space},
302  {"upper", ctype_base::upper},
303  {"xdigit", ctype_base::xdigit},
304  };
305 
306  string __s;
307  for (; __first != __last; ++__first)
308  __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
309 
310  for (const auto& __it : __classnames)
311  if (__s == __it.first)
312  {
313  if (__icase
314  && ((__it.second
315  & (ctype_base::lower | ctype_base::upper)) != 0))
316  return ctype_base::alpha;
317  return __it.second;
318  }
319  return 0;
320  }
321 
322  template<typename _Ch_type>
323  bool
325  isctype(_Ch_type __c, char_class_type __f) const
326  {
327  typedef std::ctype<char_type> __ctype_type;
328  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
329 
330  return __fctyp.is(__f._M_base, __c)
331  // [[:w:]]
332  || ((__f._M_extended & _RegexMask::_S_under)
333  && __c == __fctyp.widen('_'));
334  }
335 
336  template<typename _Ch_type>
337  int
339  value(_Ch_type __ch, int __radix) const
340  {
341  std::basic_istringstream<char_type> __is(string_type(1, __ch));
342  long __v;
343  if (__radix == 8)
344  __is >> std::oct;
345  else if (__radix == 16)
346  __is >> std::hex;
347  __is >> __v;
348  return __is.fail() ? -1 : __v;
349  }
350 
351  template<typename _Bi_iter, typename _Alloc>
352  template<typename _Out_iter>
353  _Out_iter
355  format(_Out_iter __out,
356  const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
357  const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
358  match_flag_type __flags) const
359  {
360  __glibcxx_assert( ready() );
361  regex_traits<char_type> __traits;
362  typedef std::ctype<char_type> __ctype_type;
363  const __ctype_type&
364  __fctyp(use_facet<__ctype_type>(__traits.getloc()));
365 
366  auto __output = [&](size_t __idx)
367  {
368  auto& __sub = (*this)[__idx];
369  if (__sub.matched)
370  __out = std::copy(__sub.first, __sub.second, __out);
371  };
372 
373  if (__flags & regex_constants::format_sed)
374  {
375  bool __escaping = false;
376  for (; __fmt_first != __fmt_last; __fmt_first++)
377  {
378  if (__escaping)
379  {
380  __escaping = false;
381  if (__fctyp.is(__ctype_type::digit, *__fmt_first))
382  __output(__traits.value(*__fmt_first, 10));
383  else
384  *__out++ = *__fmt_first;
385  continue;
386  }
387  if (*__fmt_first == '\\')
388  {
389  __escaping = true;
390  continue;
391  }
392  if (*__fmt_first == '&')
393  {
394  __output(0);
395  continue;
396  }
397  *__out++ = *__fmt_first;
398  }
399  if (__escaping)
400  *__out++ = '\\';
401  }
402  else
403  {
404  while (1)
405  {
406  auto __next = std::find(__fmt_first, __fmt_last, '$');
407  if (__next == __fmt_last)
408  break;
409 
410  __out = std::copy(__fmt_first, __next, __out);
411 
412  auto __eat = [&](char __ch) -> bool
413  {
414  if (*__next == __ch)
415  {
416  ++__next;
417  return true;
418  }
419  return false;
420  };
421 
422  if (++__next == __fmt_last)
423  *__out++ = '$';
424  else if (__eat('$'))
425  *__out++ = '$';
426  else if (__eat('&'))
427  __output(0);
428  else if (__eat('`'))
429  {
430  auto& __sub = _M_prefix();
431  if (__sub.matched)
432  __out = std::copy(__sub.first, __sub.second, __out);
433  }
434  else if (__eat('\''))
435  {
436  auto& __sub = _M_suffix();
437  if (__sub.matched)
438  __out = std::copy(__sub.first, __sub.second, __out);
439  }
440  else if (__fctyp.is(__ctype_type::digit, *__next))
441  {
442  long __num = __traits.value(*__next, 10);
443  if (++__next != __fmt_last
444  && __fctyp.is(__ctype_type::digit, *__next))
445  {
446  __num *= 10;
447  __num += __traits.value(*__next++, 10);
448  }
449  if (0 <= __num && __num < this->size())
450  __output(__num);
451  }
452  else
453  *__out++ = '$';
454  __fmt_first = __next;
455  }
456  __out = std::copy(__fmt_first, __fmt_last, __out);
457  }
458  return __out;
459  }
460 
461  template<typename _Out_iter, typename _Bi_iter,
462  typename _Rx_traits, typename _Ch_type>
463  _Out_iter
464  __regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
465  const basic_regex<_Ch_type, _Rx_traits>& __e,
466  const _Ch_type* __fmt, size_t __len,
468  {
469  typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
470  _IterT __i(__first, __last, __e, __flags);
471  _IterT __end;
472  if (__i == __end)
473  {
474  if (!(__flags & regex_constants::format_no_copy))
475  __out = std::copy(__first, __last, __out);
476  }
477  else
478  {
479  sub_match<_Bi_iter> __last;
480  for (; __i != __end; ++__i)
481  {
482  if (!(__flags & regex_constants::format_no_copy))
483  __out = std::copy(__i->prefix().first, __i->prefix().second,
484  __out);
485  __out = __i->format(__out, __fmt, __fmt + __len, __flags);
486  __last = __i->suffix();
488  break;
489  }
490  if (!(__flags & regex_constants::format_no_copy))
491  __out = std::copy(__last.first, __last.second, __out);
492  }
493  return __out;
494  }
495 
496  template<typename _Bi_iter,
497  typename _Ch_type,
498  typename _Rx_traits>
499  bool
501  operator==(const regex_iterator& __rhs) const noexcept
502  {
503  if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
504  return true;
505  return _M_pregex == __rhs._M_pregex
506  && _M_begin == __rhs._M_begin
507  && _M_end == __rhs._M_end
508  && _M_flags == __rhs._M_flags
509  && _M_match[0] == __rhs._M_match[0];
510  }
511 
512  template<typename _Bi_iter,
513  typename _Ch_type,
514  typename _Rx_traits>
515  regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
517  operator++()
518  {
519  // In all cases in which the call to regex_search returns true,
520  // match.prefix().first shall be equal to the previous value of
521  // match[0].second, and for each index i in the half-open range
522  // [0, match.size()) for which match[i].matched is true,
523  // match[i].position() shall return distance(begin, match[i].first).
524  // [28.12.1.4.5]
525  if (_M_match[0].matched)
526  {
527  auto __start = _M_match[0].second;
528  auto __prefix_first = _M_match[0].second;
529  if (_M_match[0].first == _M_match[0].second)
530  {
531  if (__start == _M_end)
532  {
533  _M_pregex = nullptr;
534  return *this;
535  }
536  else
537  {
538  if (regex_search(__start, _M_end, _M_match, *_M_pregex,
539  _M_flags
542  {
543  __glibcxx_assert(_M_match[0].matched);
544  auto& __prefix = _M_match._M_prefix();
545  __prefix.first = __prefix_first;
546  __prefix.matched = __prefix.first != __prefix.second;
547  // [28.12.1.4.5]
548  _M_match._M_begin = _M_begin;
549  return *this;
550  }
551  else
552  ++__start;
553  }
554  }
556  if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
557  {
558  __glibcxx_assert(_M_match[0].matched);
559  auto& __prefix = _M_match._M_prefix();
560  __prefix.first = __prefix_first;
561  __prefix.matched = __prefix.first != __prefix.second;
562  // [28.12.1.4.5]
563  _M_match._M_begin = _M_begin;
564  }
565  else
566  _M_pregex = nullptr;
567  }
568  return *this;
569  }
570 
571  template<typename _Bi_iter,
572  typename _Ch_type,
573  typename _Rx_traits>
574  regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
576  operator=(const regex_token_iterator& __rhs)
577  {
578  _M_position = __rhs._M_position;
579  _M_subs = __rhs._M_subs;
580  _M_n = __rhs._M_n;
581  _M_suffix = __rhs._M_suffix;
582  _M_has_m1 = __rhs._M_has_m1;
583  _M_normalize_result();
584  return *this;
585  }
586 
587  template<typename _Bi_iter,
588  typename _Ch_type,
589  typename _Rx_traits>
590  bool
592  operator==(const regex_token_iterator& __rhs) const
593  {
594  if (_M_end_of_seq() && __rhs._M_end_of_seq())
595  return true;
596  if (_M_suffix.matched && __rhs._M_suffix.matched
597  && _M_suffix == __rhs._M_suffix)
598  return true;
599  if (_M_end_of_seq() || _M_suffix.matched
600  || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
601  return false;
602  return _M_position == __rhs._M_position
603  && _M_n == __rhs._M_n
604  && _M_subs == __rhs._M_subs;
605  }
606 
607  template<typename _Bi_iter,
608  typename _Ch_type,
609  typename _Rx_traits>
610  regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
612  operator++()
613  {
614  _Position __prev = _M_position;
615  if (_M_suffix.matched)
616  *this = regex_token_iterator();
617  else if (_M_n + 1 < _M_subs.size())
618  {
619  _M_n++;
620  _M_result = &_M_current_match();
621  }
622  else
623  {
624  _M_n = 0;
625  ++_M_position;
626  if (_M_position != _Position())
627  _M_result = &_M_current_match();
628  else if (_M_has_m1 && __prev->suffix().length() != 0)
629  {
630  _M_suffix.matched = true;
631  _M_suffix.first = __prev->suffix().first;
632  _M_suffix.second = __prev->suffix().second;
633  _M_result = &_M_suffix;
634  }
635  else
636  *this = regex_token_iterator();
637  }
638  return *this;
639  }
640 
641  template<typename _Bi_iter,
642  typename _Ch_type,
643  typename _Rx_traits>
644  void
645  regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
646  _M_init(_Bi_iter __a, _Bi_iter __b)
647  {
648  _M_has_m1 = false;
649  for (auto __it : _M_subs)
650  if (__it == -1)
651  {
652  _M_has_m1 = true;
653  break;
654  }
655  if (_M_position != _Position())
656  _M_result = &_M_current_match();
657  else if (_M_has_m1)
658  {
659  _M_suffix.matched = true;
660  _M_suffix.first = __a;
661  _M_suffix.second = __b;
662  _M_result = &_M_suffix;
663  }
664  else
665  _M_result = nullptr;
666  }
667 
668  /// @endcond
669 
670 _GLIBCXX_END_NAMESPACE_VERSION
671 } // namespace
_Out_iter __regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, const basic_regex< _Ch_type, _Rx_traits > &__e, const _Ch_type *__fmt, size_t __len, regex_constants::match_flag_type __flags)
Determines if there is a match between the regular expression e and all of the character sequence [first, last).
bool regex_search(_Bi_iter __s, _Bi_iter __e, match_results< _Bi_iter, _Alloc > &__m, const basic_regex< _Ch_type, _Rx_traits > &__re, regex_constants::match_flag_type __flags=regex_constants::match_default)
Definition: regex.h:2358
ISO C++ entities toplevel namespace is std.
ios_base & oct(ios_base &__base)
Calls base.setf(ios_base::oct, ios_base::basefield).
Definition: ios_base.h:1071
ios_base & hex(ios_base &__base)
Calls base.setf(ios_base::hex, ios_base::basefield).
Definition: ios_base.h:1063
constexpr auto size(const _Container &__cont) noexcept(noexcept(__cont.size())) -> decltype(__cont.size())
Return the size of a container.
Definition: range_access.h:264
constexpr syntax_option_type __polynomial
constexpr match_flag_type format_first_only
constexpr match_flag_type match_continuous
match_flag_type
This is a bitmask type indicating regex matching rules.
constexpr match_flag_type match_prev_avail
constexpr match_flag_type format_sed
constexpr match_flag_type match_not_null
constexpr match_flag_type format_no_copy
Controlling input for std::string.
Definition: sstream:538
Primary class template ctype facet.
_Out_iter format(_Out_iter __out, const char_type *__fmt_first, const char_type *__fmt_last, match_flag_type __flags=regex_constants::format_default) const
string_type lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
Gets a collation element by name.
char_class_type lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase=false) const
Maps one or more characters to a named character classification.
int value(_Ch_type __ch, int __radix) const
Converts a digit to an int.
bool isctype(_Ch_type __c, char_class_type __f) const
Determines if c is a member of an identified class.
regex_iterator & operator++()
Increments a regex_iterator.
bool operator==(const regex_iterator &) const noexcept
Tests the equivalence of two regex iterators.
bool operator==(const regex_token_iterator &__rhs) const
Compares a regex_token_iterator to another for equality.
regex_token_iterator & operator++()
Increments a regex_token_iterator.
regex_token_iterator & operator=(const regex_token_iterator &__rhs)
Assigns a regex_token_iterator to another.