(root)/
gcc-13.2.0/
libstdc++-v3/
include/
bits/
regex_compiler.h
       1  // class template regex -*- C++ -*-
       2  
       3  // Copyright (C) 2010-2023 Free Software Foundation, Inc.
       4  //
       5  // This file is part of the GNU ISO C++ Library.  This library is free
       6  // software; you can redistribute it and/or modify it under the
       7  // terms of the GNU General Public License as published by the
       8  // Free Software Foundation; either version 3, or (at your option)
       9  // any later version.
      10  
      11  // This library is distributed in the hope that it will be useful,
      12  // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14  // GNU General Public License for more details.
      15  
      16  // Under Section 7 of GPL version 3, you are granted additional
      17  // permissions described in the GCC Runtime Library Exception, version
      18  // 3.1, as published by the Free Software Foundation.
      19  
      20  // You should have received a copy of the GNU General Public License and
      21  // a copy of the GCC Runtime Library Exception along with this program;
      22  // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      23  // <http://www.gnu.org/licenses/>.
      24  
      25  /**
      26   *  @file bits/regex_compiler.h
      27   *  This is an internal header file, included by other library headers.
      28   *  Do not attempt to use it directly. @headername{regex}
      29   */
      30  
      31  namespace std _GLIBCXX_VISIBILITY(default)
      32  {
      33  _GLIBCXX_BEGIN_NAMESPACE_VERSION
      34  _GLIBCXX_BEGIN_NAMESPACE_CXX11
      35  
      36    template<typename>
      37      class regex_traits;
      38  
      39  _GLIBCXX_END_NAMESPACE_CXX11
      40  
      41  namespace __detail
      42  {
      43    /**
      44     * @addtogroup regex-detail
      45     * @{
      46     */
      47  
      48    template<typename, bool, bool>
      49      struct _BracketMatcher;
      50  
      51    /**
      52     * @brief Builds an NFA from an input iterator range.
      53     *
      54     * The %_TraitsT type should fulfill requirements [28.3].
      55     */
      56    template<typename _TraitsT>
      57      class _Compiler
      58      {
      59      public:
      60        typedef typename _TraitsT::char_type        _CharT;
      61        typedef _NFA<_TraitsT>              	  _RegexT;
      62        typedef regex_constants::syntax_option_type _FlagT;
      63  
      64        _Compiler(const _CharT* __b, const _CharT* __e,
      65  		const typename _TraitsT::locale_type& __traits, _FlagT __flags);
      66  
      67        shared_ptr<const _RegexT>
      68        _M_get_nfa() noexcept
      69        { return std::move(_M_nfa); }
      70  
      71      private:
      72        typedef _Scanner<_CharT>               _ScannerT;
      73        typedef typename _TraitsT::string_type _StringT;
      74        typedef typename _ScannerT::_TokenT    _TokenT;
      75        typedef _StateSeq<_TraitsT>            _StateSeqT;
      76        typedef std::stack<_StateSeqT>         _StackT;
      77        typedef std::ctype<_CharT>             _CtypeT;
      78  
      79        // accepts a specific token or returns false.
      80        bool
      81        _M_match_token(_TokenT __token);
      82  
      83        void
      84        _M_disjunction();
      85  
      86        void
      87        _M_alternative();
      88  
      89        bool
      90        _M_term();
      91  
      92        bool
      93        _M_assertion();
      94  
      95        bool
      96        _M_quantifier();
      97  
      98        bool
      99        _M_atom();
     100  
     101        bool
     102        _M_bracket_expression();
     103  
     104        template<bool __icase, bool __collate>
     105  	void
     106  	_M_insert_any_matcher_ecma();
     107  
     108        template<bool __icase, bool __collate>
     109  	void
     110  	_M_insert_any_matcher_posix();
     111  
     112        template<bool __icase, bool __collate>
     113  	void
     114  	_M_insert_char_matcher();
     115  
     116        template<bool __icase, bool __collate>
     117  	void
     118  	_M_insert_character_class_matcher();
     119  
     120        template<bool __icase, bool __collate>
     121  	void
     122  	_M_insert_bracket_matcher(bool __neg);
     123  
     124        // Cache of the last atom seen in a bracketed range expression.
     125        struct _BracketState
     126        {
     127  	enum class _Type : char { _None, _Char, _Class } _M_type = _Type::_None;
     128  	_CharT _M_char = _CharT();
     129  
     130  	void
     131  	set(_CharT __c) noexcept { _M_type = _Type::_Char; _M_char = __c; }
     132  
     133  	_GLIBCXX_NODISCARD _CharT
     134  	get() const noexcept { return _M_char; }
     135  
     136  	void
     137  	reset(_Type __t = _Type::_None) noexcept { _M_type = __t; }
     138  
     139  	explicit operator bool() const noexcept
     140  	{ return _M_type != _Type::_None; }
     141  
     142  	// Previous token was a single character.
     143  	_GLIBCXX_NODISCARD bool
     144  	_M_is_char() const noexcept { return _M_type == _Type::_Char; }
     145  
     146  	// Previous token was a character class, equivalent class,
     147  	// collating symbol etc.
     148  	_GLIBCXX_NODISCARD bool
     149  	_M_is_class() const noexcept { return _M_type == _Type::_Class; }
     150        };
     151  
     152        template<bool __icase, bool __collate>
     153  	using _BracketMatcher
     154  	  = std::__detail::_BracketMatcher<_TraitsT, __icase, __collate>;
     155  
     156        // Returns true if successfully parsed one term and should continue
     157        // compiling a bracket expression.
     158        // Returns false if the compiler should move on.
     159        template<bool __icase, bool __collate>
     160  	bool
     161  	_M_expression_term(_BracketState& __last_char,
     162  			   _BracketMatcher<__icase, __collate>& __matcher);
     163  
     164        int
     165        _M_cur_int_value(int __radix);
     166  
     167        bool
     168        _M_try_char();
     169  
     170        _StateSeqT
     171        _M_pop()
     172        {
     173  	auto ret = _M_stack.top();
     174  	_M_stack.pop();
     175  	return ret;
     176        }
     177  
     178        static _FlagT
     179        _S_validate(_FlagT __f)
     180        {
     181  	using namespace regex_constants;
     182  	switch (__f & (ECMAScript|basic|extended|awk|grep|egrep))
     183  	  {
     184  	  case ECMAScript:
     185  	  case basic:
     186  	  case extended:
     187  	  case awk:
     188  	  case grep:
     189  	  case egrep:
     190  	    return __f;
     191  	  case _FlagT(0):
     192  	    return __f | ECMAScript;
     193  	  default:
     194  	    std::__throw_regex_error(_S_grammar, "conflicting grammar options");
     195  	  }
     196        }
     197  
     198        _FlagT              _M_flags;
     199        _ScannerT           _M_scanner;
     200        shared_ptr<_RegexT> _M_nfa;
     201        _StringT            _M_value;
     202        _StackT             _M_stack;
     203        const _TraitsT&     _M_traits;
     204        const _CtypeT&      _M_ctype;
     205      };
     206  
     207    // [28.13.14]
     208    template<typename _TraitsT, bool __icase, bool __collate>
     209      class _RegexTranslatorBase
     210      {
     211      public:
     212        typedef typename _TraitsT::char_type	      _CharT;
     213        typedef typename _TraitsT::string_type	      _StringT;
     214        typedef _StringT _StrTransT;
     215  
     216        explicit
     217        _RegexTranslatorBase(const _TraitsT& __traits)
     218        : _M_traits(__traits)
     219        { }
     220  
     221        _CharT
     222        _M_translate(_CharT __ch) const
     223        {
     224  	if _GLIBCXX17_CONSTEXPR (__icase)
     225  	  return _M_traits.translate_nocase(__ch);
     226  	else if _GLIBCXX17_CONSTEXPR (__collate)
     227  	  return _M_traits.translate(__ch);
     228  	else
     229  	  return __ch;
     230        }
     231  
     232        _StrTransT
     233        _M_transform(_CharT __ch) const
     234        {
     235  	_StrTransT __str(1, __ch);
     236  	return _M_traits.transform(__str.begin(), __str.end());
     237        }
     238  
     239        // See LWG 523. It's not efficiently implementable when _TraitsT is not
     240        // std::regex_traits<>, and __collate is true. See specializations for
     241        // implementations of other cases.
     242        bool
     243        _M_match_range(const _StrTransT& __first, const _StrTransT& __last,
     244  		     const _StrTransT& __s) const
     245        { return __first <= __s && __s <= __last; }
     246  
     247      protected:
     248        bool _M_in_range_icase(_CharT __first, _CharT __last, _CharT __ch) const
     249        {
     250  	typedef std::ctype<_CharT> __ctype_type;
     251  	const auto& __fctyp = use_facet<__ctype_type>(this->_M_traits.getloc());
     252  	auto __lower = __fctyp.tolower(__ch);
     253  	auto __upper = __fctyp.toupper(__ch);
     254  	return (__first <= __lower && __lower <= __last)
     255  	  || (__first <= __upper && __upper <= __last);
     256        }
     257  
     258        const _TraitsT& _M_traits;
     259      };
     260  
     261    template<typename _TraitsT, bool __icase, bool __collate>
     262      class _RegexTranslator
     263      : public _RegexTranslatorBase<_TraitsT, __icase, __collate>
     264      {
     265      public:
     266        typedef _RegexTranslatorBase<_TraitsT, __icase, __collate> _Base;
     267        using _Base::_Base;
     268      };
     269  
     270    template<typename _TraitsT, bool __icase>
     271      class _RegexTranslator<_TraitsT, __icase, false>
     272      : public _RegexTranslatorBase<_TraitsT, __icase, false>
     273      {
     274      public:
     275        typedef _RegexTranslatorBase<_TraitsT, __icase, false> _Base;
     276        typedef typename _Base::_CharT _CharT;
     277        typedef _CharT _StrTransT;
     278  
     279        using _Base::_Base;
     280  
     281        _StrTransT
     282        _M_transform(_CharT __ch) const
     283        { return __ch; }
     284  
     285        bool
     286        _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const
     287        {
     288  	if _GLIBCXX17_CONSTEXPR (!__icase)
     289  	  return __first <= __ch && __ch <= __last;
     290  	else
     291  	  return this->_M_in_range_icase(__first, __last, __ch);
     292        }
     293      };
     294  
     295    template<typename _CharType>
     296      class _RegexTranslator<std::regex_traits<_CharType>, true, true>
     297      : public _RegexTranslatorBase<std::regex_traits<_CharType>, true, true>
     298      {
     299      public:
     300        typedef _RegexTranslatorBase<std::regex_traits<_CharType>, true, true>
     301  	_Base;
     302        typedef typename _Base::_CharT _CharT;
     303        typedef typename _Base::_StrTransT _StrTransT;
     304  
     305        using _Base::_Base;
     306  
     307        bool
     308        _M_match_range(const _StrTransT& __first, const _StrTransT& __last,
     309  		     const _StrTransT& __str) const
     310        {
     311  	__glibcxx_assert(__first.size() == 1);
     312  	__glibcxx_assert(__last.size() == 1);
     313  	__glibcxx_assert(__str.size() == 1);
     314  	return this->_M_in_range_icase(__first[0], __last[0], __str[0]);
     315        }
     316      };
     317  
     318    template<typename _TraitsT>
     319      class _RegexTranslator<_TraitsT, false, false>
     320      {
     321      public:
     322        typedef typename _TraitsT::char_type _CharT;
     323        typedef _CharT                       _StrTransT;
     324  
     325        explicit
     326        _RegexTranslator(const _TraitsT&)
     327        { }
     328  
     329        _CharT
     330        _M_translate(_CharT __ch) const
     331        { return __ch; }
     332  
     333        _StrTransT
     334        _M_transform(_CharT __ch) const
     335        { return __ch; }
     336  
     337        bool
     338        _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const
     339        { return __first <= __ch && __ch <= __last; }
     340      };
     341  
     342    template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate>
     343      struct _AnyMatcher;
     344  
     345    template<typename _TraitsT, bool __icase, bool __collate>
     346      struct _AnyMatcher<_TraitsT, false, __icase, __collate>
     347      {
     348        typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
     349        typedef typename _TransT::_CharT                       _CharT;
     350  
     351        explicit
     352        _AnyMatcher(const _TraitsT& __traits)
     353        : _M_translator(__traits)
     354        { }
     355  
     356        bool
     357        operator()(_CharT __ch) const
     358        {
     359  	static auto __nul = _M_translator._M_translate('\0');
     360  	return _M_translator._M_translate(__ch) != __nul;
     361        }
     362  
     363        _TransT _M_translator;
     364      };
     365  
     366    template<typename _TraitsT, bool __icase, bool __collate>
     367      struct _AnyMatcher<_TraitsT, true, __icase, __collate>
     368      {
     369        typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
     370        typedef typename _TransT::_CharT                       _CharT;
     371  
     372        explicit
     373        _AnyMatcher(const _TraitsT& __traits)
     374        : _M_translator(__traits)
     375        { }
     376  
     377        bool
     378        operator()(_CharT __ch) const
     379        { return _M_apply(__ch, typename is_same<_CharT, char>::type()); }
     380  
     381        bool
     382        _M_apply(_CharT __ch, true_type) const
     383        {
     384  	auto __c = _M_translator._M_translate(__ch);
     385  	auto __n = _M_translator._M_translate('\n');
     386  	auto __r = _M_translator._M_translate('\r');
     387  	return __c != __n && __c != __r;
     388        }
     389  
     390        bool
     391        _M_apply(_CharT __ch, false_type) const
     392        {
     393  	auto __c = _M_translator._M_translate(__ch);
     394  	auto __n = _M_translator._M_translate('\n');
     395  	auto __r = _M_translator._M_translate('\r');
     396  	auto __u2028 = _M_translator._M_translate(u'\u2028');
     397  	auto __u2029 = _M_translator._M_translate(u'\u2029');
     398  	return __c != __n && __c != __r && __c != __u2028 && __c != __u2029;
     399        }
     400  
     401        _TransT _M_translator;
     402      };
     403  
     404    template<typename _TraitsT, bool __icase, bool __collate>
     405      struct _CharMatcher
     406      {
     407        typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
     408        typedef typename _TransT::_CharT                       _CharT;
     409  
     410        _CharMatcher(_CharT __ch, const _TraitsT& __traits)
     411        : _M_translator(__traits), _M_ch(_M_translator._M_translate(__ch))
     412        { }
     413  
     414        bool
     415        operator()(_CharT __ch) const
     416        { return _M_ch == _M_translator._M_translate(__ch); }
     417  
     418        _TransT _M_translator;
     419        _CharT  _M_ch;
     420      };
     421  
     422    /// Matches a character range (bracket expression)
     423    template<typename _TraitsT, bool __icase, bool __collate>
     424      struct _BracketMatcher
     425      {
     426      public:
     427        typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
     428        typedef typename _TransT::_CharT                       _CharT;
     429        typedef typename _TransT::_StrTransT                   _StrTransT;
     430        typedef typename _TraitsT::string_type                 _StringT;
     431        typedef typename _TraitsT::char_class_type             _CharClassT;
     432  
     433      public:
     434        _BracketMatcher(bool __is_non_matching,
     435  		      const _TraitsT& __traits)
     436        : _M_class_set(0), _M_translator(__traits), _M_traits(__traits),
     437        _M_is_non_matching(__is_non_matching)
     438        { }
     439  
     440        bool
     441        operator()(_CharT __ch) const
     442        {
     443  	_GLIBCXX_DEBUG_ASSERT(_M_is_ready);
     444  	return _M_apply(__ch, _UseCache());
     445        }
     446  
     447        void
     448        _M_add_char(_CharT __c)
     449        {
     450  	_M_char_set.push_back(_M_translator._M_translate(__c));
     451  	_GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
     452        }
     453  
     454        _StringT
     455        _M_add_collate_element(const _StringT& __s)
     456        {
     457  	auto __st = _M_traits.lookup_collatename(__s.data(),
     458  						 __s.data() + __s.size());
     459  	if (__st.empty())
     460  	  __throw_regex_error(regex_constants::error_collate,
     461  			      "Invalid collate element.");
     462  	_M_char_set.push_back(_M_translator._M_translate(__st[0]));
     463  	_GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
     464  	return __st;
     465        }
     466  
     467        void
     468        _M_add_equivalence_class(const _StringT& __s)
     469        {
     470  	auto __st = _M_traits.lookup_collatename(__s.data(),
     471  						 __s.data() + __s.size());
     472  	if (__st.empty())
     473  	  __throw_regex_error(regex_constants::error_collate,
     474  			      "Invalid equivalence class.");
     475  	__st = _M_traits.transform_primary(__st.data(),
     476  					   __st.data() + __st.size());
     477  	_M_equiv_set.push_back(__st);
     478  	_GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
     479        }
     480  
     481        // __neg should be true for \D, \S and \W only.
     482        void
     483        _M_add_character_class(const _StringT& __s, bool __neg)
     484        {
     485  	auto __mask = _M_traits.lookup_classname(__s.data(),
     486  						 __s.data() + __s.size(),
     487  						 __icase);
     488  	if (__mask == 0)
     489  	  __throw_regex_error(regex_constants::error_collate,
     490  			      "Invalid character class.");
     491  	if (!__neg)
     492  	  _M_class_set |= __mask;
     493  	else
     494  	  _M_neg_class_set.push_back(__mask);
     495  	_GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
     496        }
     497  
     498        void
     499        _M_make_range(_CharT __l, _CharT __r)
     500        {
     501  	if (__l > __r)
     502  	  __throw_regex_error(regex_constants::error_range,
     503  			      "Invalid range in bracket expression.");
     504  	_M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
     505  					 _M_translator._M_transform(__r)));
     506  	_GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
     507        }
     508  
     509        void
     510        _M_ready()
     511        {
     512  	std::sort(_M_char_set.begin(), _M_char_set.end());
     513  	auto __end = std::unique(_M_char_set.begin(), _M_char_set.end());
     514  	_M_char_set.erase(__end, _M_char_set.end());
     515  	_M_make_cache(_UseCache());
     516  	_GLIBCXX_DEBUG_ONLY(_M_is_ready = true);
     517        }
     518  
     519      private:
     520        // Currently we only use the cache for char
     521        using _UseCache = typename std::is_same<_CharT, char>::type;
     522  
     523        static constexpr size_t
     524        _S_cache_size =
     525  	1ul << (sizeof(_CharT) * __CHAR_BIT__ * int(_UseCache::value));
     526  
     527        struct _Dummy { };
     528        using _CacheT = std::__conditional_t<_UseCache::value,
     529  					   std::bitset<_S_cache_size>,
     530  					   _Dummy>;
     531        using _UnsignedCharT = typename std::make_unsigned<_CharT>::type;
     532  
     533        bool
     534        _M_apply(_CharT __ch, false_type) const;
     535  
     536        bool
     537        _M_apply(_CharT __ch, true_type) const
     538        { return _M_cache[static_cast<_UnsignedCharT>(__ch)]; }
     539  
     540        void
     541        _M_make_cache(true_type)
     542        {
     543  	for (unsigned __i = 0; __i < _M_cache.size(); __i++)
     544  	  _M_cache[__i] = _M_apply(static_cast<_CharT>(__i), false_type());
     545        }
     546  
     547        void
     548        _M_make_cache(false_type)
     549        { }
     550  
     551      private:
     552        _GLIBCXX_STD_C::vector<_CharT>            _M_char_set;
     553        _GLIBCXX_STD_C::vector<_StringT>          _M_equiv_set;
     554        _GLIBCXX_STD_C::vector<pair<_StrTransT, _StrTransT>> _M_range_set;
     555        _GLIBCXX_STD_C::vector<_CharClassT>       _M_neg_class_set;
     556        _CharClassT                               _M_class_set;
     557        _TransT                                   _M_translator;
     558        const _TraitsT&                           _M_traits;
     559        bool                                      _M_is_non_matching;
     560        _CacheT					_M_cache;
     561  #ifdef _GLIBCXX_DEBUG
     562        bool                                      _M_is_ready = false;
     563  #endif
     564      };
     565  
     566   ///@} regex-detail
     567  } // namespace __detail
     568  _GLIBCXX_END_NAMESPACE_VERSION
     569  } // namespace std
     570  
     571  #include <bits/regex_compiler.tcc>