(root)/
gcc-13.2.0/
libstdc++-v3/
include/
bits/
regex_scanner.h
       1  // class template regex -*- C++ -*-
       2  
       3  // Copyright (C) 2013-2023 Free Software Foundation, Inc.
       4  //
       5  // This file is part of the GNU ISO C++ Library.  This library is free
       6  // software; you can redistribute it and/or modify it under the
       7  // terms of the GNU General Public License as published by the
       8  // Free Software Foundation; either version 3, or (at your option)
       9  // any later version.
      10  
      11  // This library is distributed in the hope that it will be useful,
      12  // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14  // GNU General Public License for more details.
      15  
      16  // Under Section 7 of GPL version 3, you are granted additional
      17  // permissions described in the GCC Runtime Library Exception, version
      18  // 3.1, as published by the Free Software Foundation.
      19  
      20  // You should have received a copy of the GNU General Public License and
      21  // a copy of the GCC Runtime Library Exception along with this program;
      22  // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      23  // <http://www.gnu.org/licenses/>.
      24  
      25  /**
      26   *  @file bits/regex_scanner.h
      27   *  This is an internal header file, included by other library headers.
      28   *  Do not attempt to use it directly. @headername{regex}
      29   */
      30  
      31  namespace std _GLIBCXX_VISIBILITY(default)
      32  {
      33  _GLIBCXX_BEGIN_NAMESPACE_VERSION
      34  
      35  namespace __detail
      36  {
      37    /**
      38     * @addtogroup regex-detail
      39     * @{
      40     */
      41  
      42    struct _ScannerBase
      43    {
      44    public:
      45      /// Token types returned from the scanner.
      46      enum _TokenT : unsigned
      47      {
      48        _S_token_anychar,
      49        _S_token_ord_char,
      50        _S_token_oct_num,
      51        _S_token_hex_num,
      52        _S_token_backref,
      53        _S_token_subexpr_begin,
      54        _S_token_subexpr_no_group_begin,
      55        _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
      56        _S_token_subexpr_end,
      57        _S_token_bracket_begin,
      58        _S_token_bracket_neg_begin,
      59        _S_token_bracket_end,
      60        _S_token_interval_begin,
      61        _S_token_interval_end,
      62        _S_token_quoted_class,
      63        _S_token_char_class_name,
      64        _S_token_collsymbol,
      65        _S_token_equiv_class_name,
      66        _S_token_opt,
      67        _S_token_or,
      68        _S_token_closure0,
      69        _S_token_closure1,
      70        _S_token_line_begin,
      71        _S_token_line_end,
      72        _S_token_word_bound, // neg if _M_value[0] == 'n'
      73        _S_token_comma,
      74        _S_token_dup_count,
      75        _S_token_eof,
      76        _S_token_bracket_dash,
      77        _S_token_unknown = -1u
      78      };
      79  
      80    protected:
      81      typedef regex_constants::syntax_option_type _FlagT;
      82  
      83      enum _StateT
      84      {
      85        _S_state_normal,
      86        _S_state_in_brace,
      87        _S_state_in_bracket,
      88      };
      89  
      90    protected:
      91      _ScannerBase(_FlagT __flags)
      92      : _M_state(_S_state_normal),
      93      _M_flags(__flags),
      94      _M_escape_tbl(_M_is_ecma()
      95  		  ? _M_ecma_escape_tbl
      96  		  : _M_awk_escape_tbl),
      97      _M_spec_char(_M_is_ecma()
      98  		 ? _M_ecma_spec_char
      99  		 : _M_flags & regex_constants::basic
     100  		 ? _M_basic_spec_char
     101  		 : _M_flags & regex_constants::extended
     102  		 ? _M_extended_spec_char
     103  		 : _M_flags & regex_constants::grep
     104  		 ?  ".[\\*^$\n"
     105  		 : _M_flags & regex_constants::egrep
     106  		 ? ".[\\()*+?{|^$\n"
     107  		 : _M_flags & regex_constants::awk
     108  		 ? _M_extended_spec_char
     109  		 : nullptr),
     110      _M_at_bracket_start(false)
     111      { __glibcxx_assert(_M_spec_char); }
     112  
     113    protected:
     114      const char*
     115      _M_find_escape(char __c)
     116      {
     117        auto __it = _M_escape_tbl;
     118        for (; __it->first != '\0'; ++__it)
     119  	if (__it->first == __c)
     120  	  return &__it->second;
     121        return nullptr;
     122      }
     123  
     124      bool
     125      _M_is_ecma() const
     126      { return _M_flags & regex_constants::ECMAScript; }
     127  
     128      bool
     129      _M_is_basic() const
     130      { return _M_flags & (regex_constants::basic | regex_constants::grep); }
     131  
     132      bool
     133      _M_is_extended() const
     134      {
     135        return _M_flags & (regex_constants::extended
     136  			 | regex_constants::egrep
     137  			 | regex_constants::awk);
     138      }
     139  
     140      bool
     141      _M_is_grep() const
     142      { return _M_flags & (regex_constants::grep | regex_constants::egrep); }
     143  
     144      bool
     145      _M_is_awk() const
     146      { return _M_flags & regex_constants::awk; }
     147  
     148    protected:
     149      // TODO: Make them static in the next abi change.
     150      const std::pair<char, _TokenT> _M_token_tbl[9] =
     151        {
     152  	{'^', _S_token_line_begin},
     153  	{'$', _S_token_line_end},
     154  	{'.', _S_token_anychar},
     155  	{'*', _S_token_closure0},
     156  	{'+', _S_token_closure1},
     157  	{'?', _S_token_opt},
     158  	{'|', _S_token_or},
     159  	{'\n', _S_token_or}, // grep and egrep
     160  	{'\0', _S_token_or},
     161        };
     162      const std::pair<char, char> _M_ecma_escape_tbl[8] =
     163        {
     164  	{'0', '\0'},
     165  	{'b', '\b'},
     166  	{'f', '\f'},
     167  	{'n', '\n'},
     168  	{'r', '\r'},
     169  	{'t', '\t'},
     170  	{'v', '\v'},
     171  	{'\0', '\0'},
     172        };
     173      const std::pair<char, char> _M_awk_escape_tbl[11] =
     174        {
     175  	{'"', '"'},
     176  	{'/', '/'},
     177  	{'\\', '\\'},
     178  	{'a', '\a'},
     179  	{'b', '\b'},
     180  	{'f', '\f'},
     181  	{'n', '\n'},
     182  	{'r', '\r'},
     183  	{'t', '\t'},
     184  	{'v', '\v'},
     185  	{'\0', '\0'},
     186        };
     187      const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
     188      const char* _M_basic_spec_char = ".[\\*^$";
     189      const char* _M_extended_spec_char = ".[\\()*+?{|^$";
     190  
     191      _StateT                       _M_state;
     192      _FlagT                        _M_flags;
     193      _TokenT                       _M_token;
     194      const std::pair<char, char>*  _M_escape_tbl;
     195      const char*                   _M_spec_char;
     196      bool                          _M_at_bracket_start;
     197    };
     198  
     199    /**
     200     * @brief Scans an input range for regex tokens.
     201     *
     202     * The %_Scanner class interprets the regular expression pattern in
     203     * the input range passed to its constructor as a sequence of parse
     204     * tokens passed to the regular expression compiler.  The sequence
     205     * of tokens provided depends on the flag settings passed to the
     206     * constructor: different regular expression grammars will interpret
     207     * the same input pattern in syntactically different ways.
     208     */
     209    template<typename _CharT>
     210      class _Scanner
     211      : public _ScannerBase
     212      {
     213      public:
     214        typedef std::basic_string<_CharT>                           _StringT;
     215        typedef regex_constants::syntax_option_type                 _FlagT;
     216        typedef const std::ctype<_CharT>                            _CtypeT;
     217  
     218        _Scanner(const _CharT* __begin, const _CharT* __end,
     219  	       _FlagT __flags, std::locale __loc);
     220  
     221        void
     222        _M_advance();
     223  
     224        _TokenT
     225        _M_get_token() const noexcept
     226        { return _M_token; }
     227  
     228        const _StringT&
     229        _M_get_value() const noexcept
     230        { return _M_value; }
     231  
     232  #ifdef _GLIBCXX_DEBUG
     233        std::ostream&
     234        _M_print(std::ostream&);
     235  #endif
     236  
     237      private:
     238        void
     239        _M_scan_normal();
     240  
     241        void
     242        _M_scan_in_bracket();
     243  
     244        void
     245        _M_scan_in_brace();
     246  
     247        void
     248        _M_eat_escape_ecma();
     249  
     250        void
     251        _M_eat_escape_posix();
     252  
     253        void
     254        _M_eat_escape_awk();
     255  
     256        void
     257        _M_eat_class(char);
     258  
     259        const _CharT*                 _M_current;
     260        const _CharT*                 _M_end;
     261        _CtypeT&                      _M_ctype;
     262        _StringT                      _M_value;
     263        void (_Scanner::* _M_eat_escape)();
     264      };
     265  
     266   ///@} regex-detail
     267  } // namespace __detail
     268  _GLIBCXX_END_NAMESPACE_VERSION
     269  } // namespace std
     270  
     271  #include <bits/regex_scanner.tcc>