1  // class template regex -*- C++ -*-
       2  
       3  // Copyright (C) 2013-2023 Free Software Foundation, Inc.
       4  //
       5  // This file is part of the GNU ISO C++ Library.  This library is free
       6  // software; you can redistribute it and/or modify it under the
       7  // terms of the GNU General Public License as published by the
       8  // Free Software Foundation; either version 3, or (at your option)
       9  // any later version.
      10  
      11  // This library is distributed in the hope that it will be useful,
      12  // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14  // GNU General Public License for more details.
      15  
      16  // Under Section 7 of GPL version 3, you are granted additional
      17  // permissions described in the GCC Runtime Library Exception, version
      18  // 3.1, as published by the Free Software Foundation.
      19  
      20  // You should have received a copy of the GNU General Public License and
      21  // a copy of the GCC Runtime Library Exception along with this program;
      22  // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      23  // <http://www.gnu.org/licenses/>.
      24  
      25  /**
      26   *  @file bits/regex_executor.h
      27   *  This is an internal header file, included by other library headers.
      28   *  Do not attempt to use it directly. @headername{regex}
      29   */
      30  
      31  // FIXME convert comments to doxygen format.
      32  
      33  namespace std _GLIBCXX_VISIBILITY(default)
      34  {
      35  _GLIBCXX_BEGIN_NAMESPACE_VERSION
      36  
      37  namespace __detail
      38  {
      39    /**
      40     * @addtogroup regex-detail
      41     * @{
      42     */
      43  
      44    /**
      45     * @brief Takes a regex and an input string and does the matching.
      46     *
      47     * The %_Executor class has two modes: DFS mode and BFS mode, controlled
      48     * by the template parameter %__dfs_mode.
      49     */
      50    template<typename _BiIter, typename _Alloc, typename _TraitsT,
      51  	   bool __dfs_mode>
      52      class _Executor
      53      {
      54        using __search_mode = integral_constant<bool, __dfs_mode>;
      55        using __dfs = true_type;
      56        using __bfs = false_type;
      57  
      58        enum class _Match_mode : unsigned char { _Exact, _Prefix };
      59  
      60      public:
      61        typedef typename iterator_traits<_BiIter>::value_type _CharT;
      62        typedef basic_regex<_CharT, _TraitsT>                 _RegexT;
      63        typedef _GLIBCXX_STD_C::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
      64        typedef regex_constants::match_flag_type              _FlagT;
      65        typedef typename _TraitsT::char_class_type            _ClassT;
      66        typedef _NFA<_TraitsT>                                _NFAT;
      67  
      68      public:
      69        _Executor(_BiIter         __begin,
      70  		_BiIter         __end,
      71  		_ResultsVec&    __results,
      72  		const _RegexT&  __re,
      73  		_FlagT          __flags)
      74        : _M_cur_results(__results.get_allocator()),
      75  	_M_begin(__begin),
      76  	_M_end(__end),
      77  	_M_re(__re),
      78  	_M_nfa(*__re._M_automaton),
      79  	_M_results(__results),
      80  	_M_rep_count(_M_nfa.size()),
      81  	_M_states(_M_nfa._M_start(), _M_nfa.size()),
      82  	_M_flags(__flags)
      83        {
      84  	using namespace regex_constants;
      85  	if (__flags & match_prev_avail) // ignore not_bol and not_bow
      86  	  _M_flags &= ~(match_not_bol | match_not_bow);
      87        }
      88  
      89        // Set matched when string exactly matches the pattern.
      90        bool
      91        _M_match()
      92        {
      93  	_M_current = _M_begin;
      94  	return _M_main(_Match_mode::_Exact);
      95        }
      96  
      97        // Set matched when some prefix of the string matches the pattern.
      98        bool
      99        _M_search_from_first()
     100        {
     101  	_M_current = _M_begin;
     102  	return _M_main(_Match_mode::_Prefix);
     103        }
     104  
     105        bool
     106        _M_search();
     107  
     108      private:
     109        void
     110        _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
     111  
     112        void
     113        _M_handle_repeat(_Match_mode, _StateIdT);
     114  
     115        void
     116        _M_handle_subexpr_begin(_Match_mode, _StateIdT);
     117  
     118        void
     119        _M_handle_subexpr_end(_Match_mode, _StateIdT);
     120  
     121        void
     122        _M_handle_line_begin_assertion(_Match_mode, _StateIdT);
     123  
     124        void
     125        _M_handle_line_end_assertion(_Match_mode, _StateIdT);
     126  
     127        void
     128        _M_handle_word_boundary(_Match_mode, _StateIdT);
     129  
     130        void
     131        _M_handle_subexpr_lookahead(_Match_mode, _StateIdT);
     132  
     133        void
     134        _M_handle_match(_Match_mode, _StateIdT);
     135  
     136        void
     137        _M_handle_backref(_Match_mode, _StateIdT);
     138  
     139        void
     140        _M_handle_accept(_Match_mode, _StateIdT);
     141  
     142        void
     143        _M_handle_alternative(_Match_mode, _StateIdT);
     144  
     145        void
     146        _M_dfs(_Match_mode __match_mode, _StateIdT __start);
     147  
     148        bool
     149        _M_main(_Match_mode __match_mode)
     150        { return _M_main_dispatch(__match_mode, __search_mode{}); }
     151  
     152        bool
     153        _M_main_dispatch(_Match_mode __match_mode, __dfs);
     154  
     155        bool
     156        _M_main_dispatch(_Match_mode __match_mode, __bfs);
     157  
     158        bool
     159        _M_is_word(_CharT __ch) const
     160        {
     161  	static const _CharT __s[2] = { 'w' };
     162  	return _M_re._M_automaton->_M_traits.isctype
     163  	  (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1));
     164        }
     165  
     166        bool
     167        _M_at_begin() const
     168        {
     169  	if (_M_current == _M_begin)
     170  	  {
     171  	    // match_not_bol means ^ does not match [_M_begin,_M_begin)
     172  	    if (_M_flags & regex_constants::match_not_bol)
     173  	      return false;
     174  	    // match_prev_avail means _M_begin is not the start of the input.
     175  	    if (_M_flags & regex_constants::match_prev_avail)
     176  	      {
     177  		// For ECMAScript multiline matches, check if the previous
     178  		// character is a line terminator.
     179  		if (_M_match_multiline())
     180  		  return _M_is_line_terminator(*std::prev(_M_current));
     181  		else
     182  		  return false;
     183  	      }
     184  	    else // ^ matches at _M_begin
     185  	      return true;
     186  	  }
     187  	else if (_M_match_multiline())
     188  	  return _M_is_line_terminator(*std::prev(_M_current));
     189  	else
     190  	  return false;
     191        }
     192  
     193        bool
     194        _M_at_end() const
     195        {
     196  	if (_M_current == _M_end)
     197  	  return !(_M_flags & regex_constants::match_not_eol);
     198  	else if (_M_match_multiline())
     199  	  return _M_is_line_terminator(*_M_current);
     200  	else
     201  	  return false;
     202        }
     203  
     204        bool
     205        _M_word_boundary() const;
     206  
     207        bool
     208        _M_lookahead(_StateIdT __next);
     209  
     210        bool
     211        _M_is_line_terminator(_CharT __c) const
     212        {
     213  	const auto& __traits = _M_re._M_automaton->_M_traits;
     214  	const auto& __ct = use_facet<ctype<_CharT>>(__traits.getloc());
     215  	const char __n{ __ct.narrow(__c, ' ') };
     216  	if (__n == '\n')
     217  	  return true;
     218  	if (_M_re._M_automaton->_M_options() & regex_constants::ECMAScript)
     219  	  {
     220  	    if (__n == '\r')
     221  	      return true;
     222  	    // FIXME: U+2028 (line separator) and U+2029 (paragraph separator)
     223  	  }
     224  	return false;
     225        }
     226  
     227        bool
     228        _M_match_multiline() const noexcept
     229        {
     230  	constexpr auto __m
     231  	  = regex_constants::ECMAScript | regex_constants::__multiline;
     232  	return (_M_re._M_automaton->_M_options() & __m) == __m;
     233        }
     234  
     235         // Holds additional information used in BFS-mode.
     236        template<typename _SearchMode, typename _ResultsVec>
     237  	struct _State_info;
     238  
     239        template<typename _ResultsVec>
     240  	struct _State_info<__bfs, _ResultsVec>
     241  	{
     242  	  explicit
     243  	  _State_info(_StateIdT __start, size_t __n)
     244  	  : _M_visited_states(new bool[__n]()), _M_start(__start)
     245  	  { }
     246  
     247  	  ~_State_info() { delete[] _M_visited_states; }
     248  
     249  	  _State_info(const _State_info&) = delete;
     250  	  _State_info& operator=(const _State_info&) = delete;
     251  
     252  	  bool _M_visited(_StateIdT __i)
     253  	  {
     254  	    if (_M_visited_states[__i])
     255  	      return true;
     256  	    _M_visited_states[__i] = true;
     257  	    return false;
     258  	  }
     259  
     260  	  void _M_queue(_StateIdT __i, const _ResultsVec& __res)
     261  	  { _M_match_queue.emplace_back(__i, __res); }
     262  
     263  	  // Dummy implementations for BFS mode.
     264  	  _BiIter* _M_get_sol_pos() { return nullptr; }
     265  
     266  	  // Saves states that need to be considered for the next character.
     267  	  _GLIBCXX_STD_C::vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
     268  	  // Indicates which states are already visited.
     269  	  bool*     _M_visited_states;
     270  	  // To record current solution.
     271  	  _StateIdT _M_start;
     272  	};
     273  
     274        template<typename _ResultsVec>
     275  	struct _State_info<__dfs, _ResultsVec>
     276  	{
     277  	  explicit
     278  	  _State_info(_StateIdT __start, size_t) : _M_start(__start)
     279  	  { }
     280  
     281  	  // Dummy implementations for DFS mode.
     282  	  bool _M_visited(_StateIdT) const { return false; }
     283  	  void _M_queue(_StateIdT, const _ResultsVec&) { }
     284  
     285  	  _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
     286  
     287  	  // To record current solution.
     288  	  _StateIdT _M_start;
     289  	  _BiIter   _M_sol_pos;
     290  	};
     291  
     292      public:
     293        _ResultsVec                                           _M_cur_results;
     294        _BiIter                                               _M_current;
     295        _BiIter                                               _M_begin;
     296        const _BiIter                                         _M_end;
     297        const _RegexT&                                        _M_re;
     298        const _NFAT&                                          _M_nfa;
     299        _ResultsVec&                                          _M_results;
     300        _GLIBCXX_STD_C::vector<pair<_BiIter, int>>            _M_rep_count;
     301        _State_info<__search_mode, _ResultsVec>		    _M_states;
     302        _FlagT                                                _M_flags;
     303        // Do we have a solution so far?
     304        bool                                                  _M_has_sol;
     305      };
     306  
     307   ///@} regex-detail
     308  } // namespace __detail
     309  _GLIBCXX_END_NAMESPACE_VERSION
     310  } // namespace std
     311  
     312  #include <bits/regex_executor.tcc>