(root)/
sed-4.9/
lib/
regexec.c
       1  /* Extended regular expression matching and search library.
       2     Copyright (C) 2002-2022 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4     Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
       5  
       6     The GNU C Library is free software; you can redistribute it and/or
       7     modify it under the terms of the GNU Lesser General Public
       8     License as published by the Free Software Foundation; either
       9     version 2.1 of the License, or (at your option) any later version.
      10  
      11     The GNU C Library is distributed in the hope that it will be useful,
      12     but WITHOUT ANY WARRANTY; without even the implied warranty of
      13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      14     Lesser General Public License for more details.
      15  
      16     You should have received a copy of the GNU Lesser General Public
      17     License along with the GNU C Library; if not, see
      18     <https://www.gnu.org/licenses/>.  */
      19  
      20  static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
      21  				     Idx n);
      22  static void match_ctx_clean (re_match_context_t *mctx);
      23  static void match_ctx_free (re_match_context_t *cache);
      24  static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
      25  					  Idx str_idx, Idx from, Idx to);
      26  static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx);
      27  static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
      28  					   Idx str_idx);
      29  static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
      30  						    Idx node, Idx str_idx);
      31  static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
      32  			   re_dfastate_t **limited_sts, Idx last_node,
      33  			   Idx last_str_idx);
      34  static reg_errcode_t re_search_internal (const regex_t *preg,
      35  					 const char *string, Idx length,
      36  					 Idx start, Idx last_start, Idx stop,
      37  					 size_t nmatch, regmatch_t pmatch[],
      38  					 int eflags);
      39  static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
      40  				  const char *string1, Idx length1,
      41  				  const char *string2, Idx length2,
      42  				  Idx start, regoff_t range,
      43  				  struct re_registers *regs,
      44  				  Idx stop, bool ret_len);
      45  static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
      46  				const char *string, Idx length, Idx start,
      47  				regoff_t range, Idx stop,
      48  				struct re_registers *regs,
      49  				bool ret_len);
      50  static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
      51                                Idx nregs, int regs_allocated);
      52  static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
      53  static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
      54  			   Idx *p_match_first);
      55  static Idx check_halt_state_context (const re_match_context_t *mctx,
      56  				     const re_dfastate_t *state, Idx idx);
      57  static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
      58  			 regmatch_t *prev_idx_match, Idx cur_node,
      59  			 Idx cur_idx, Idx nmatch);
      60  static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
      61  				      Idx str_idx, Idx dest_node, Idx nregs,
      62  				      regmatch_t *regs, regmatch_t *prevregs,
      63  				      re_node_set *eps_via_nodes);
      64  static reg_errcode_t set_regs (const regex_t *preg,
      65  			       const re_match_context_t *mctx,
      66  			       size_t nmatch, regmatch_t *pmatch,
      67  			       bool fl_backtrack);
      68  static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
      69  
      70  static int sift_states_iter_mb (const re_match_context_t *mctx,
      71  				re_sift_context_t *sctx,
      72  				Idx node_idx, Idx str_idx, Idx max_str_idx);
      73  static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
      74  					   re_sift_context_t *sctx);
      75  static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
      76  					  re_sift_context_t *sctx, Idx str_idx,
      77  					  re_node_set *cur_dest);
      78  static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
      79  					      re_sift_context_t *sctx,
      80  					      Idx str_idx,
      81  					      re_node_set *dest_nodes);
      82  static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
      83  					    re_node_set *dest_nodes,
      84  					    const re_node_set *candidates);
      85  static bool check_dst_limits (const re_match_context_t *mctx,
      86  			      const re_node_set *limits,
      87  			      Idx dst_node, Idx dst_idx, Idx src_node,
      88  			      Idx src_idx);
      89  static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
      90  					int boundaries, Idx subexp_idx,
      91  					Idx from_node, Idx bkref_idx);
      92  static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
      93  				      Idx limit, Idx subexp_idx,
      94  				      Idx node, Idx str_idx,
      95  				      Idx bkref_idx);
      96  static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
      97  					  re_node_set *dest_nodes,
      98  					  const re_node_set *candidates,
      99  					  re_node_set *limits,
     100  					  struct re_backref_cache_entry *bkref_ents,
     101  					  Idx str_idx);
     102  static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
     103  					re_sift_context_t *sctx,
     104  					Idx str_idx, const re_node_set *candidates);
     105  static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
     106  					re_dfastate_t **dst,
     107  					re_dfastate_t **src, Idx num);
     108  static re_dfastate_t *find_recover_state (reg_errcode_t *err,
     109  					 re_match_context_t *mctx);
     110  static re_dfastate_t *transit_state (reg_errcode_t *err,
     111  				     re_match_context_t *mctx,
     112  				     re_dfastate_t *state);
     113  static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
     114  					    re_match_context_t *mctx,
     115  					    re_dfastate_t *next_state);
     116  static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
     117  						re_node_set *cur_nodes,
     118  						Idx str_idx);
     119  #if 0
     120  static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
     121  					re_match_context_t *mctx,
     122  					re_dfastate_t *pstate);
     123  #endif
     124  static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
     125  				       re_dfastate_t *pstate);
     126  static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
     127  					  const re_node_set *nodes);
     128  static reg_errcode_t get_subexp (re_match_context_t *mctx,
     129  				 Idx bkref_node, Idx bkref_str_idx);
     130  static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
     131  				     const re_sub_match_top_t *sub_top,
     132  				     re_sub_match_last_t *sub_last,
     133  				     Idx bkref_node, Idx bkref_str);
     134  static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
     135  			     Idx subexp_idx, int type);
     136  static reg_errcode_t check_arrival (re_match_context_t *mctx,
     137  				    state_array_t *path, Idx top_node,
     138  				    Idx top_str, Idx last_node, Idx last_str,
     139  				    int type);
     140  static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
     141  						   Idx str_idx,
     142  						   re_node_set *cur_nodes,
     143  						   re_node_set *next_nodes);
     144  static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
     145  					       re_node_set *cur_nodes,
     146  					       Idx ex_subexp, int type);
     147  static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
     148  						   re_node_set *dst_nodes,
     149  						   Idx target, Idx ex_subexp,
     150  						   int type);
     151  static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
     152  					 re_node_set *cur_nodes, Idx cur_str,
     153  					 Idx subexp_num, int type);
     154  static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
     155  static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
     156  				    const re_string_t *input, Idx idx);
     157  #ifdef _LIBC
     158  static unsigned int find_collation_sequence_value (const unsigned char *mbs,
     159  						   size_t name_len);
     160  #endif
     161  static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
     162  				       const re_dfastate_t *state,
     163  				       re_node_set *states_node,
     164  				       bitset_t *states_ch);
     165  static bool check_node_accept (const re_match_context_t *mctx,
     166  			       const re_token_t *node, Idx idx);
     167  static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
     168  
     169  /* Entry point for POSIX code.  */
     170  
     171  /* regexec searches for a given pattern, specified by PREG, in the
     172     string STRING.
     173  
     174     If NMATCH is zero or REG_NOSUB was set in the cflags argument to
     175     'regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
     176     least NMATCH elements, and we set them to the offsets of the
     177     corresponding matched substrings.
     178  
     179     EFLAGS specifies "execution flags" which affect matching: if
     180     REG_NOTBOL is set, then ^ does not match at the beginning of the
     181     string; if REG_NOTEOL is set, then $ does not match at the end.
     182  
     183     Return 0 if a match is found, REG_NOMATCH if not, REG_BADPAT if
     184     EFLAGS is invalid.  */
     185  
     186  int
     187  regexec (const regex_t *__restrict preg, const char *__restrict string,
     188  	 size_t nmatch, regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
     189  {
     190    reg_errcode_t err;
     191    Idx start, length;
     192    re_dfa_t *dfa = preg->buffer;
     193  
     194    if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
     195      return REG_BADPAT;
     196  
     197    if (eflags & REG_STARTEND)
     198      {
     199        start = pmatch[0].rm_so;
     200        length = pmatch[0].rm_eo;
     201      }
     202    else
     203      {
     204        start = 0;
     205        length = strlen (string);
     206      }
     207  
     208    lock_lock (dfa->lock);
     209    if (preg->no_sub)
     210      err = re_search_internal (preg, string, length, start, length,
     211  			      length, 0, NULL, eflags);
     212    else
     213      err = re_search_internal (preg, string, length, start, length,
     214  			      length, nmatch, pmatch, eflags);
     215    lock_unlock (dfa->lock);
     216    return err != REG_NOERROR;
     217  }
     218  
     219  #ifdef _LIBC
     220  libc_hidden_def (__regexec)
     221  
     222  # include <shlib-compat.h>
     223  versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
     224  
     225  # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
     226  __typeof__ (__regexec) __compat_regexec;
     227  
     228  int
     229  attribute_compat_text_section
     230  __compat_regexec (const regex_t *__restrict preg,
     231  		  const char *__restrict string, size_t nmatch,
     232  		  regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
     233  {
     234    return regexec (preg, string, nmatch, pmatch,
     235  		  eflags & (REG_NOTBOL | REG_NOTEOL));
     236  }
     237  compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
     238  # endif
     239  #endif
     240  
     241  /* Entry points for GNU code.  */
     242  
     243  /* re_match, re_search, re_match_2, re_search_2
     244  
     245     The former two functions operate on STRING with length LENGTH,
     246     while the later two operate on concatenation of STRING1 and STRING2
     247     with lengths LENGTH1 and LENGTH2, respectively.
     248  
     249     re_match() matches the compiled pattern in BUFP against the string,
     250     starting at index START.
     251  
     252     re_search() first tries matching at index START, then it tries to match
     253     starting from index START + 1, and so on.  The last start position tried
     254     is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
     255     way as re_match().)
     256  
     257     The parameter STOP of re_{match,search}_2 specifies that no match exceeding
     258     the first STOP characters of the concatenation of the strings should be
     259     concerned.
     260  
     261     If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
     262     and all groups is stored in REGS.  (For the "_2" variants, the offsets are
     263     computed relative to the concatenation, not relative to the individual
     264     strings.)
     265  
     266     On success, re_match* functions return the length of the match, re_search*
     267     return the position of the start of the match.  They return -1 on
     268     match failure, -2 on error.  */
     269  
     270  regoff_t
     271  re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
     272  	  Idx start, struct re_registers *regs)
     273  {
     274    return re_search_stub (bufp, string, length, start, 0, length, regs, true);
     275  }
     276  #ifdef _LIBC
     277  weak_alias (__re_match, re_match)
     278  #endif
     279  
     280  regoff_t
     281  re_search (struct re_pattern_buffer *bufp, const char *string, Idx length,
     282  	   Idx start, regoff_t range, struct re_registers *regs)
     283  {
     284    return re_search_stub (bufp, string, length, start, range, length, regs,
     285  			 false);
     286  }
     287  #ifdef _LIBC
     288  weak_alias (__re_search, re_search)
     289  #endif
     290  
     291  regoff_t
     292  re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
     293  	    const char *string2, Idx length2, Idx start,
     294  	    struct re_registers *regs, Idx stop)
     295  {
     296    return re_search_2_stub (bufp, string1, length1, string2, length2,
     297  			   start, 0, regs, stop, true);
     298  }
     299  #ifdef _LIBC
     300  weak_alias (__re_match_2, re_match_2)
     301  #endif
     302  
     303  regoff_t
     304  re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
     305  	     const char *string2, Idx length2, Idx start, regoff_t range,
     306  	     struct re_registers *regs, Idx stop)
     307  {
     308    return re_search_2_stub (bufp, string1, length1, string2, length2,
     309  			   start, range, regs, stop, false);
     310  }
     311  #ifdef _LIBC
     312  weak_alias (__re_search_2, re_search_2)
     313  #endif
     314  
     315  static regoff_t
     316  re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
     317  		  Idx length1, const char *string2, Idx length2, Idx start,
     318  		  regoff_t range, struct re_registers *regs,
     319  		  Idx stop, bool ret_len)
     320  {
     321    const char *str;
     322    regoff_t rval;
     323    Idx len;
     324    char *s = NULL;
     325  
     326    if (__glibc_unlikely ((length1 < 0 || length2 < 0 || stop < 0
     327  			 || INT_ADD_WRAPV (length1, length2, &len))))
     328      return -2;
     329  
     330    /* Concatenate the strings.  */
     331    if (length2 > 0)
     332      if (length1 > 0)
     333        {
     334  	s = re_malloc (char, len);
     335  
     336  	if (__glibc_unlikely (s == NULL))
     337  	  return -2;
     338  #ifdef _LIBC
     339  	memcpy (__mempcpy (s, string1, length1), string2, length2);
     340  #else
     341  	memcpy (s, string1, length1);
     342  	memcpy (s + length1, string2, length2);
     343  #endif
     344  	str = s;
     345        }
     346      else
     347        str = string2;
     348    else
     349      str = string1;
     350  
     351    rval = re_search_stub (bufp, str, len, start, range, stop, regs,
     352  			 ret_len);
     353    re_free (s);
     354    return rval;
     355  }
     356  
     357  /* The parameters have the same meaning as those of re_search.
     358     Additional parameters:
     359     If RET_LEN is true the length of the match is returned (re_match style);
     360     otherwise the position of the match is returned.  */
     361  
     362  static regoff_t
     363  re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length,
     364  		Idx start, regoff_t range, Idx stop, struct re_registers *regs,
     365  		bool ret_len)
     366  {
     367    reg_errcode_t result;
     368    regmatch_t *pmatch;
     369    Idx nregs;
     370    regoff_t rval;
     371    int eflags = 0;
     372    re_dfa_t *dfa = bufp->buffer;
     373    Idx last_start = start + range;
     374  
     375    /* Check for out-of-range.  */
     376    if (__glibc_unlikely (start < 0 || start > length))
     377      return -1;
     378    if (__glibc_unlikely (length < last_start
     379  			|| (0 <= range && last_start < start)))
     380      last_start = length;
     381    else if (__glibc_unlikely (last_start < 0
     382  			     || (range < 0 && start <= last_start)))
     383      last_start = 0;
     384  
     385    lock_lock (dfa->lock);
     386  
     387    eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
     388    eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
     389  
     390    /* Compile fastmap if we haven't yet.  */
     391    if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
     392      re_compile_fastmap (bufp);
     393  
     394    if (__glibc_unlikely (bufp->no_sub))
     395      regs = NULL;
     396  
     397    /* We need at least 1 register.  */
     398    if (regs == NULL)
     399      nregs = 1;
     400    else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED
     401  			     && regs->num_regs <= bufp->re_nsub))
     402      {
     403        nregs = regs->num_regs;
     404        if (__glibc_unlikely (nregs < 1))
     405  	{
     406  	  /* Nothing can be copied to regs.  */
     407  	  regs = NULL;
     408  	  nregs = 1;
     409  	}
     410      }
     411    else
     412      nregs = bufp->re_nsub + 1;
     413    pmatch = re_malloc (regmatch_t, nregs);
     414    if (__glibc_unlikely (pmatch == NULL))
     415      {
     416        rval = -2;
     417        goto out;
     418      }
     419  
     420    result = re_search_internal (bufp, string, length, start, last_start, stop,
     421  			       nregs, pmatch, eflags);
     422  
     423    rval = 0;
     424  
     425    /* I hope we needn't fill their regs with -1's when no match was found.  */
     426    if (result != REG_NOERROR)
     427      rval = result == REG_NOMATCH ? -1 : -2;
     428    else if (regs != NULL)
     429      {
     430        /* If caller wants register contents data back, copy them.  */
     431        bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
     432  					   bufp->regs_allocated);
     433        if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED))
     434  	rval = -2;
     435      }
     436  
     437    if (__glibc_likely (rval == 0))
     438      {
     439        if (ret_len)
     440  	{
     441  	  DEBUG_ASSERT (pmatch[0].rm_so == start);
     442  	  rval = pmatch[0].rm_eo - start;
     443  	}
     444        else
     445  	rval = pmatch[0].rm_so;
     446      }
     447    re_free (pmatch);
     448   out:
     449    lock_unlock (dfa->lock);
     450    return rval;
     451  }
     452  
     453  static unsigned
     454  re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
     455  	      int regs_allocated)
     456  {
     457    int rval = REGS_REALLOCATE;
     458    Idx i;
     459    Idx need_regs = nregs + 1;
     460    /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
     461       uses.  */
     462  
     463    /* Have the register data arrays been allocated?  */
     464    if (regs_allocated == REGS_UNALLOCATED)
     465      { /* No.  So allocate them with malloc.  */
     466        regs->start = re_malloc (regoff_t, need_regs);
     467        if (__glibc_unlikely (regs->start == NULL))
     468  	return REGS_UNALLOCATED;
     469        regs->end = re_malloc (regoff_t, need_regs);
     470        if (__glibc_unlikely (regs->end == NULL))
     471  	{
     472  	  re_free (regs->start);
     473  	  return REGS_UNALLOCATED;
     474  	}
     475        regs->num_regs = need_regs;
     476      }
     477    else if (regs_allocated == REGS_REALLOCATE)
     478      { /* Yes.  If we need more elements than were already
     479  	 allocated, reallocate them.  If we need fewer, just
     480  	 leave it alone.  */
     481        if (__glibc_unlikely (need_regs > regs->num_regs))
     482  	{
     483  	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
     484  	  regoff_t *new_end;
     485  	  if (__glibc_unlikely (new_start == NULL))
     486  	    return REGS_UNALLOCATED;
     487  	  new_end = re_realloc (regs->end, regoff_t, need_regs);
     488  	  if (__glibc_unlikely (new_end == NULL))
     489  	    {
     490  	      re_free (new_start);
     491  	      return REGS_UNALLOCATED;
     492  	    }
     493  	  regs->start = new_start;
     494  	  regs->end = new_end;
     495  	  regs->num_regs = need_regs;
     496  	}
     497      }
     498    else
     499      {
     500        DEBUG_ASSERT (regs_allocated == REGS_FIXED);
     501        /* This function may not be called with REGS_FIXED and nregs too big.  */
     502        DEBUG_ASSERT (nregs <= regs->num_regs);
     503        rval = REGS_FIXED;
     504      }
     505  
     506    /* Copy the regs.  */
     507    for (i = 0; i < nregs; ++i)
     508      {
     509        regs->start[i] = pmatch[i].rm_so;
     510        regs->end[i] = pmatch[i].rm_eo;
     511      }
     512    for ( ; i < regs->num_regs; ++i)
     513      regs->start[i] = regs->end[i] = -1;
     514  
     515    return rval;
     516  }
     517  
     518  /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
     519     ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
     520     this memory for recording register information.  STARTS and ENDS
     521     must be allocated using the malloc library routine, and must each
     522     be at least NUM_REGS * sizeof (regoff_t) bytes long.
     523  
     524     If NUM_REGS == 0, then subsequent matches should allocate their own
     525     register data.
     526  
     527     Unless this function is called, the first search or match using
     528     PATTERN_BUFFER will allocate its own register data, without
     529     freeing the old data.  */
     530  
     531  void
     532  re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
     533  		  __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
     534  {
     535    if (num_regs)
     536      {
     537        bufp->regs_allocated = REGS_REALLOCATE;
     538        regs->num_regs = num_regs;
     539        regs->start = starts;
     540        regs->end = ends;
     541      }
     542    else
     543      {
     544        bufp->regs_allocated = REGS_UNALLOCATED;
     545        regs->num_regs = 0;
     546        regs->start = regs->end = NULL;
     547      }
     548  }
     549  #ifdef _LIBC
     550  weak_alias (__re_set_registers, re_set_registers)
     551  #endif
     552  
     553  /* Entry points compatible with 4.2 BSD regex library.  We don't define
     554     them unless specifically requested.  */
     555  
     556  #if defined _REGEX_RE_COMP || defined _LIBC
     557  int
     558  # ifdef _LIBC
     559  weak_function
     560  # endif
     561  re_exec (const char *s)
     562  {
     563    return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
     564  }
     565  #endif /* _REGEX_RE_COMP */
     566  
     567  /* Internal entry point.  */
     568  
     569  /* Searches for a compiled pattern PREG in the string STRING, whose
     570     length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
     571     meaning as with regexec.  LAST_START is START + RANGE, where
     572     START and RANGE have the same meaning as with re_search.
     573     Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
     574     otherwise return the error code.
     575     Note: We assume front end functions already check ranges.
     576     (0 <= LAST_START && LAST_START <= LENGTH)  */
     577  
     578  static reg_errcode_t
     579  __attribute_warn_unused_result__
     580  re_search_internal (const regex_t *preg, const char *string, Idx length,
     581  		    Idx start, Idx last_start, Idx stop, size_t nmatch,
     582  		    regmatch_t pmatch[], int eflags)
     583  {
     584    reg_errcode_t err;
     585    const re_dfa_t *dfa = preg->buffer;
     586    Idx left_lim, right_lim;
     587    int incr;
     588    bool fl_longest_match;
     589    int match_kind;
     590    Idx match_first;
     591    Idx match_last = -1;
     592    Idx extra_nmatch;
     593    bool sb;
     594    int ch;
     595    re_match_context_t mctx = { .dfa = dfa };
     596    char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
     597  		    && start != last_start && !preg->can_be_null)
     598  		   ? preg->fastmap : NULL);
     599    RE_TRANSLATE_TYPE t = preg->translate;
     600  
     601    extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
     602    nmatch -= extra_nmatch;
     603  
     604    /* Check if the DFA haven't been compiled.  */
     605    if (__glibc_unlikely (preg->used == 0 || dfa->init_state == NULL
     606  			|| dfa->init_state_word == NULL
     607  			|| dfa->init_state_nl == NULL
     608  			|| dfa->init_state_begbuf == NULL))
     609      return REG_NOMATCH;
     610  
     611    /* We assume front-end functions already check them.  */
     612    DEBUG_ASSERT (0 <= last_start && last_start <= length);
     613  
     614    /* If initial states with non-begbuf contexts have no elements,
     615       the regex must be anchored.  If preg->newline_anchor is set,
     616       we'll never use init_state_nl, so do not check it.  */
     617    if (dfa->init_state->nodes.nelem == 0
     618        && dfa->init_state_word->nodes.nelem == 0
     619        && (dfa->init_state_nl->nodes.nelem == 0
     620  	  || !preg->newline_anchor))
     621      {
     622        if (start != 0 && last_start != 0)
     623          return REG_NOMATCH;
     624        start = last_start = 0;
     625      }
     626  
     627    /* We must check the longest matching, if nmatch > 0.  */
     628    fl_longest_match = (nmatch != 0 || dfa->nbackref);
     629  
     630    err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
     631  			    preg->translate, (preg->syntax & RE_ICASE) != 0,
     632  			    dfa);
     633    if (__glibc_unlikely (err != REG_NOERROR))
     634      goto free_return;
     635    mctx.input.stop = stop;
     636    mctx.input.raw_stop = stop;
     637    mctx.input.newline_anchor = preg->newline_anchor;
     638  
     639    err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
     640    if (__glibc_unlikely (err != REG_NOERROR))
     641      goto free_return;
     642  
     643    /* We will log all the DFA states through which the dfa pass,
     644       if nmatch > 1, or this dfa has "multibyte node", which is a
     645       back-reference or a node which can accept multibyte character or
     646       multi character collating element.  */
     647    if (nmatch > 1 || dfa->has_mb_node)
     648      {
     649        /* Avoid overflow.  */
     650        if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
     651  			     <= mctx.input.bufs_len)))
     652  	{
     653  	  err = REG_ESPACE;
     654  	  goto free_return;
     655  	}
     656  
     657        mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
     658        if (__glibc_unlikely (mctx.state_log == NULL))
     659  	{
     660  	  err = REG_ESPACE;
     661  	  goto free_return;
     662  	}
     663      }
     664  
     665    match_first = start;
     666    mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
     667  			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
     668  
     669    /* Check incrementally whether the input string matches.  */
     670    incr = (last_start < start) ? -1 : 1;
     671    left_lim = (last_start < start) ? last_start : start;
     672    right_lim = (last_start < start) ? start : last_start;
     673    sb = dfa->mb_cur_max == 1;
     674    match_kind =
     675      (fastmap
     676       ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
     677  	| (start <= last_start ? 2 : 0)
     678  	| (t != NULL ? 1 : 0))
     679       : 8);
     680  
     681    for (;; match_first += incr)
     682      {
     683        err = REG_NOMATCH;
     684        if (match_first < left_lim || right_lim < match_first)
     685  	goto free_return;
     686  
     687        /* Advance as rapidly as possible through the string, until we
     688  	 find a plausible place to start matching.  This may be done
     689  	 with varying efficiency, so there are various possibilities:
     690  	 only the most common of them are specialized, in order to
     691  	 save on code size.  We use a switch statement for speed.  */
     692        switch (match_kind)
     693  	{
     694  	case 8:
     695  	  /* No fastmap.  */
     696  	  break;
     697  
     698  	case 7:
     699  	  /* Fastmap with single-byte translation, match forward.  */
     700  	  while (__glibc_likely (match_first < right_lim)
     701  		 && !fastmap[t[(unsigned char) string[match_first]]])
     702  	    ++match_first;
     703  	  goto forward_match_found_start_or_reached_end;
     704  
     705  	case 6:
     706  	  /* Fastmap without translation, match forward.  */
     707  	  while (__glibc_likely (match_first < right_lim)
     708  		 && !fastmap[(unsigned char) string[match_first]])
     709  	    ++match_first;
     710  
     711  	forward_match_found_start_or_reached_end:
     712  	  if (__glibc_unlikely (match_first == right_lim))
     713  	    {
     714  	      ch = match_first >= length
     715  		       ? 0 : (unsigned char) string[match_first];
     716  	      if (!fastmap[t ? t[ch] : ch])
     717  		goto free_return;
     718  	    }
     719  	  break;
     720  
     721  	case 4:
     722  	case 5:
     723  	  /* Fastmap without multi-byte translation, match backwards.  */
     724  	  while (match_first >= left_lim)
     725  	    {
     726  	      ch = match_first >= length
     727  		       ? 0 : (unsigned char) string[match_first];
     728  	      if (fastmap[t ? t[ch] : ch])
     729  		break;
     730  	      --match_first;
     731  	    }
     732  	  if (match_first < left_lim)
     733  	    goto free_return;
     734  	  break;
     735  
     736  	default:
     737  	  /* In this case, we can't determine easily the current byte,
     738  	     since it might be a component byte of a multibyte
     739  	     character.  Then we use the constructed buffer instead.  */
     740  	  for (;;)
     741  	    {
     742  	      /* If MATCH_FIRST is out of the valid range, reconstruct the
     743  		 buffers.  */
     744  	      __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
     745  	      if (__glibc_unlikely (offset
     746  				    >= (__re_size_t) mctx.input.valid_raw_len))
     747  		{
     748  		  err = re_string_reconstruct (&mctx.input, match_first,
     749  					       eflags);
     750  		  if (__glibc_unlikely (err != REG_NOERROR))
     751  		    goto free_return;
     752  
     753  		  offset = match_first - mctx.input.raw_mbs_idx;
     754  		}
     755  	      /* Use buffer byte if OFFSET is in buffer, otherwise '\0'.  */
     756  	      ch = (offset < mctx.input.valid_len
     757  		    ? re_string_byte_at (&mctx.input, offset) : 0);
     758  	      if (fastmap[ch])
     759  		break;
     760  	      match_first += incr;
     761  	      if (match_first < left_lim || match_first > right_lim)
     762  		{
     763  		  err = REG_NOMATCH;
     764  		  goto free_return;
     765  		}
     766  	    }
     767  	  break;
     768  	}
     769  
     770        /* Reconstruct the buffers so that the matcher can assume that
     771  	 the matching starts from the beginning of the buffer.  */
     772        err = re_string_reconstruct (&mctx.input, match_first, eflags);
     773        if (__glibc_unlikely (err != REG_NOERROR))
     774  	goto free_return;
     775  
     776        /* Don't consider this char as a possible match start if it part,
     777           yet isn't the head, of a multibyte character.  */
     778        if (!sb && !re_string_first_byte (&mctx.input, 0))
     779  	continue;
     780  
     781        /* It seems to be appropriate one, then use the matcher.  */
     782        /* We assume that the matching starts from 0.  */
     783        mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
     784        match_last = check_matching (&mctx, fl_longest_match,
     785  				   start <= last_start ? &match_first : NULL);
     786        if (match_last != -1)
     787  	{
     788  	  if (__glibc_unlikely (match_last == -2))
     789  	    {
     790  	      err = REG_ESPACE;
     791  	      goto free_return;
     792  	    }
     793  	  else
     794  	    {
     795  	      mctx.match_last = match_last;
     796  	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
     797  		{
     798  		  re_dfastate_t *pstate = mctx.state_log[match_last];
     799  		  mctx.last_node = check_halt_state_context (&mctx, pstate,
     800  							     match_last);
     801  		}
     802  	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
     803  		  || dfa->nbackref)
     804  		{
     805  		  err = prune_impossible_nodes (&mctx);
     806  		  if (err == REG_NOERROR)
     807  		    break;
     808  		  if (__glibc_unlikely (err != REG_NOMATCH))
     809  		    goto free_return;
     810  		  match_last = -1;
     811  		}
     812  	      else
     813  		break; /* We found a match.  */
     814  	    }
     815  	}
     816  
     817        match_ctx_clean (&mctx);
     818      }
     819  
     820    DEBUG_ASSERT (match_last != -1);
     821    DEBUG_ASSERT (err == REG_NOERROR);
     822  
     823    /* Set pmatch[] if we need.  */
     824    if (nmatch > 0)
     825      {
     826        Idx reg_idx;
     827  
     828        /* Initialize registers.  */
     829        for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
     830  	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
     831  
     832        /* Set the points where matching start/end.  */
     833        pmatch[0].rm_so = 0;
     834        pmatch[0].rm_eo = mctx.match_last;
     835        /* FIXME: This function should fail if mctx.match_last exceeds
     836  	 the maximum possible regoff_t value.  We need a new error
     837  	 code REG_OVERFLOW.  */
     838  
     839        if (!preg->no_sub && nmatch > 1)
     840  	{
     841  	  err = set_regs (preg, &mctx, nmatch, pmatch,
     842  			  dfa->has_plural_match && dfa->nbackref > 0);
     843  	  if (__glibc_unlikely (err != REG_NOERROR))
     844  	    goto free_return;
     845  	}
     846  
     847        /* At last, add the offset to each register, since we slid
     848  	 the buffers so that we could assume that the matching starts
     849  	 from 0.  */
     850        for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
     851  	if (pmatch[reg_idx].rm_so != -1)
     852  	  {
     853  	    if (__glibc_unlikely (mctx.input.offsets_needed != 0))
     854  	      {
     855  		pmatch[reg_idx].rm_so =
     856  		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
     857  		   ? mctx.input.valid_raw_len
     858  		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
     859  		pmatch[reg_idx].rm_eo =
     860  		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
     861  		   ? mctx.input.valid_raw_len
     862  		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
     863  	      }
     864  	    pmatch[reg_idx].rm_so += match_first;
     865  	    pmatch[reg_idx].rm_eo += match_first;
     866  	  }
     867        for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
     868  	{
     869  	  pmatch[nmatch + reg_idx].rm_so = -1;
     870  	  pmatch[nmatch + reg_idx].rm_eo = -1;
     871  	}
     872  
     873        if (dfa->subexp_map)
     874  	for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
     875  	  if (dfa->subexp_map[reg_idx] != reg_idx)
     876  	    {
     877  	      pmatch[reg_idx + 1].rm_so
     878  		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
     879  	      pmatch[reg_idx + 1].rm_eo
     880  		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
     881  	    }
     882      }
     883  
     884   free_return:
     885    re_free (mctx.state_log);
     886    if (dfa->nbackref)
     887      match_ctx_free (&mctx);
     888    re_string_destruct (&mctx.input);
     889    return err;
     890  }
     891  
     892  static reg_errcode_t
     893  __attribute_warn_unused_result__
     894  prune_impossible_nodes (re_match_context_t *mctx)
     895  {
     896    const re_dfa_t *const dfa = mctx->dfa;
     897    Idx halt_node, match_last;
     898    reg_errcode_t ret;
     899    re_dfastate_t **sifted_states;
     900    re_dfastate_t **lim_states = NULL;
     901    re_sift_context_t sctx;
     902    DEBUG_ASSERT (mctx->state_log != NULL);
     903    match_last = mctx->match_last;
     904    halt_node = mctx->last_node;
     905  
     906    /* Avoid overflow.  */
     907    if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
     908  			<= match_last))
     909      return REG_ESPACE;
     910  
     911    sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
     912    if (__glibc_unlikely (sifted_states == NULL))
     913      {
     914        ret = REG_ESPACE;
     915        goto free_return;
     916      }
     917    if (dfa->nbackref)
     918      {
     919        lim_states = re_malloc (re_dfastate_t *, match_last + 1);
     920        if (__glibc_unlikely (lim_states == NULL))
     921  	{
     922  	  ret = REG_ESPACE;
     923  	  goto free_return;
     924  	}
     925        while (1)
     926  	{
     927  	  memset (lim_states, '\0',
     928  		  sizeof (re_dfastate_t *) * (match_last + 1));
     929  	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
     930  			 match_last);
     931  	  ret = sift_states_backward (mctx, &sctx);
     932  	  re_node_set_free (&sctx.limits);
     933  	  if (__glibc_unlikely (ret != REG_NOERROR))
     934  	      goto free_return;
     935  	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
     936  	    break;
     937  	  do
     938  	    {
     939  	      --match_last;
     940  	      if (match_last < 0)
     941  		{
     942  		  ret = REG_NOMATCH;
     943  		  goto free_return;
     944  		}
     945  	    } while (mctx->state_log[match_last] == NULL
     946  		     || !mctx->state_log[match_last]->halt);
     947  	  halt_node = check_halt_state_context (mctx,
     948  						mctx->state_log[match_last],
     949  						match_last);
     950  	}
     951        ret = merge_state_array (dfa, sifted_states, lim_states,
     952  			       match_last + 1);
     953        re_free (lim_states);
     954        lim_states = NULL;
     955        if (__glibc_unlikely (ret != REG_NOERROR))
     956  	goto free_return;
     957      }
     958    else
     959      {
     960        sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
     961        ret = sift_states_backward (mctx, &sctx);
     962        re_node_set_free (&sctx.limits);
     963        if (__glibc_unlikely (ret != REG_NOERROR))
     964  	goto free_return;
     965        if (sifted_states[0] == NULL)
     966  	{
     967  	  ret = REG_NOMATCH;
     968  	  goto free_return;
     969  	}
     970      }
     971    re_free (mctx->state_log);
     972    mctx->state_log = sifted_states;
     973    sifted_states = NULL;
     974    mctx->last_node = halt_node;
     975    mctx->match_last = match_last;
     976    ret = REG_NOERROR;
     977   free_return:
     978    re_free (sifted_states);
     979    re_free (lim_states);
     980    return ret;
     981  }
     982  
     983  /* Acquire an initial state and return it.
     984     We must select appropriate initial state depending on the context,
     985     since initial states may have constraints like "\<", "^", etc..  */
     986  
     987  static __always_inline re_dfastate_t *
     988  acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
     989  			    Idx idx)
     990  {
     991    const re_dfa_t *const dfa = mctx->dfa;
     992    if (dfa->init_state->has_constraint)
     993      {
     994        unsigned int context;
     995        context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
     996        if (IS_WORD_CONTEXT (context))
     997  	return dfa->init_state_word;
     998        else if (IS_ORDINARY_CONTEXT (context))
     999  	return dfa->init_state;
    1000        else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
    1001  	return dfa->init_state_begbuf;
    1002        else if (IS_NEWLINE_CONTEXT (context))
    1003  	return dfa->init_state_nl;
    1004        else if (IS_BEGBUF_CONTEXT (context))
    1005  	{
    1006  	  /* It is relatively rare case, then calculate on demand.  */
    1007  	  return re_acquire_state_context (err, dfa,
    1008  					   dfa->init_state->entrance_nodes,
    1009  					   context);
    1010  	}
    1011        else
    1012  	/* Must not happen?  */
    1013  	return dfa->init_state;
    1014      }
    1015    else
    1016      return dfa->init_state;
    1017  }
    1018  
    1019  /* Check whether the regular expression match input string INPUT or not,
    1020     and return the index where the matching end.  Return -1 if
    1021     there is no match, and return -2 in case of an error.
    1022     FL_LONGEST_MATCH means we want the POSIX longest matching.
    1023     If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
    1024     next place where we may want to try matching.
    1025     Note that the matcher assumes that the matching starts from the current
    1026     index of the buffer.  */
    1027  
    1028  static Idx
    1029  __attribute_warn_unused_result__
    1030  check_matching (re_match_context_t *mctx, bool fl_longest_match,
    1031  		Idx *p_match_first)
    1032  {
    1033    const re_dfa_t *const dfa = mctx->dfa;
    1034    reg_errcode_t err;
    1035    Idx match = 0;
    1036    Idx match_last = -1;
    1037    Idx cur_str_idx = re_string_cur_idx (&mctx->input);
    1038    re_dfastate_t *cur_state;
    1039    bool at_init_state = p_match_first != NULL;
    1040    Idx next_start_idx = cur_str_idx;
    1041  
    1042    err = REG_NOERROR;
    1043    cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
    1044    /* An initial state must not be NULL (invalid).  */
    1045    if (__glibc_unlikely (cur_state == NULL))
    1046      {
    1047        DEBUG_ASSERT (err == REG_ESPACE);
    1048        return -2;
    1049      }
    1050  
    1051    if (mctx->state_log != NULL)
    1052      {
    1053        mctx->state_log[cur_str_idx] = cur_state;
    1054  
    1055        /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
    1056  	 later.  E.g. Processing back references.  */
    1057        if (__glibc_unlikely (dfa->nbackref))
    1058  	{
    1059  	  at_init_state = false;
    1060  	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
    1061  	  if (__glibc_unlikely (err != REG_NOERROR))
    1062  	    return err;
    1063  
    1064  	  if (cur_state->has_backref)
    1065  	    {
    1066  	      err = transit_state_bkref (mctx, &cur_state->nodes);
    1067  	      if (__glibc_unlikely (err != REG_NOERROR))
    1068  		return err;
    1069  	    }
    1070  	}
    1071      }
    1072  
    1073    /* If the RE accepts NULL string.  */
    1074    if (__glibc_unlikely (cur_state->halt))
    1075      {
    1076        if (!cur_state->has_constraint
    1077  	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
    1078  	{
    1079  	  if (!fl_longest_match)
    1080  	    return cur_str_idx;
    1081  	  else
    1082  	    {
    1083  	      match_last = cur_str_idx;
    1084  	      match = 1;
    1085  	    }
    1086  	}
    1087      }
    1088  
    1089    while (!re_string_eoi (&mctx->input))
    1090      {
    1091        re_dfastate_t *old_state = cur_state;
    1092        Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
    1093  
    1094        if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len)
    1095  	   && mctx->input.bufs_len < mctx->input.len)
    1096  	  || (__glibc_unlikely (next_char_idx >= mctx->input.valid_len)
    1097  	      && mctx->input.valid_len < mctx->input.len))
    1098  	{
    1099  	  err = extend_buffers (mctx, next_char_idx + 1);
    1100  	  if (__glibc_unlikely (err != REG_NOERROR))
    1101  	    {
    1102  	      DEBUG_ASSERT (err == REG_ESPACE);
    1103  	      return -2;
    1104  	    }
    1105  	}
    1106  
    1107        cur_state = transit_state (&err, mctx, cur_state);
    1108        if (mctx->state_log != NULL)
    1109  	cur_state = merge_state_with_log (&err, mctx, cur_state);
    1110  
    1111        if (cur_state == NULL)
    1112  	{
    1113  	  /* Reached the invalid state or an error.  Try to recover a valid
    1114  	     state using the state log, if available and if we have not
    1115  	     already found a valid (even if not the longest) match.  */
    1116  	  if (__glibc_unlikely (err != REG_NOERROR))
    1117  	    return -2;
    1118  
    1119  	  if (mctx->state_log == NULL
    1120  	      || (match && !fl_longest_match)
    1121  	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
    1122  	    break;
    1123  	}
    1124  
    1125        if (__glibc_unlikely (at_init_state))
    1126  	{
    1127  	  if (old_state == cur_state)
    1128  	    next_start_idx = next_char_idx;
    1129  	  else
    1130  	    at_init_state = false;
    1131  	}
    1132  
    1133        if (cur_state->halt)
    1134  	{
    1135  	  /* Reached a halt state.
    1136  	     Check the halt state can satisfy the current context.  */
    1137  	  if (!cur_state->has_constraint
    1138  	      || check_halt_state_context (mctx, cur_state,
    1139  					   re_string_cur_idx (&mctx->input)))
    1140  	    {
    1141  	      /* We found an appropriate halt state.  */
    1142  	      match_last = re_string_cur_idx (&mctx->input);
    1143  	      match = 1;
    1144  
    1145  	      /* We found a match, do not modify match_first below.  */
    1146  	      p_match_first = NULL;
    1147  	      if (!fl_longest_match)
    1148  		break;
    1149  	    }
    1150  	}
    1151      }
    1152  
    1153    if (p_match_first)
    1154      *p_match_first += next_start_idx;
    1155  
    1156    return match_last;
    1157  }
    1158  
    1159  /* Check NODE match the current context.  */
    1160  
    1161  static bool
    1162  check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
    1163  {
    1164    re_token_type_t type = dfa->nodes[node].type;
    1165    unsigned int constraint = dfa->nodes[node].constraint;
    1166    if (type != END_OF_RE)
    1167      return false;
    1168    if (!constraint)
    1169      return true;
    1170    if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
    1171      return false;
    1172    return true;
    1173  }
    1174  
    1175  /* Check the halt state STATE match the current context.
    1176     Return 0 if not match, if the node, STATE has, is a halt node and
    1177     match the context, return the node.  */
    1178  
    1179  static Idx
    1180  check_halt_state_context (const re_match_context_t *mctx,
    1181  			  const re_dfastate_t *state, Idx idx)
    1182  {
    1183    Idx i;
    1184    unsigned int context;
    1185    DEBUG_ASSERT (state->halt);
    1186    context = re_string_context_at (&mctx->input, idx, mctx->eflags);
    1187    for (i = 0; i < state->nodes.nelem; ++i)
    1188      if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
    1189        return state->nodes.elems[i];
    1190    return 0;
    1191  }
    1192  
    1193  /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
    1194     corresponding to the DFA).
    1195     Return the destination node, and update EPS_VIA_NODES;
    1196     return -1 on match failure, -2 on error.  */
    1197  
    1198  static Idx
    1199  proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
    1200  		   regmatch_t *prevregs,
    1201  		   Idx *pidx, Idx node, re_node_set *eps_via_nodes,
    1202  		   struct re_fail_stack_t *fs)
    1203  {
    1204    const re_dfa_t *const dfa = mctx->dfa;
    1205    if (IS_EPSILON_NODE (dfa->nodes[node].type))
    1206      {
    1207        re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
    1208        re_node_set *edests = &dfa->edests[node];
    1209  
    1210        if (! re_node_set_contains (eps_via_nodes, node))
    1211          {
    1212            bool ok = re_node_set_insert (eps_via_nodes, node);
    1213            if (__glibc_unlikely (! ok))
    1214              return -2;
    1215          }
    1216  
    1217        /* Pick a valid destination, or return -1 if none is found.  */
    1218        Idx dest_node = -1;
    1219        for (Idx i = 0; i < edests->nelem; i++)
    1220  	{
    1221  	  Idx candidate = edests->elems[i];
    1222  	  if (!re_node_set_contains (cur_nodes, candidate))
    1223  	    continue;
    1224            if (dest_node == -1)
    1225  	    dest_node = candidate;
    1226  
    1227  	  else
    1228  	    {
    1229  	      /* In order to avoid infinite loop like "(a*)*", return the second
    1230  		 epsilon-transition if the first was already considered.  */
    1231  	      if (re_node_set_contains (eps_via_nodes, dest_node))
    1232  		return candidate;
    1233  
    1234  	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
    1235  	      else if (fs != NULL
    1236  		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
    1237  					   prevregs, eps_via_nodes))
    1238  		return -2;
    1239  
    1240  	      /* We know we are going to exit.  */
    1241  	      break;
    1242  	    }
    1243  	}
    1244        return dest_node;
    1245      }
    1246    else
    1247      {
    1248        Idx naccepted = 0;
    1249        re_token_type_t type = dfa->nodes[node].type;
    1250  
    1251        if (dfa->nodes[node].accept_mb)
    1252  	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
    1253        else if (type == OP_BACK_REF)
    1254  	{
    1255  	  Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
    1256  	  if (subexp_idx < nregs)
    1257  	    naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
    1258  	  if (fs != NULL)
    1259  	    {
    1260  	      if (subexp_idx >= nregs
    1261  		  || regs[subexp_idx].rm_so == -1
    1262  		  || regs[subexp_idx].rm_eo == -1)
    1263  		return -1;
    1264  	      else if (naccepted)
    1265  		{
    1266  		  char *buf = (char *) re_string_get_buffer (&mctx->input);
    1267  		  if (mctx->input.valid_len - *pidx < naccepted
    1268  		      || (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
    1269  				  naccepted)
    1270  			  != 0))
    1271  		    return -1;
    1272  		}
    1273  	    }
    1274  
    1275  	  if (naccepted == 0)
    1276  	    {
    1277  	      Idx dest_node;
    1278  	      bool ok = re_node_set_insert (eps_via_nodes, node);
    1279  	      if (__glibc_unlikely (! ok))
    1280  		return -2;
    1281  	      dest_node = dfa->edests[node].elems[0];
    1282  	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
    1283  					dest_node))
    1284  		return dest_node;
    1285  	    }
    1286  	}
    1287  
    1288        if (naccepted != 0
    1289  	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
    1290  	{
    1291  	  Idx dest_node = dfa->nexts[node];
    1292  	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
    1293  	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
    1294  		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
    1295  					       dest_node)))
    1296  	    return -1;
    1297  	  re_node_set_empty (eps_via_nodes);
    1298  	  return dest_node;
    1299  	}
    1300      }
    1301    return -1;
    1302  }
    1303  
    1304  static reg_errcode_t
    1305  __attribute_warn_unused_result__
    1306  push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
    1307  		 Idx nregs, regmatch_t *regs, regmatch_t *prevregs,
    1308  		 re_node_set *eps_via_nodes)
    1309  {
    1310    reg_errcode_t err;
    1311    Idx num = fs->num;
    1312    if (num == fs->alloc)
    1313      {
    1314        struct re_fail_stack_ent_t *new_array;
    1315        new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t,
    1316                                fs->alloc * 2);
    1317        if (new_array == NULL)
    1318  	return REG_ESPACE;
    1319        fs->alloc *= 2;
    1320        fs->stack = new_array;
    1321      }
    1322    fs->stack[num].idx = str_idx;
    1323    fs->stack[num].node = dest_node;
    1324    fs->stack[num].regs = re_malloc (regmatch_t, 2 * nregs);
    1325    if (fs->stack[num].regs == NULL)
    1326      return REG_ESPACE;
    1327    fs->num = num + 1;
    1328    memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
    1329    memcpy (fs->stack[num].regs + nregs, prevregs, sizeof (regmatch_t) * nregs);
    1330    err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
    1331    return err;
    1332  }
    1333  
    1334  static Idx
    1335  pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
    1336  		regmatch_t *regs, regmatch_t *prevregs,
    1337  		re_node_set *eps_via_nodes)
    1338  {
    1339    if (fs == NULL || fs->num == 0)
    1340      return -1;
    1341    Idx num = --fs->num;
    1342    *pidx = fs->stack[num].idx;
    1343    memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
    1344    memcpy (prevregs, fs->stack[num].regs + nregs, sizeof (regmatch_t) * nregs);
    1345    re_node_set_free (eps_via_nodes);
    1346    re_free (fs->stack[num].regs);
    1347    *eps_via_nodes = fs->stack[num].eps_via_nodes;
    1348    DEBUG_ASSERT (0 <= fs->stack[num].node);
    1349    return fs->stack[num].node;
    1350  }
    1351  
    1352  
    1353  #define DYNARRAY_STRUCT  regmatch_list
    1354  #define DYNARRAY_ELEMENT regmatch_t
    1355  #define DYNARRAY_PREFIX  regmatch_list_
    1356  #include <malloc/dynarray-skeleton.c>
    1357  
    1358  /* Set the positions where the subexpressions are starts/ends to registers
    1359     PMATCH.
    1360     Note: We assume that pmatch[0] is already set, and
    1361     pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
    1362  
    1363  static reg_errcode_t
    1364  __attribute_warn_unused_result__
    1365  set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
    1366  	  regmatch_t *pmatch, bool fl_backtrack)
    1367  {
    1368    const re_dfa_t *dfa = preg->buffer;
    1369    Idx idx, cur_node;
    1370    re_node_set eps_via_nodes;
    1371    struct re_fail_stack_t *fs;
    1372    struct re_fail_stack_t fs_body = { 0, 2, NULL };
    1373    struct regmatch_list prev_match;
    1374    regmatch_list_init (&prev_match);
    1375  
    1376    DEBUG_ASSERT (nmatch > 1);
    1377    DEBUG_ASSERT (mctx->state_log != NULL);
    1378    if (fl_backtrack)
    1379      {
    1380        fs = &fs_body;
    1381        fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
    1382        if (fs->stack == NULL)
    1383  	return REG_ESPACE;
    1384      }
    1385    else
    1386      fs = NULL;
    1387  
    1388    cur_node = dfa->init_node;
    1389    re_node_set_init_empty (&eps_via_nodes);
    1390  
    1391    if (!regmatch_list_resize (&prev_match, nmatch))
    1392      {
    1393        regmatch_list_free (&prev_match);
    1394        free_fail_stack_return (fs);
    1395        return REG_ESPACE;
    1396      }
    1397    regmatch_t *prev_idx_match = regmatch_list_begin (&prev_match);
    1398    memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
    1399  
    1400    for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
    1401      {
    1402        update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
    1403  
    1404        if ((idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
    1405  	  || (fs && re_node_set_contains (&eps_via_nodes, cur_node)))
    1406  	{
    1407  	  Idx reg_idx;
    1408  	  cur_node = -1;
    1409  	  if (fs)
    1410  	    {
    1411  	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
    1412  		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
    1413  		  {
    1414  		    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
    1415  					       prev_idx_match, &eps_via_nodes);
    1416  		    break;
    1417  		  }
    1418  	    }
    1419  	  if (cur_node < 0)
    1420  	    {
    1421  	      re_node_set_free (&eps_via_nodes);
    1422  	      regmatch_list_free (&prev_match);
    1423  	      return free_fail_stack_return (fs);
    1424  	    }
    1425  	}
    1426  
    1427        /* Proceed to next node.  */
    1428        cur_node = proceed_next_node (mctx, nmatch, pmatch, prev_idx_match,
    1429  				    &idx, cur_node,
    1430  				    &eps_via_nodes, fs);
    1431  
    1432        if (__glibc_unlikely (cur_node < 0))
    1433  	{
    1434  	  if (__glibc_unlikely (cur_node == -2))
    1435  	    {
    1436  	      re_node_set_free (&eps_via_nodes);
    1437  	      regmatch_list_free (&prev_match);
    1438  	      free_fail_stack_return (fs);
    1439  	      return REG_ESPACE;
    1440  	    }
    1441  	  cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
    1442  				     prev_idx_match, &eps_via_nodes);
    1443  	  if (cur_node < 0)
    1444  	    {
    1445  	      re_node_set_free (&eps_via_nodes);
    1446  	      regmatch_list_free (&prev_match);
    1447  	      free_fail_stack_return (fs);
    1448  	      return REG_NOMATCH;
    1449  	    }
    1450  	}
    1451      }
    1452    re_node_set_free (&eps_via_nodes);
    1453    regmatch_list_free (&prev_match);
    1454    return free_fail_stack_return (fs);
    1455  }
    1456  
    1457  static reg_errcode_t
    1458  free_fail_stack_return (struct re_fail_stack_t *fs)
    1459  {
    1460    if (fs)
    1461      {
    1462        Idx fs_idx;
    1463        for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
    1464  	{
    1465  	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
    1466  	  re_free (fs->stack[fs_idx].regs);
    1467  	}
    1468        re_free (fs->stack);
    1469      }
    1470    return REG_NOERROR;
    1471  }
    1472  
    1473  static void
    1474  update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
    1475  	     regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
    1476  {
    1477    int type = dfa->nodes[cur_node].type;
    1478    if (type == OP_OPEN_SUBEXP)
    1479      {
    1480        Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
    1481  
    1482        /* We are at the first node of this sub expression.  */
    1483        if (reg_num < nmatch)
    1484  	{
    1485  	  pmatch[reg_num].rm_so = cur_idx;
    1486  	  pmatch[reg_num].rm_eo = -1;
    1487  	}
    1488      }
    1489    else if (type == OP_CLOSE_SUBEXP)
    1490      {
    1491        /* We are at the last node of this sub expression.  */
    1492        Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
    1493        if (reg_num < nmatch)
    1494  	{
    1495  	  if (pmatch[reg_num].rm_so < cur_idx)
    1496  	    {
    1497  	      pmatch[reg_num].rm_eo = cur_idx;
    1498  	      /* This is a non-empty match or we are not inside an optional
    1499  		 subexpression.  Accept this right away.  */
    1500  	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
    1501  	    }
    1502  	  else
    1503  	    {
    1504  	      if (dfa->nodes[cur_node].opt_subexp
    1505  		  && prev_idx_match[reg_num].rm_so != -1)
    1506  		/* We transited through an empty match for an optional
    1507  		   subexpression, like (a?)*, and this is not the subexp's
    1508  		   first match.  Copy back the old content of the registers
    1509  		   so that matches of an inner subexpression are undone as
    1510  		   well, like in ((a?))*.  */
    1511  		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
    1512  	      else
    1513  		/* We completed a subexpression, but it may be part of
    1514  		   an optional one, so do not update PREV_IDX_MATCH.  */
    1515  		pmatch[reg_num].rm_eo = cur_idx;
    1516  	    }
    1517  	}
    1518      }
    1519  }
    1520  
    1521  /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
    1522     and sift the nodes in each states according to the following rules.
    1523     Updated state_log will be wrote to STATE_LOG.
    1524  
    1525     Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
    1526       1. When STR_IDX == MATCH_LAST(the last index in the state_log):
    1527  	If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
    1528  	the LAST_NODE, we throw away the node 'a'.
    1529       2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
    1530  	string 's' and transit to 'b':
    1531  	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
    1532  	   away the node 'a'.
    1533  	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
    1534  	    thrown away, we throw away the node 'a'.
    1535       3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
    1536  	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
    1537  	   node 'a'.
    1538  	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
    1539  	    we throw away the node 'a'.  */
    1540  
    1541  #define STATE_NODE_CONTAINS(state,node) \
    1542    ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
    1543  
    1544  static reg_errcode_t
    1545  sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
    1546  {
    1547    reg_errcode_t err;
    1548    int null_cnt = 0;
    1549    Idx str_idx = sctx->last_str_idx;
    1550    re_node_set cur_dest;
    1551  
    1552    DEBUG_ASSERT (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
    1553  
    1554    /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
    1555       transit to the last_node and the last_node itself.  */
    1556    err = re_node_set_init_1 (&cur_dest, sctx->last_node);
    1557    if (__glibc_unlikely (err != REG_NOERROR))
    1558      return err;
    1559    err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
    1560    if (__glibc_unlikely (err != REG_NOERROR))
    1561      goto free_return;
    1562  
    1563    /* Then check each states in the state_log.  */
    1564    while (str_idx > 0)
    1565      {
    1566        /* Update counters.  */
    1567        null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
    1568        if (null_cnt > mctx->max_mb_elem_len)
    1569  	{
    1570  	  memset (sctx->sifted_states, '\0',
    1571  		  sizeof (re_dfastate_t *) * str_idx);
    1572  	  re_node_set_free (&cur_dest);
    1573  	  return REG_NOERROR;
    1574  	}
    1575        re_node_set_empty (&cur_dest);
    1576        --str_idx;
    1577  
    1578        if (mctx->state_log[str_idx])
    1579  	{
    1580  	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
    1581  	  if (__glibc_unlikely (err != REG_NOERROR))
    1582  	    goto free_return;
    1583  	}
    1584  
    1585        /* Add all the nodes which satisfy the following conditions:
    1586  	 - It can epsilon transit to a node in CUR_DEST.
    1587  	 - It is in CUR_SRC.
    1588  	 And update state_log.  */
    1589        err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
    1590        if (__glibc_unlikely (err != REG_NOERROR))
    1591  	goto free_return;
    1592      }
    1593    err = REG_NOERROR;
    1594   free_return:
    1595    re_node_set_free (&cur_dest);
    1596    return err;
    1597  }
    1598  
    1599  static reg_errcode_t
    1600  __attribute_warn_unused_result__
    1601  build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
    1602  		     Idx str_idx, re_node_set *cur_dest)
    1603  {
    1604    const re_dfa_t *const dfa = mctx->dfa;
    1605    const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
    1606    Idx i;
    1607  
    1608    /* Then build the next sifted state.
    1609       We build the next sifted state on 'cur_dest', and update
    1610       'sifted_states[str_idx]' with 'cur_dest'.
    1611       Note:
    1612       'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
    1613       'cur_src' points the node_set of the old 'state_log[str_idx]'
    1614       (with the epsilon nodes pre-filtered out).  */
    1615    for (i = 0; i < cur_src->nelem; i++)
    1616      {
    1617        Idx prev_node = cur_src->elems[i];
    1618        int naccepted = 0;
    1619        bool ok;
    1620        DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type));
    1621  
    1622        /* If the node may accept "multi byte".  */
    1623        if (dfa->nodes[prev_node].accept_mb)
    1624  	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
    1625  					 str_idx, sctx->last_str_idx);
    1626  
    1627        /* We don't check backreferences here.
    1628  	 See update_cur_sifted_state().  */
    1629        if (!naccepted
    1630  	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
    1631  	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
    1632  				  dfa->nexts[prev_node]))
    1633  	naccepted = 1;
    1634  
    1635        if (naccepted == 0)
    1636  	continue;
    1637  
    1638        if (sctx->limits.nelem)
    1639  	{
    1640  	  Idx to_idx = str_idx + naccepted;
    1641  	  if (check_dst_limits (mctx, &sctx->limits,
    1642  				dfa->nexts[prev_node], to_idx,
    1643  				prev_node, str_idx))
    1644  	    continue;
    1645  	}
    1646        ok = re_node_set_insert (cur_dest, prev_node);
    1647        if (__glibc_unlikely (! ok))
    1648  	return REG_ESPACE;
    1649      }
    1650  
    1651    return REG_NOERROR;
    1652  }
    1653  
    1654  /* Helper functions.  */
    1655  
    1656  static reg_errcode_t
    1657  clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
    1658  {
    1659    Idx top = mctx->state_log_top;
    1660  
    1661    if ((next_state_log_idx >= mctx->input.bufs_len
    1662         && mctx->input.bufs_len < mctx->input.len)
    1663        || (next_state_log_idx >= mctx->input.valid_len
    1664  	  && mctx->input.valid_len < mctx->input.len))
    1665      {
    1666        reg_errcode_t err;
    1667        err = extend_buffers (mctx, next_state_log_idx + 1);
    1668        if (__glibc_unlikely (err != REG_NOERROR))
    1669  	return err;
    1670      }
    1671  
    1672    if (top < next_state_log_idx)
    1673      {
    1674        DEBUG_ASSERT (mctx->state_log != NULL);
    1675        memset (mctx->state_log + top + 1, '\0',
    1676  	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
    1677        mctx->state_log_top = next_state_log_idx;
    1678      }
    1679    return REG_NOERROR;
    1680  }
    1681  
    1682  static reg_errcode_t
    1683  merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
    1684  		   re_dfastate_t **src, Idx num)
    1685  {
    1686    Idx st_idx;
    1687    reg_errcode_t err;
    1688    for (st_idx = 0; st_idx < num; ++st_idx)
    1689      {
    1690        if (dst[st_idx] == NULL)
    1691  	dst[st_idx] = src[st_idx];
    1692        else if (src[st_idx] != NULL)
    1693  	{
    1694  	  re_node_set merged_set;
    1695  	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
    1696  					&src[st_idx]->nodes);
    1697  	  if (__glibc_unlikely (err != REG_NOERROR))
    1698  	    return err;
    1699  	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
    1700  	  re_node_set_free (&merged_set);
    1701  	  if (__glibc_unlikely (err != REG_NOERROR))
    1702  	    return err;
    1703  	}
    1704      }
    1705    return REG_NOERROR;
    1706  }
    1707  
    1708  static reg_errcode_t
    1709  update_cur_sifted_state (const re_match_context_t *mctx,
    1710  			 re_sift_context_t *sctx, Idx str_idx,
    1711  			 re_node_set *dest_nodes)
    1712  {
    1713    const re_dfa_t *const dfa = mctx->dfa;
    1714    reg_errcode_t err = REG_NOERROR;
    1715    const re_node_set *candidates;
    1716    candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
    1717  		: &mctx->state_log[str_idx]->nodes);
    1718  
    1719    if (dest_nodes->nelem == 0)
    1720      sctx->sifted_states[str_idx] = NULL;
    1721    else
    1722      {
    1723        if (candidates)
    1724  	{
    1725  	  /* At first, add the nodes which can epsilon transit to a node in
    1726  	     DEST_NODE.  */
    1727  	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
    1728  	  if (__glibc_unlikely (err != REG_NOERROR))
    1729  	    return err;
    1730  
    1731  	  /* Then, check the limitations in the current sift_context.  */
    1732  	  if (sctx->limits.nelem)
    1733  	    {
    1734  	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
    1735  					 mctx->bkref_ents, str_idx);
    1736  	      if (__glibc_unlikely (err != REG_NOERROR))
    1737  		return err;
    1738  	    }
    1739  	}
    1740  
    1741        sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
    1742        if (__glibc_unlikely (err != REG_NOERROR))
    1743  	return err;
    1744      }
    1745  
    1746    if (candidates && mctx->state_log[str_idx]->has_backref)
    1747      {
    1748        err = sift_states_bkref (mctx, sctx, str_idx, candidates);
    1749        if (__glibc_unlikely (err != REG_NOERROR))
    1750  	return err;
    1751      }
    1752    return REG_NOERROR;
    1753  }
    1754  
    1755  static reg_errcode_t
    1756  __attribute_warn_unused_result__
    1757  add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
    1758  		       const re_node_set *candidates)
    1759  {
    1760    reg_errcode_t err = REG_NOERROR;
    1761    Idx i;
    1762  
    1763    re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
    1764    if (__glibc_unlikely (err != REG_NOERROR))
    1765      return err;
    1766  
    1767    if (!state->inveclosure.alloc)
    1768      {
    1769        err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
    1770        if (__glibc_unlikely (err != REG_NOERROR))
    1771  	return REG_ESPACE;
    1772        for (i = 0; i < dest_nodes->nelem; i++)
    1773  	{
    1774  	  err = re_node_set_merge (&state->inveclosure,
    1775  				   dfa->inveclosures + dest_nodes->elems[i]);
    1776  	  if (__glibc_unlikely (err != REG_NOERROR))
    1777  	    return REG_ESPACE;
    1778  	}
    1779      }
    1780    return re_node_set_add_intersect (dest_nodes, candidates,
    1781  				    &state->inveclosure);
    1782  }
    1783  
    1784  static reg_errcode_t
    1785  sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
    1786  		       const re_node_set *candidates)
    1787  {
    1788      Idx ecl_idx;
    1789      reg_errcode_t err;
    1790      re_node_set *inv_eclosure = dfa->inveclosures + node;
    1791      re_node_set except_nodes;
    1792      re_node_set_init_empty (&except_nodes);
    1793      for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
    1794        {
    1795  	Idx cur_node = inv_eclosure->elems[ecl_idx];
    1796  	if (cur_node == node)
    1797  	  continue;
    1798  	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
    1799  	  {
    1800  	    Idx edst1 = dfa->edests[cur_node].elems[0];
    1801  	    Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
    1802  			 ? dfa->edests[cur_node].elems[1] : -1);
    1803  	    if ((!re_node_set_contains (inv_eclosure, edst1)
    1804  		 && re_node_set_contains (dest_nodes, edst1))
    1805  		|| (edst2 > 0
    1806  		    && !re_node_set_contains (inv_eclosure, edst2)
    1807  		    && re_node_set_contains (dest_nodes, edst2)))
    1808  	      {
    1809  		err = re_node_set_add_intersect (&except_nodes, candidates,
    1810  						 dfa->inveclosures + cur_node);
    1811  		if (__glibc_unlikely (err != REG_NOERROR))
    1812  		  {
    1813  		    re_node_set_free (&except_nodes);
    1814  		    return err;
    1815  		  }
    1816  	      }
    1817  	  }
    1818        }
    1819      for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
    1820        {
    1821  	Idx cur_node = inv_eclosure->elems[ecl_idx];
    1822  	if (!re_node_set_contains (&except_nodes, cur_node))
    1823  	  {
    1824  	    Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
    1825  	    re_node_set_remove_at (dest_nodes, idx);
    1826  	  }
    1827        }
    1828      re_node_set_free (&except_nodes);
    1829      return REG_NOERROR;
    1830  }
    1831  
    1832  static bool
    1833  check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
    1834  		  Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
    1835  {
    1836    const re_dfa_t *const dfa = mctx->dfa;
    1837    Idx lim_idx, src_pos, dst_pos;
    1838  
    1839    Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
    1840    Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
    1841    for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
    1842      {
    1843        Idx subexp_idx;
    1844        struct re_backref_cache_entry *ent;
    1845        ent = mctx->bkref_ents + limits->elems[lim_idx];
    1846        subexp_idx = dfa->nodes[ent->node].opr.idx;
    1847  
    1848        dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
    1849  					   subexp_idx, dst_node, dst_idx,
    1850  					   dst_bkref_idx);
    1851        src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
    1852  					   subexp_idx, src_node, src_idx,
    1853  					   src_bkref_idx);
    1854  
    1855        /* In case of:
    1856  	 <src> <dst> ( <subexp> )
    1857  	 ( <subexp> ) <src> <dst>
    1858  	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
    1859        if (src_pos == dst_pos)
    1860  	continue; /* This is unrelated limitation.  */
    1861        else
    1862  	return true;
    1863      }
    1864    return false;
    1865  }
    1866  
    1867  static int
    1868  check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
    1869  			     Idx subexp_idx, Idx from_node, Idx bkref_idx)
    1870  {
    1871    const re_dfa_t *const dfa = mctx->dfa;
    1872    const re_node_set *eclosures = dfa->eclosures + from_node;
    1873    Idx node_idx;
    1874  
    1875    /* Else, we are on the boundary: examine the nodes on the epsilon
    1876       closure.  */
    1877    for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
    1878      {
    1879        Idx node = eclosures->elems[node_idx];
    1880        switch (dfa->nodes[node].type)
    1881  	{
    1882  	case OP_BACK_REF:
    1883  	  if (bkref_idx != -1)
    1884  	    {
    1885  	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
    1886  	      do
    1887  		{
    1888  		  Idx dst;
    1889  		  int cpos;
    1890  
    1891  		  if (ent->node != node)
    1892  		    continue;
    1893  
    1894  		  if (subexp_idx < BITSET_WORD_BITS
    1895  		      && !(ent->eps_reachable_subexps_map
    1896  			   & ((bitset_word_t) 1 << subexp_idx)))
    1897  		    continue;
    1898  
    1899  		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
    1900  		     OP_CLOSE_SUBEXP cases below.  But, if the
    1901  		     destination node is the same node as the source
    1902  		     node, don't recurse because it would cause an
    1903  		     infinite loop: a regex that exhibits this behavior
    1904  		     is ()\1*\1*  */
    1905  		  dst = dfa->edests[node].elems[0];
    1906  		  if (dst == from_node)
    1907  		    {
    1908  		      if (boundaries & 1)
    1909  			return -1;
    1910  		      else /* if (boundaries & 2) */
    1911  			return 0;
    1912  		    }
    1913  
    1914  		  cpos =
    1915  		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
    1916  						 dst, bkref_idx);
    1917  		  if (cpos == -1 /* && (boundaries & 1) */)
    1918  		    return -1;
    1919  		  if (cpos == 0 && (boundaries & 2))
    1920  		    return 0;
    1921  
    1922  		  if (subexp_idx < BITSET_WORD_BITS)
    1923  		    ent->eps_reachable_subexps_map
    1924  		      &= ~((bitset_word_t) 1 << subexp_idx);
    1925  		}
    1926  	      while (ent++->more);
    1927  	    }
    1928  	  break;
    1929  
    1930  	case OP_OPEN_SUBEXP:
    1931  	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
    1932  	    return -1;
    1933  	  break;
    1934  
    1935  	case OP_CLOSE_SUBEXP:
    1936  	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
    1937  	    return 0;
    1938  	  break;
    1939  
    1940  	default:
    1941  	    break;
    1942  	}
    1943      }
    1944  
    1945    return (boundaries & 2) ? 1 : 0;
    1946  }
    1947  
    1948  static int
    1949  check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
    1950  			   Idx subexp_idx, Idx from_node, Idx str_idx,
    1951  			   Idx bkref_idx)
    1952  {
    1953    struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
    1954    int boundaries;
    1955  
    1956    /* If we are outside the range of the subexpression, return -1 or 1.  */
    1957    if (str_idx < lim->subexp_from)
    1958      return -1;
    1959  
    1960    if (lim->subexp_to < str_idx)
    1961      return 1;
    1962  
    1963    /* If we are within the subexpression, return 0.  */
    1964    boundaries = (str_idx == lim->subexp_from);
    1965    boundaries |= (str_idx == lim->subexp_to) << 1;
    1966    if (boundaries == 0)
    1967      return 0;
    1968  
    1969    /* Else, examine epsilon closure.  */
    1970    return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
    1971  				      from_node, bkref_idx);
    1972  }
    1973  
    1974  /* Check the limitations of sub expressions LIMITS, and remove the nodes
    1975     which are against limitations from DEST_NODES. */
    1976  
    1977  static reg_errcode_t
    1978  check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
    1979  		     const re_node_set *candidates, re_node_set *limits,
    1980  		     struct re_backref_cache_entry *bkref_ents, Idx str_idx)
    1981  {
    1982    reg_errcode_t err;
    1983    Idx node_idx, lim_idx;
    1984  
    1985    for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
    1986      {
    1987        Idx subexp_idx;
    1988        struct re_backref_cache_entry *ent;
    1989        ent = bkref_ents + limits->elems[lim_idx];
    1990  
    1991        if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
    1992  	continue; /* This is unrelated limitation.  */
    1993  
    1994        subexp_idx = dfa->nodes[ent->node].opr.idx;
    1995        if (ent->subexp_to == str_idx)
    1996  	{
    1997  	  Idx ops_node = -1;
    1998  	  Idx cls_node = -1;
    1999  	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
    2000  	    {
    2001  	      Idx node = dest_nodes->elems[node_idx];
    2002  	      re_token_type_t type = dfa->nodes[node].type;
    2003  	      if (type == OP_OPEN_SUBEXP
    2004  		  && subexp_idx == dfa->nodes[node].opr.idx)
    2005  		ops_node = node;
    2006  	      else if (type == OP_CLOSE_SUBEXP
    2007  		       && subexp_idx == dfa->nodes[node].opr.idx)
    2008  		cls_node = node;
    2009  	    }
    2010  
    2011  	  /* Check the limitation of the open subexpression.  */
    2012  	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
    2013  	  if (ops_node >= 0)
    2014  	    {
    2015  	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
    2016  					   candidates);
    2017  	      if (__glibc_unlikely (err != REG_NOERROR))
    2018  		return err;
    2019  	    }
    2020  
    2021  	  /* Check the limitation of the close subexpression.  */
    2022  	  if (cls_node >= 0)
    2023  	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
    2024  	      {
    2025  		Idx node = dest_nodes->elems[node_idx];
    2026  		if (!re_node_set_contains (dfa->inveclosures + node,
    2027  					   cls_node)
    2028  		    && !re_node_set_contains (dfa->eclosures + node,
    2029  					      cls_node))
    2030  		  {
    2031  		    /* It is against this limitation.
    2032  		       Remove it form the current sifted state.  */
    2033  		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
    2034  						 candidates);
    2035  		    if (__glibc_unlikely (err != REG_NOERROR))
    2036  		      return err;
    2037  		    --node_idx;
    2038  		  }
    2039  	      }
    2040  	}
    2041        else /* (ent->subexp_to != str_idx)  */
    2042  	{
    2043  	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
    2044  	    {
    2045  	      Idx node = dest_nodes->elems[node_idx];
    2046  	      re_token_type_t type = dfa->nodes[node].type;
    2047  	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
    2048  		{
    2049  		  if (subexp_idx != dfa->nodes[node].opr.idx)
    2050  		    continue;
    2051  		  /* It is against this limitation.
    2052  		     Remove it form the current sifted state.  */
    2053  		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
    2054  					       candidates);
    2055  		  if (__glibc_unlikely (err != REG_NOERROR))
    2056  		    return err;
    2057  		}
    2058  	    }
    2059  	}
    2060      }
    2061    return REG_NOERROR;
    2062  }
    2063  
    2064  static reg_errcode_t
    2065  __attribute_warn_unused_result__
    2066  sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
    2067  		   Idx str_idx, const re_node_set *candidates)
    2068  {
    2069    const re_dfa_t *const dfa = mctx->dfa;
    2070    reg_errcode_t err;
    2071    Idx node_idx, node;
    2072    re_sift_context_t local_sctx;
    2073    Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
    2074  
    2075    if (first_idx == -1)
    2076      return REG_NOERROR;
    2077  
    2078    local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
    2079  
    2080    for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
    2081      {
    2082        Idx enabled_idx;
    2083        re_token_type_t type;
    2084        struct re_backref_cache_entry *entry;
    2085        node = candidates->elems[node_idx];
    2086        type = dfa->nodes[node].type;
    2087        /* Avoid infinite loop for the REs like "()\1+".  */
    2088        if (node == sctx->last_node && str_idx == sctx->last_str_idx)
    2089  	continue;
    2090        if (type != OP_BACK_REF)
    2091  	continue;
    2092  
    2093        entry = mctx->bkref_ents + first_idx;
    2094        enabled_idx = first_idx;
    2095        do
    2096  	{
    2097  	  Idx subexp_len;
    2098  	  Idx to_idx;
    2099  	  Idx dst_node;
    2100  	  bool ok;
    2101  	  re_dfastate_t *cur_state;
    2102  
    2103  	  if (entry->node != node)
    2104  	    continue;
    2105  	  subexp_len = entry->subexp_to - entry->subexp_from;
    2106  	  to_idx = str_idx + subexp_len;
    2107  	  dst_node = (subexp_len ? dfa->nexts[node]
    2108  		      : dfa->edests[node].elems[0]);
    2109  
    2110  	  if (to_idx > sctx->last_str_idx
    2111  	      || sctx->sifted_states[to_idx] == NULL
    2112  	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
    2113  	      || check_dst_limits (mctx, &sctx->limits, node,
    2114  				   str_idx, dst_node, to_idx))
    2115  	    continue;
    2116  
    2117  	  if (local_sctx.sifted_states == NULL)
    2118  	    {
    2119  	      local_sctx = *sctx;
    2120  	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
    2121  	      if (__glibc_unlikely (err != REG_NOERROR))
    2122  		goto free_return;
    2123  	    }
    2124  	  local_sctx.last_node = node;
    2125  	  local_sctx.last_str_idx = str_idx;
    2126  	  ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
    2127  	  if (__glibc_unlikely (! ok))
    2128  	    {
    2129  	      err = REG_ESPACE;
    2130  	      goto free_return;
    2131  	    }
    2132  	  cur_state = local_sctx.sifted_states[str_idx];
    2133  	  err = sift_states_backward (mctx, &local_sctx);
    2134  	  if (__glibc_unlikely (err != REG_NOERROR))
    2135  	    goto free_return;
    2136  	  if (sctx->limited_states != NULL)
    2137  	    {
    2138  	      err = merge_state_array (dfa, sctx->limited_states,
    2139  				       local_sctx.sifted_states,
    2140  				       str_idx + 1);
    2141  	      if (__glibc_unlikely (err != REG_NOERROR))
    2142  		goto free_return;
    2143  	    }
    2144  	  local_sctx.sifted_states[str_idx] = cur_state;
    2145  	  re_node_set_remove (&local_sctx.limits, enabled_idx);
    2146  
    2147  	  /* mctx->bkref_ents may have changed, reload the pointer.  */
    2148  	  entry = mctx->bkref_ents + enabled_idx;
    2149  	}
    2150        while (enabled_idx++, entry++->more);
    2151      }
    2152    err = REG_NOERROR;
    2153   free_return:
    2154    if (local_sctx.sifted_states != NULL)
    2155      {
    2156        re_node_set_free (&local_sctx.limits);
    2157      }
    2158  
    2159    return err;
    2160  }
    2161  
    2162  
    2163  static int
    2164  sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
    2165  		     Idx node_idx, Idx str_idx, Idx max_str_idx)
    2166  {
    2167    const re_dfa_t *const dfa = mctx->dfa;
    2168    int naccepted;
    2169    /* Check the node can accept "multi byte".  */
    2170    naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
    2171    if (naccepted > 0 && str_idx + naccepted <= max_str_idx
    2172        && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
    2173  			       dfa->nexts[node_idx]))
    2174      /* The node can't accept the "multi byte", or the
    2175         destination was already thrown away, then the node
    2176         couldn't accept the current input "multi byte".   */
    2177      naccepted = 0;
    2178    /* Otherwise, it is sure that the node could accept
    2179       'naccepted' bytes input.  */
    2180    return naccepted;
    2181  }
    2182  
    2183  /* Functions for state transition.  */
    2184  
    2185  /* Return the next state to which the current state STATE will transit by
    2186     accepting the current input byte, and update STATE_LOG if necessary.
    2187     Return NULL on failure.
    2188     If STATE can accept a multibyte char/collating element/back reference
    2189     update the destination of STATE_LOG.  */
    2190  
    2191  static re_dfastate_t *
    2192  __attribute_warn_unused_result__
    2193  transit_state (reg_errcode_t *err, re_match_context_t *mctx,
    2194  	       re_dfastate_t *state)
    2195  {
    2196    re_dfastate_t **trtable;
    2197    unsigned char ch;
    2198  
    2199    /* If the current state can accept multibyte.  */
    2200    if (__glibc_unlikely (state->accept_mb))
    2201      {
    2202        *err = transit_state_mb (mctx, state);
    2203        if (__glibc_unlikely (*err != REG_NOERROR))
    2204  	return NULL;
    2205      }
    2206  
    2207    /* Then decide the next state with the single byte.  */
    2208  #if 0
    2209    if (0)
    2210      /* don't use transition table  */
    2211      return transit_state_sb (err, mctx, state);
    2212  #endif
    2213  
    2214    /* Use transition table  */
    2215    ch = re_string_fetch_byte (&mctx->input);
    2216    for (;;)
    2217      {
    2218        trtable = state->trtable;
    2219        if (__glibc_likely (trtable != NULL))
    2220  	return trtable[ch];
    2221  
    2222        trtable = state->word_trtable;
    2223        if (__glibc_likely (trtable != NULL))
    2224  	{
    2225  	  unsigned int context;
    2226  	  context
    2227  	    = re_string_context_at (&mctx->input,
    2228  				    re_string_cur_idx (&mctx->input) - 1,
    2229  				    mctx->eflags);
    2230  	  if (IS_WORD_CONTEXT (context))
    2231  	    return trtable[ch + SBC_MAX];
    2232  	  else
    2233  	    return trtable[ch];
    2234  	}
    2235  
    2236        if (!build_trtable (mctx->dfa, state))
    2237  	{
    2238  	  *err = REG_ESPACE;
    2239  	  return NULL;
    2240  	}
    2241  
    2242        /* Retry, we now have a transition table.  */
    2243      }
    2244  }
    2245  
    2246  /* Update the state_log if we need */
    2247  static re_dfastate_t *
    2248  merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
    2249  		      re_dfastate_t *next_state)
    2250  {
    2251    const re_dfa_t *const dfa = mctx->dfa;
    2252    Idx cur_idx = re_string_cur_idx (&mctx->input);
    2253  
    2254    if (cur_idx > mctx->state_log_top)
    2255      {
    2256        mctx->state_log[cur_idx] = next_state;
    2257        mctx->state_log_top = cur_idx;
    2258      }
    2259    else if (mctx->state_log[cur_idx] == 0)
    2260      {
    2261        mctx->state_log[cur_idx] = next_state;
    2262      }
    2263    else
    2264      {
    2265        re_dfastate_t *pstate;
    2266        unsigned int context;
    2267        re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
    2268        /* If (state_log[cur_idx] != 0), it implies that cur_idx is
    2269  	 the destination of a multibyte char/collating element/
    2270  	 back reference.  Then the next state is the union set of
    2271  	 these destinations and the results of the transition table.  */
    2272        pstate = mctx->state_log[cur_idx];
    2273        log_nodes = pstate->entrance_nodes;
    2274        if (next_state != NULL)
    2275  	{
    2276  	  table_nodes = next_state->entrance_nodes;
    2277  	  *err = re_node_set_init_union (&next_nodes, table_nodes,
    2278  					     log_nodes);
    2279  	  if (__glibc_unlikely (*err != REG_NOERROR))
    2280  	    return NULL;
    2281  	}
    2282        else
    2283  	next_nodes = *log_nodes;
    2284        /* Note: We already add the nodes of the initial state,
    2285  	 then we don't need to add them here.  */
    2286  
    2287        context = re_string_context_at (&mctx->input,
    2288  				      re_string_cur_idx (&mctx->input) - 1,
    2289  				      mctx->eflags);
    2290        next_state = mctx->state_log[cur_idx]
    2291  	= re_acquire_state_context (err, dfa, &next_nodes, context);
    2292        /* We don't need to check errors here, since the return value of
    2293  	 this function is next_state and ERR is already set.  */
    2294  
    2295        if (table_nodes != NULL)
    2296  	re_node_set_free (&next_nodes);
    2297      }
    2298  
    2299    if (__glibc_unlikely (dfa->nbackref) && next_state != NULL)
    2300      {
    2301        /* Check OP_OPEN_SUBEXP in the current state in case that we use them
    2302  	 later.  We must check them here, since the back references in the
    2303  	 next state might use them.  */
    2304        *err = check_subexp_matching_top (mctx, &next_state->nodes,
    2305  					cur_idx);
    2306        if (__glibc_unlikely (*err != REG_NOERROR))
    2307  	return NULL;
    2308  
    2309        /* If the next state has back references.  */
    2310        if (next_state->has_backref)
    2311  	{
    2312  	  *err = transit_state_bkref (mctx, &next_state->nodes);
    2313  	  if (__glibc_unlikely (*err != REG_NOERROR))
    2314  	    return NULL;
    2315  	  next_state = mctx->state_log[cur_idx];
    2316  	}
    2317      }
    2318  
    2319    return next_state;
    2320  }
    2321  
    2322  /* Skip bytes in the input that correspond to part of a
    2323     multi-byte match, then look in the log for a state
    2324     from which to restart matching.  */
    2325  static re_dfastate_t *
    2326  find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
    2327  {
    2328    re_dfastate_t *cur_state;
    2329    do
    2330      {
    2331        Idx max = mctx->state_log_top;
    2332        Idx cur_str_idx = re_string_cur_idx (&mctx->input);
    2333  
    2334        do
    2335  	{
    2336  	  if (++cur_str_idx > max)
    2337  	    return NULL;
    2338  	  re_string_skip_bytes (&mctx->input, 1);
    2339  	}
    2340        while (mctx->state_log[cur_str_idx] == NULL);
    2341  
    2342        cur_state = merge_state_with_log (err, mctx, NULL);
    2343      }
    2344    while (*err == REG_NOERROR && cur_state == NULL);
    2345    return cur_state;
    2346  }
    2347  
    2348  /* Helper functions for transit_state.  */
    2349  
    2350  /* From the node set CUR_NODES, pick up the nodes whose types are
    2351     OP_OPEN_SUBEXP and which have corresponding back references in the regular
    2352     expression. And register them to use them later for evaluating the
    2353     corresponding back references.  */
    2354  
    2355  static reg_errcode_t
    2356  check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
    2357  			   Idx str_idx)
    2358  {
    2359    const re_dfa_t *const dfa = mctx->dfa;
    2360    Idx node_idx;
    2361    reg_errcode_t err;
    2362  
    2363    /* TODO: This isn't efficient.
    2364  	   Because there might be more than one nodes whose types are
    2365  	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
    2366  	   nodes.
    2367  	   E.g. RE: (a){2}  */
    2368    for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
    2369      {
    2370        Idx node = cur_nodes->elems[node_idx];
    2371        if (dfa->nodes[node].type == OP_OPEN_SUBEXP
    2372  	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
    2373  	  && (dfa->used_bkref_map
    2374  	      & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
    2375  	{
    2376  	  err = match_ctx_add_subtop (mctx, node, str_idx);
    2377  	  if (__glibc_unlikely (err != REG_NOERROR))
    2378  	    return err;
    2379  	}
    2380      }
    2381    return REG_NOERROR;
    2382  }
    2383  
    2384  #if 0
    2385  /* Return the next state to which the current state STATE will transit by
    2386     accepting the current input byte.  Return NULL on failure.  */
    2387  
    2388  static re_dfastate_t *
    2389  transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
    2390  		  re_dfastate_t *state)
    2391  {
    2392    const re_dfa_t *const dfa = mctx->dfa;
    2393    re_node_set next_nodes;
    2394    re_dfastate_t *next_state;
    2395    Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
    2396    unsigned int context;
    2397  
    2398    *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
    2399    if (__glibc_unlikely (*err != REG_NOERROR))
    2400      return NULL;
    2401    for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
    2402      {
    2403        Idx cur_node = state->nodes.elems[node_cnt];
    2404        if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
    2405  	{
    2406  	  *err = re_node_set_merge (&next_nodes,
    2407  				    dfa->eclosures + dfa->nexts[cur_node]);
    2408  	  if (__glibc_unlikely (*err != REG_NOERROR))
    2409  	    {
    2410  	      re_node_set_free (&next_nodes);
    2411  	      return NULL;
    2412  	    }
    2413  	}
    2414      }
    2415    context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
    2416    next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
    2417    /* We don't need to check errors here, since the return value of
    2418       this function is next_state and ERR is already set.  */
    2419  
    2420    re_node_set_free (&next_nodes);
    2421    re_string_skip_bytes (&mctx->input, 1);
    2422    return next_state;
    2423  }
    2424  #endif
    2425  
    2426  static reg_errcode_t
    2427  transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
    2428  {
    2429    const re_dfa_t *const dfa = mctx->dfa;
    2430    reg_errcode_t err;
    2431    Idx i;
    2432  
    2433    for (i = 0; i < pstate->nodes.nelem; ++i)
    2434      {
    2435        re_node_set dest_nodes, *new_nodes;
    2436        Idx cur_node_idx = pstate->nodes.elems[i];
    2437        int naccepted;
    2438        Idx dest_idx;
    2439        unsigned int context;
    2440        re_dfastate_t *dest_state;
    2441  
    2442        if (!dfa->nodes[cur_node_idx].accept_mb)
    2443  	continue;
    2444  
    2445        if (dfa->nodes[cur_node_idx].constraint)
    2446  	{
    2447  	  context = re_string_context_at (&mctx->input,
    2448  					  re_string_cur_idx (&mctx->input),
    2449  					  mctx->eflags);
    2450  	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
    2451  					   context))
    2452  	    continue;
    2453  	}
    2454  
    2455        /* How many bytes the node can accept?  */
    2456        naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
    2457  					   re_string_cur_idx (&mctx->input));
    2458        if (naccepted == 0)
    2459  	continue;
    2460  
    2461        /* The node can accepts 'naccepted' bytes.  */
    2462        dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
    2463        mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
    2464  			       : mctx->max_mb_elem_len);
    2465        err = clean_state_log_if_needed (mctx, dest_idx);
    2466        if (__glibc_unlikely (err != REG_NOERROR))
    2467  	return err;
    2468        DEBUG_ASSERT (dfa->nexts[cur_node_idx] != -1);
    2469        new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
    2470  
    2471        dest_state = mctx->state_log[dest_idx];
    2472        if (dest_state == NULL)
    2473  	dest_nodes = *new_nodes;
    2474        else
    2475  	{
    2476  	  err = re_node_set_init_union (&dest_nodes,
    2477  					dest_state->entrance_nodes, new_nodes);
    2478  	  if (__glibc_unlikely (err != REG_NOERROR))
    2479  	    return err;
    2480  	}
    2481        context = re_string_context_at (&mctx->input, dest_idx - 1,
    2482  				      mctx->eflags);
    2483        mctx->state_log[dest_idx]
    2484  	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
    2485        if (dest_state != NULL)
    2486  	re_node_set_free (&dest_nodes);
    2487        if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL
    2488  			    && err != REG_NOERROR))
    2489  	return err;
    2490      }
    2491    return REG_NOERROR;
    2492  }
    2493  
    2494  static reg_errcode_t
    2495  transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
    2496  {
    2497    const re_dfa_t *const dfa = mctx->dfa;
    2498    reg_errcode_t err;
    2499    Idx i;
    2500    Idx cur_str_idx = re_string_cur_idx (&mctx->input);
    2501  
    2502    for (i = 0; i < nodes->nelem; ++i)
    2503      {
    2504        Idx dest_str_idx, prev_nelem, bkc_idx;
    2505        Idx node_idx = nodes->elems[i];
    2506        unsigned int context;
    2507        const re_token_t *node = dfa->nodes + node_idx;
    2508        re_node_set *new_dest_nodes;
    2509  
    2510        /* Check whether 'node' is a backreference or not.  */
    2511        if (node->type != OP_BACK_REF)
    2512  	continue;
    2513  
    2514        if (node->constraint)
    2515  	{
    2516  	  context = re_string_context_at (&mctx->input, cur_str_idx,
    2517  					  mctx->eflags);
    2518  	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
    2519  	    continue;
    2520  	}
    2521  
    2522        /* 'node' is a backreference.
    2523  	 Check the substring which the substring matched.  */
    2524        bkc_idx = mctx->nbkref_ents;
    2525        err = get_subexp (mctx, node_idx, cur_str_idx);
    2526        if (__glibc_unlikely (err != REG_NOERROR))
    2527  	goto free_return;
    2528  
    2529        /* And add the epsilon closures (which is 'new_dest_nodes') of
    2530  	 the backreference to appropriate state_log.  */
    2531        DEBUG_ASSERT (dfa->nexts[node_idx] != -1);
    2532        for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
    2533  	{
    2534  	  Idx subexp_len;
    2535  	  re_dfastate_t *dest_state;
    2536  	  struct re_backref_cache_entry *bkref_ent;
    2537  	  bkref_ent = mctx->bkref_ents + bkc_idx;
    2538  	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
    2539  	    continue;
    2540  	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
    2541  	  new_dest_nodes = (subexp_len == 0
    2542  			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
    2543  			    : dfa->eclosures + dfa->nexts[node_idx]);
    2544  	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
    2545  			  - bkref_ent->subexp_from);
    2546  	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
    2547  					  mctx->eflags);
    2548  	  dest_state = mctx->state_log[dest_str_idx];
    2549  	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
    2550  			: mctx->state_log[cur_str_idx]->nodes.nelem);
    2551  	  /* Add 'new_dest_node' to state_log.  */
    2552  	  if (dest_state == NULL)
    2553  	    {
    2554  	      mctx->state_log[dest_str_idx]
    2555  		= re_acquire_state_context (&err, dfa, new_dest_nodes,
    2556  					    context);
    2557  	      if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
    2558  				    && err != REG_NOERROR))
    2559  		goto free_return;
    2560  	    }
    2561  	  else
    2562  	    {
    2563  	      re_node_set dest_nodes;
    2564  	      err = re_node_set_init_union (&dest_nodes,
    2565  					    dest_state->entrance_nodes,
    2566  					    new_dest_nodes);
    2567  	      if (__glibc_unlikely (err != REG_NOERROR))
    2568  		{
    2569  		  re_node_set_free (&dest_nodes);
    2570  		  goto free_return;
    2571  		}
    2572  	      mctx->state_log[dest_str_idx]
    2573  		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
    2574  	      re_node_set_free (&dest_nodes);
    2575  	      if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
    2576  				    && err != REG_NOERROR))
    2577  		goto free_return;
    2578  	    }
    2579  	  /* We need to check recursively if the backreference can epsilon
    2580  	     transit.  */
    2581  	  if (subexp_len == 0
    2582  	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
    2583  	    {
    2584  	      err = check_subexp_matching_top (mctx, new_dest_nodes,
    2585  					       cur_str_idx);
    2586  	      if (__glibc_unlikely (err != REG_NOERROR))
    2587  		goto free_return;
    2588  	      err = transit_state_bkref (mctx, new_dest_nodes);
    2589  	      if (__glibc_unlikely (err != REG_NOERROR))
    2590  		goto free_return;
    2591  	    }
    2592  	}
    2593      }
    2594    err = REG_NOERROR;
    2595   free_return:
    2596    return err;
    2597  }
    2598  
    2599  /* Enumerate all the candidates which the backreference BKREF_NODE can match
    2600     at BKREF_STR_IDX, and register them by match_ctx_add_entry().
    2601     Note that we might collect inappropriate candidates here.
    2602     However, the cost of checking them strictly here is too high, then we
    2603     delay these checking for prune_impossible_nodes().  */
    2604  
    2605  static reg_errcode_t
    2606  __attribute_warn_unused_result__
    2607  get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
    2608  {
    2609    const re_dfa_t *const dfa = mctx->dfa;
    2610    Idx subexp_num, sub_top_idx;
    2611    const char *buf = (const char *) re_string_get_buffer (&mctx->input);
    2612    /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
    2613    Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
    2614    if (cache_idx != -1)
    2615      {
    2616        const struct re_backref_cache_entry *entry
    2617  	= mctx->bkref_ents + cache_idx;
    2618        do
    2619  	if (entry->node == bkref_node)
    2620  	  return REG_NOERROR; /* We already checked it.  */
    2621        while (entry++->more);
    2622      }
    2623  
    2624    subexp_num = dfa->nodes[bkref_node].opr.idx;
    2625  
    2626    /* For each sub expression  */
    2627    for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
    2628      {
    2629        reg_errcode_t err;
    2630        re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
    2631        re_sub_match_last_t *sub_last;
    2632        Idx sub_last_idx, sl_str, bkref_str_off;
    2633  
    2634        if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
    2635  	continue; /* It isn't related.  */
    2636  
    2637        sl_str = sub_top->str_idx;
    2638        bkref_str_off = bkref_str_idx;
    2639        /* At first, check the last node of sub expressions we already
    2640  	 evaluated.  */
    2641        for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
    2642  	{
    2643  	  regoff_t sl_str_diff;
    2644  	  sub_last = sub_top->lasts[sub_last_idx];
    2645  	  sl_str_diff = sub_last->str_idx - sl_str;
    2646  	  /* The matched string by the sub expression match with the substring
    2647  	     at the back reference?  */
    2648  	  if (sl_str_diff > 0)
    2649  	    {
    2650  	      if (__glibc_unlikely (bkref_str_off + sl_str_diff
    2651  				    > mctx->input.valid_len))
    2652  		{
    2653  		  /* Not enough chars for a successful match.  */
    2654  		  if (bkref_str_off + sl_str_diff > mctx->input.len)
    2655  		    break;
    2656  
    2657  		  err = clean_state_log_if_needed (mctx,
    2658  						   bkref_str_off
    2659  						   + sl_str_diff);
    2660  		  if (__glibc_unlikely (err != REG_NOERROR))
    2661  		    return err;
    2662  		  buf = (const char *) re_string_get_buffer (&mctx->input);
    2663  		}
    2664  	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
    2665  		/* We don't need to search this sub expression any more.  */
    2666  		break;
    2667  	    }
    2668  	  bkref_str_off += sl_str_diff;
    2669  	  sl_str += sl_str_diff;
    2670  	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
    2671  				bkref_str_idx);
    2672  
    2673  	  /* Reload buf, since the preceding call might have reallocated
    2674  	     the buffer.  */
    2675  	  buf = (const char *) re_string_get_buffer (&mctx->input);
    2676  
    2677  	  if (err == REG_NOMATCH)
    2678  	    continue;
    2679  	  if (__glibc_unlikely (err != REG_NOERROR))
    2680  	    return err;
    2681  	}
    2682  
    2683        if (sub_last_idx < sub_top->nlasts)
    2684  	continue;
    2685        if (sub_last_idx > 0)
    2686  	++sl_str;
    2687        /* Then, search for the other last nodes of the sub expression.  */
    2688        for (; sl_str <= bkref_str_idx; ++sl_str)
    2689  	{
    2690  	  Idx cls_node;
    2691  	  regoff_t sl_str_off;
    2692  	  const re_node_set *nodes;
    2693  	  sl_str_off = sl_str - sub_top->str_idx;
    2694  	  /* The matched string by the sub expression match with the substring
    2695  	     at the back reference?  */
    2696  	  if (sl_str_off > 0)
    2697  	    {
    2698  	      if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len))
    2699  		{
    2700  		  /* If we are at the end of the input, we cannot match.  */
    2701  		  if (bkref_str_off >= mctx->input.len)
    2702  		    break;
    2703  
    2704  		  err = extend_buffers (mctx, bkref_str_off + 1);
    2705  		  if (__glibc_unlikely (err != REG_NOERROR))
    2706  		    return err;
    2707  
    2708  		  buf = (const char *) re_string_get_buffer (&mctx->input);
    2709  		}
    2710  	      if (buf [bkref_str_off++] != buf[sl_str - 1])
    2711  		break; /* We don't need to search this sub expression
    2712  			  any more.  */
    2713  	    }
    2714  	  if (mctx->state_log[sl_str] == NULL)
    2715  	    continue;
    2716  	  /* Does this state have a ')' of the sub expression?  */
    2717  	  nodes = &mctx->state_log[sl_str]->nodes;
    2718  	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
    2719  				       OP_CLOSE_SUBEXP);
    2720  	  if (cls_node == -1)
    2721  	    continue; /* No.  */
    2722  	  if (sub_top->path == NULL)
    2723  	    {
    2724  	      sub_top->path = calloc (sizeof (state_array_t),
    2725  				      sl_str - sub_top->str_idx + 1);
    2726  	      if (sub_top->path == NULL)
    2727  		return REG_ESPACE;
    2728  	    }
    2729  	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
    2730  	     in the current context?  */
    2731  	  err = check_arrival (mctx, sub_top->path, sub_top->node,
    2732  			       sub_top->str_idx, cls_node, sl_str,
    2733  			       OP_CLOSE_SUBEXP);
    2734  	  if (err == REG_NOMATCH)
    2735  	      continue;
    2736  	  if (__glibc_unlikely (err != REG_NOERROR))
    2737  	      return err;
    2738  	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
    2739  	  if (__glibc_unlikely (sub_last == NULL))
    2740  	    return REG_ESPACE;
    2741  	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
    2742  				bkref_str_idx);
    2743  	  buf = (const char *) re_string_get_buffer (&mctx->input);
    2744  	  if (err == REG_NOMATCH)
    2745  	    continue;
    2746  	  if (__glibc_unlikely (err != REG_NOERROR))
    2747  	    return err;
    2748  	}
    2749      }
    2750    return REG_NOERROR;
    2751  }
    2752  
    2753  /* Helper functions for get_subexp().  */
    2754  
    2755  /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
    2756     If it can arrive, register the sub expression expressed with SUB_TOP
    2757     and SUB_LAST.  */
    2758  
    2759  static reg_errcode_t
    2760  get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
    2761  		re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
    2762  {
    2763    reg_errcode_t err;
    2764    Idx to_idx;
    2765    /* Can the subexpression arrive the back reference?  */
    2766    err = check_arrival (mctx, &sub_last->path, sub_last->node,
    2767  		       sub_last->str_idx, bkref_node, bkref_str,
    2768  		       OP_OPEN_SUBEXP);
    2769    if (err != REG_NOERROR)
    2770      return err;
    2771    err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
    2772  			     sub_last->str_idx);
    2773    if (__glibc_unlikely (err != REG_NOERROR))
    2774      return err;
    2775    to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
    2776    return clean_state_log_if_needed (mctx, to_idx);
    2777  }
    2778  
    2779  /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
    2780     Search '(' if FL_OPEN, or search ')' otherwise.
    2781     TODO: This function isn't efficient...
    2782  	 Because there might be more than one nodes whose types are
    2783  	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
    2784  	 nodes.
    2785  	 E.g. RE: (a){2}  */
    2786  
    2787  static Idx
    2788  find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
    2789  		  Idx subexp_idx, int type)
    2790  {
    2791    Idx cls_idx;
    2792    for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
    2793      {
    2794        Idx cls_node = nodes->elems[cls_idx];
    2795        const re_token_t *node = dfa->nodes + cls_node;
    2796        if (node->type == type
    2797  	  && node->opr.idx == subexp_idx)
    2798  	return cls_node;
    2799      }
    2800    return -1;
    2801  }
    2802  
    2803  /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
    2804     LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
    2805     heavily reused.
    2806     Return REG_NOERROR if it can arrive, REG_NOMATCH if it cannot,
    2807     REG_ESPACE if memory is exhausted.  */
    2808  
    2809  static reg_errcode_t
    2810  __attribute_warn_unused_result__
    2811  check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
    2812  	       Idx top_str, Idx last_node, Idx last_str, int type)
    2813  {
    2814    const re_dfa_t *const dfa = mctx->dfa;
    2815    reg_errcode_t err = REG_NOERROR;
    2816    Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
    2817    re_dfastate_t *cur_state = NULL;
    2818    re_node_set *cur_nodes, next_nodes;
    2819    re_dfastate_t **backup_state_log;
    2820    unsigned int context;
    2821  
    2822    subexp_num = dfa->nodes[top_node].opr.idx;
    2823    /* Extend the buffer if we need.  */
    2824    if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + 1))
    2825      {
    2826        re_dfastate_t **new_array;
    2827        Idx old_alloc = path->alloc;
    2828        Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
    2829        Idx new_alloc;
    2830        if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc))
    2831  	return REG_ESPACE;
    2832        new_alloc = old_alloc + incr_alloc;
    2833        if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc))
    2834  	return REG_ESPACE;
    2835        new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
    2836        if (__glibc_unlikely (new_array == NULL))
    2837  	return REG_ESPACE;
    2838        path->array = new_array;
    2839        path->alloc = new_alloc;
    2840        memset (new_array + old_alloc, '\0',
    2841  	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
    2842      }
    2843  
    2844    str_idx = path->next_idx ? path->next_idx : top_str;
    2845  
    2846    /* Temporary modify MCTX.  */
    2847    backup_state_log = mctx->state_log;
    2848    backup_cur_idx = mctx->input.cur_idx;
    2849    mctx->state_log = path->array;
    2850    mctx->input.cur_idx = str_idx;
    2851  
    2852    /* Setup initial node set.  */
    2853    context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
    2854    if (str_idx == top_str)
    2855      {
    2856        err = re_node_set_init_1 (&next_nodes, top_node);
    2857        if (__glibc_unlikely (err != REG_NOERROR))
    2858  	return err;
    2859        err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
    2860        if (__glibc_unlikely (err != REG_NOERROR))
    2861  	{
    2862  	  re_node_set_free (&next_nodes);
    2863  	  return err;
    2864  	}
    2865      }
    2866    else
    2867      {
    2868        cur_state = mctx->state_log[str_idx];
    2869        if (cur_state && cur_state->has_backref)
    2870  	{
    2871  	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
    2872  	  if (__glibc_unlikely (err != REG_NOERROR))
    2873  	    return err;
    2874  	}
    2875        else
    2876  	re_node_set_init_empty (&next_nodes);
    2877      }
    2878    if (str_idx == top_str || (cur_state && cur_state->has_backref))
    2879      {
    2880        if (next_nodes.nelem)
    2881  	{
    2882  	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
    2883  				    subexp_num, type);
    2884  	  if (__glibc_unlikely (err != REG_NOERROR))
    2885  	    {
    2886  	      re_node_set_free (&next_nodes);
    2887  	      return err;
    2888  	    }
    2889  	}
    2890        cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
    2891        if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
    2892  	{
    2893  	  re_node_set_free (&next_nodes);
    2894  	  return err;
    2895  	}
    2896        mctx->state_log[str_idx] = cur_state;
    2897      }
    2898  
    2899    for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
    2900      {
    2901        re_node_set_empty (&next_nodes);
    2902        if (mctx->state_log[str_idx + 1])
    2903  	{
    2904  	  err = re_node_set_merge (&next_nodes,
    2905  				   &mctx->state_log[str_idx + 1]->nodes);
    2906  	  if (__glibc_unlikely (err != REG_NOERROR))
    2907  	    {
    2908  	      re_node_set_free (&next_nodes);
    2909  	      return err;
    2910  	    }
    2911  	}
    2912        if (cur_state)
    2913  	{
    2914  	  err = check_arrival_add_next_nodes (mctx, str_idx,
    2915  					      &cur_state->non_eps_nodes,
    2916  					      &next_nodes);
    2917  	  if (__glibc_unlikely (err != REG_NOERROR))
    2918  	    {
    2919  	      re_node_set_free (&next_nodes);
    2920  	      return err;
    2921  	    }
    2922  	}
    2923        ++str_idx;
    2924        if (next_nodes.nelem)
    2925  	{
    2926  	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
    2927  	  if (__glibc_unlikely (err != REG_NOERROR))
    2928  	    {
    2929  	      re_node_set_free (&next_nodes);
    2930  	      return err;
    2931  	    }
    2932  	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
    2933  				    subexp_num, type);
    2934  	  if (__glibc_unlikely (err != REG_NOERROR))
    2935  	    {
    2936  	      re_node_set_free (&next_nodes);
    2937  	      return err;
    2938  	    }
    2939  	}
    2940        context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
    2941        cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
    2942        if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
    2943  	{
    2944  	  re_node_set_free (&next_nodes);
    2945  	  return err;
    2946  	}
    2947        mctx->state_log[str_idx] = cur_state;
    2948        null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
    2949      }
    2950    re_node_set_free (&next_nodes);
    2951    cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
    2952  	       : &mctx->state_log[last_str]->nodes);
    2953    path->next_idx = str_idx;
    2954  
    2955    /* Fix MCTX.  */
    2956    mctx->state_log = backup_state_log;
    2957    mctx->input.cur_idx = backup_cur_idx;
    2958  
    2959    /* Then check the current node set has the node LAST_NODE.  */
    2960    if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
    2961      return REG_NOERROR;
    2962  
    2963    return REG_NOMATCH;
    2964  }
    2965  
    2966  /* Helper functions for check_arrival.  */
    2967  
    2968  /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
    2969     to NEXT_NODES.
    2970     TODO: This function is similar to the functions transit_state*(),
    2971  	 however this function has many additional works.
    2972  	 Can't we unify them?  */
    2973  
    2974  static reg_errcode_t
    2975  __attribute_warn_unused_result__
    2976  check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
    2977  			      re_node_set *cur_nodes, re_node_set *next_nodes)
    2978  {
    2979    const re_dfa_t *const dfa = mctx->dfa;
    2980    bool ok;
    2981    Idx cur_idx;
    2982    reg_errcode_t err = REG_NOERROR;
    2983    re_node_set union_set;
    2984    re_node_set_init_empty (&union_set);
    2985    for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
    2986      {
    2987        int naccepted = 0;
    2988        Idx cur_node = cur_nodes->elems[cur_idx];
    2989        DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type));
    2990  
    2991        /* If the node may accept "multi byte".  */
    2992        if (dfa->nodes[cur_node].accept_mb)
    2993  	{
    2994  	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
    2995  					       str_idx);
    2996  	  if (naccepted > 1)
    2997  	    {
    2998  	      re_dfastate_t *dest_state;
    2999  	      Idx next_node = dfa->nexts[cur_node];
    3000  	      Idx next_idx = str_idx + naccepted;
    3001  	      dest_state = mctx->state_log[next_idx];
    3002  	      re_node_set_empty (&union_set);
    3003  	      if (dest_state)
    3004  		{
    3005  		  err = re_node_set_merge (&union_set, &dest_state->nodes);
    3006  		  if (__glibc_unlikely (err != REG_NOERROR))
    3007  		    {
    3008  		      re_node_set_free (&union_set);
    3009  		      return err;
    3010  		    }
    3011  		}
    3012  	      ok = re_node_set_insert (&union_set, next_node);
    3013  	      if (__glibc_unlikely (! ok))
    3014  		{
    3015  		  re_node_set_free (&union_set);
    3016  		  return REG_ESPACE;
    3017  		}
    3018  	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
    3019  							    &union_set);
    3020  	      if (__glibc_unlikely (mctx->state_log[next_idx] == NULL
    3021  				    && err != REG_NOERROR))
    3022  		{
    3023  		  re_node_set_free (&union_set);
    3024  		  return err;
    3025  		}
    3026  	    }
    3027  	}
    3028  
    3029        if (naccepted
    3030  	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
    3031  	{
    3032  	  ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
    3033  	  if (__glibc_unlikely (! ok))
    3034  	    {
    3035  	      re_node_set_free (&union_set);
    3036  	      return REG_ESPACE;
    3037  	    }
    3038  	}
    3039      }
    3040    re_node_set_free (&union_set);
    3041    return REG_NOERROR;
    3042  }
    3043  
    3044  /* For all the nodes in CUR_NODES, add the epsilon closures of them to
    3045     CUR_NODES, however exclude the nodes which are:
    3046      - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
    3047      - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
    3048  */
    3049  
    3050  static reg_errcode_t
    3051  check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
    3052  			  Idx ex_subexp, int type)
    3053  {
    3054    reg_errcode_t err;
    3055    Idx idx, outside_node;
    3056    re_node_set new_nodes;
    3057    DEBUG_ASSERT (cur_nodes->nelem);
    3058    err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
    3059    if (__glibc_unlikely (err != REG_NOERROR))
    3060      return err;
    3061    /* Create a new node set NEW_NODES with the nodes which are epsilon
    3062       closures of the node in CUR_NODES.  */
    3063  
    3064    for (idx = 0; idx < cur_nodes->nelem; ++idx)
    3065      {
    3066        Idx cur_node = cur_nodes->elems[idx];
    3067        const re_node_set *eclosure = dfa->eclosures + cur_node;
    3068        outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
    3069        if (outside_node == -1)
    3070  	{
    3071  	  /* There are no problematic nodes, just merge them.  */
    3072  	  err = re_node_set_merge (&new_nodes, eclosure);
    3073  	  if (__glibc_unlikely (err != REG_NOERROR))
    3074  	    {
    3075  	      re_node_set_free (&new_nodes);
    3076  	      return err;
    3077  	    }
    3078  	}
    3079        else
    3080  	{
    3081  	  /* There are problematic nodes, re-calculate incrementally.  */
    3082  	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
    3083  					      ex_subexp, type);
    3084  	  if (__glibc_unlikely (err != REG_NOERROR))
    3085  	    {
    3086  	      re_node_set_free (&new_nodes);
    3087  	      return err;
    3088  	    }
    3089  	}
    3090      }
    3091    re_node_set_free (cur_nodes);
    3092    *cur_nodes = new_nodes;
    3093    return REG_NOERROR;
    3094  }
    3095  
    3096  /* Helper function for check_arrival_expand_ecl.
    3097     Check incrementally the epsilon closure of TARGET, and if it isn't
    3098     problematic append it to DST_NODES.  */
    3099  
    3100  static reg_errcode_t
    3101  __attribute_warn_unused_result__
    3102  check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
    3103  			      Idx target, Idx ex_subexp, int type)
    3104  {
    3105    Idx cur_node;
    3106    for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
    3107      {
    3108        bool ok;
    3109  
    3110        if (dfa->nodes[cur_node].type == type
    3111  	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
    3112  	{
    3113  	  if (type == OP_CLOSE_SUBEXP)
    3114  	    {
    3115  	      ok = re_node_set_insert (dst_nodes, cur_node);
    3116  	      if (__glibc_unlikely (! ok))
    3117  		return REG_ESPACE;
    3118  	    }
    3119  	  break;
    3120  	}
    3121        ok = re_node_set_insert (dst_nodes, cur_node);
    3122        if (__glibc_unlikely (! ok))
    3123  	return REG_ESPACE;
    3124        if (dfa->edests[cur_node].nelem == 0)
    3125  	break;
    3126        if (dfa->edests[cur_node].nelem == 2)
    3127  	{
    3128  	  reg_errcode_t err;
    3129  	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
    3130  					      dfa->edests[cur_node].elems[1],
    3131  					      ex_subexp, type);
    3132  	  if (__glibc_unlikely (err != REG_NOERROR))
    3133  	    return err;
    3134  	}
    3135        cur_node = dfa->edests[cur_node].elems[0];
    3136      }
    3137    return REG_NOERROR;
    3138  }
    3139  
    3140  
    3141  /* For all the back references in the current state, calculate the
    3142     destination of the back references by the appropriate entry
    3143     in MCTX->BKREF_ENTS.  */
    3144  
    3145  static reg_errcode_t
    3146  __attribute_warn_unused_result__
    3147  expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
    3148  		    Idx cur_str, Idx subexp_num, int type)
    3149  {
    3150    const re_dfa_t *const dfa = mctx->dfa;
    3151    reg_errcode_t err;
    3152    Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
    3153    struct re_backref_cache_entry *ent;
    3154  
    3155    if (cache_idx_start == -1)
    3156      return REG_NOERROR;
    3157  
    3158   restart:
    3159    ent = mctx->bkref_ents + cache_idx_start;
    3160    do
    3161      {
    3162        Idx to_idx, next_node;
    3163  
    3164        /* Is this entry ENT is appropriate?  */
    3165        if (!re_node_set_contains (cur_nodes, ent->node))
    3166  	continue; /* No.  */
    3167  
    3168        to_idx = cur_str + ent->subexp_to - ent->subexp_from;
    3169        /* Calculate the destination of the back reference, and append it
    3170  	 to MCTX->STATE_LOG.  */
    3171        if (to_idx == cur_str)
    3172  	{
    3173  	  /* The backreference did epsilon transit, we must re-check all the
    3174  	     node in the current state.  */
    3175  	  re_node_set new_dests;
    3176  	  reg_errcode_t err2, err3;
    3177  	  next_node = dfa->edests[ent->node].elems[0];
    3178  	  if (re_node_set_contains (cur_nodes, next_node))
    3179  	    continue;
    3180  	  err = re_node_set_init_1 (&new_dests, next_node);
    3181  	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
    3182  	  err3 = re_node_set_merge (cur_nodes, &new_dests);
    3183  	  re_node_set_free (&new_dests);
    3184  	  if (__glibc_unlikely (err != REG_NOERROR || err2 != REG_NOERROR
    3185  				|| err3 != REG_NOERROR))
    3186  	    {
    3187  	      err = (err != REG_NOERROR ? err
    3188  		     : (err2 != REG_NOERROR ? err2 : err3));
    3189  	      return err;
    3190  	    }
    3191  	  /* TODO: It is still inefficient...  */
    3192  	  goto restart;
    3193  	}
    3194        else
    3195  	{
    3196  	  re_node_set union_set;
    3197  	  next_node = dfa->nexts[ent->node];
    3198  	  if (mctx->state_log[to_idx])
    3199  	    {
    3200  	      bool ok;
    3201  	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
    3202  					next_node))
    3203  		continue;
    3204  	      err = re_node_set_init_copy (&union_set,
    3205  					   &mctx->state_log[to_idx]->nodes);
    3206  	      ok = re_node_set_insert (&union_set, next_node);
    3207  	      if (__glibc_unlikely (err != REG_NOERROR || ! ok))
    3208  		{
    3209  		  re_node_set_free (&union_set);
    3210  		  err = err != REG_NOERROR ? err : REG_ESPACE;
    3211  		  return err;
    3212  		}
    3213  	    }
    3214  	  else
    3215  	    {
    3216  	      err = re_node_set_init_1 (&union_set, next_node);
    3217  	      if (__glibc_unlikely (err != REG_NOERROR))
    3218  		return err;
    3219  	    }
    3220  	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
    3221  	  re_node_set_free (&union_set);
    3222  	  if (__glibc_unlikely (mctx->state_log[to_idx] == NULL
    3223  				&& err != REG_NOERROR))
    3224  	    return err;
    3225  	}
    3226      }
    3227    while (ent++->more);
    3228    return REG_NOERROR;
    3229  }
    3230  
    3231  /* Build transition table for the state.
    3232     Return true if successful.  */
    3233  
    3234  static bool __attribute_noinline__
    3235  build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
    3236  {
    3237    reg_errcode_t err;
    3238    Idx i, j;
    3239    int ch;
    3240    bool need_word_trtable = false;
    3241    bitset_word_t elem, mask;
    3242    Idx ndests; /* Number of the destination states from 'state'.  */
    3243    re_dfastate_t **trtable;
    3244    re_dfastate_t *dest_states[SBC_MAX];
    3245    re_dfastate_t *dest_states_word[SBC_MAX];
    3246    re_dfastate_t *dest_states_nl[SBC_MAX];
    3247    re_node_set follows;
    3248    bitset_t acceptable;
    3249  
    3250    /* We build DFA states which corresponds to the destination nodes
    3251       from 'state'.  'dests_node[i]' represents the nodes which i-th
    3252       destination state contains, and 'dests_ch[i]' represents the
    3253       characters which i-th destination state accepts.  */
    3254    re_node_set dests_node[SBC_MAX];
    3255    bitset_t dests_ch[SBC_MAX];
    3256  
    3257    /* Initialize transition table.  */
    3258    state->word_trtable = state->trtable = NULL;
    3259  
    3260    /* At first, group all nodes belonging to 'state' into several
    3261       destinations.  */
    3262    ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
    3263    if (__glibc_unlikely (ndests <= 0))
    3264      {
    3265        /* Return false in case of an error, true otherwise.  */
    3266        if (ndests == 0)
    3267  	{
    3268  	  state->trtable = (re_dfastate_t **)
    3269  	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
    3270            if (__glibc_unlikely (state->trtable == NULL))
    3271              return false;
    3272  	  return true;
    3273  	}
    3274        return false;
    3275      }
    3276  
    3277    err = re_node_set_alloc (&follows, ndests + 1);
    3278    if (__glibc_unlikely (err != REG_NOERROR))
    3279      {
    3280      out_free:
    3281        re_node_set_free (&follows);
    3282        for (i = 0; i < ndests; ++i)
    3283  	re_node_set_free (dests_node + i);
    3284        return false;
    3285      }
    3286  
    3287    bitset_empty (acceptable);
    3288  
    3289    /* Then build the states for all destinations.  */
    3290    for (i = 0; i < ndests; ++i)
    3291      {
    3292        Idx next_node;
    3293        re_node_set_empty (&follows);
    3294        /* Merge the follows of this destination states.  */
    3295        for (j = 0; j < dests_node[i].nelem; ++j)
    3296  	{
    3297  	  next_node = dfa->nexts[dests_node[i].elems[j]];
    3298  	  if (next_node != -1)
    3299  	    {
    3300  	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
    3301  	      if (__glibc_unlikely (err != REG_NOERROR))
    3302  		goto out_free;
    3303  	    }
    3304  	}
    3305        dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
    3306        if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR))
    3307  	goto out_free;
    3308        /* If the new state has context constraint,
    3309  	 build appropriate states for these contexts.  */
    3310        if (dest_states[i]->has_constraint)
    3311  	{
    3312  	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
    3313  							  CONTEXT_WORD);
    3314  	  if (__glibc_unlikely (dest_states_word[i] == NULL
    3315  				&& err != REG_NOERROR))
    3316  	    goto out_free;
    3317  
    3318  	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
    3319  	    need_word_trtable = true;
    3320  
    3321  	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
    3322  							CONTEXT_NEWLINE);
    3323  	  if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR))
    3324  	    goto out_free;
    3325  	}
    3326        else
    3327  	{
    3328  	  dest_states_word[i] = dest_states[i];
    3329  	  dest_states_nl[i] = dest_states[i];
    3330  	}
    3331        bitset_merge (acceptable, dests_ch[i]);
    3332      }
    3333  
    3334    if (!__glibc_unlikely (need_word_trtable))
    3335      {
    3336        /* We don't care about whether the following character is a word
    3337  	 character, or we are in a single-byte character set so we can
    3338  	 discern by looking at the character code: allocate a
    3339  	 256-entry transition table.  */
    3340        trtable = state->trtable =
    3341  	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
    3342        if (__glibc_unlikely (trtable == NULL))
    3343  	goto out_free;
    3344  
    3345        /* For all characters ch...:  */
    3346        for (i = 0; i < BITSET_WORDS; ++i)
    3347  	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
    3348  	     elem;
    3349  	     mask <<= 1, elem >>= 1, ++ch)
    3350  	  if (__glibc_unlikely (elem & 1))
    3351  	    {
    3352  	      /* There must be exactly one destination which accepts
    3353  		 character ch.  See group_nodes_into_DFAstates.  */
    3354  	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
    3355  		;
    3356  
    3357  	      /* j-th destination accepts the word character ch.  */
    3358  	      if (dfa->word_char[i] & mask)
    3359  		trtable[ch] = dest_states_word[j];
    3360  	      else
    3361  		trtable[ch] = dest_states[j];
    3362  	    }
    3363      }
    3364    else
    3365      {
    3366        /* We care about whether the following character is a word
    3367  	 character, and we are in a multi-byte character set: discern
    3368  	 by looking at the character code: build two 256-entry
    3369  	 transition tables, one starting at trtable[0] and one
    3370  	 starting at trtable[SBC_MAX].  */
    3371        trtable = state->word_trtable =
    3372  	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
    3373        if (__glibc_unlikely (trtable == NULL))
    3374  	goto out_free;
    3375  
    3376        /* For all characters ch...:  */
    3377        for (i = 0; i < BITSET_WORDS; ++i)
    3378  	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
    3379  	     elem;
    3380  	     mask <<= 1, elem >>= 1, ++ch)
    3381  	  if (__glibc_unlikely (elem & 1))
    3382  	    {
    3383  	      /* There must be exactly one destination which accepts
    3384  		 character ch.  See group_nodes_into_DFAstates.  */
    3385  	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
    3386  		;
    3387  
    3388  	      /* j-th destination accepts the word character ch.  */
    3389  	      trtable[ch] = dest_states[j];
    3390  	      trtable[ch + SBC_MAX] = dest_states_word[j];
    3391  	    }
    3392      }
    3393  
    3394    /* new line */
    3395    if (bitset_contain (acceptable, NEWLINE_CHAR))
    3396      {
    3397        /* The current state accepts newline character.  */
    3398        for (j = 0; j < ndests; ++j)
    3399  	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
    3400  	  {
    3401  	    /* k-th destination accepts newline character.  */
    3402  	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
    3403  	    if (need_word_trtable)
    3404  	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
    3405  	    /* There must be only one destination which accepts
    3406  	       newline.  See group_nodes_into_DFAstates.  */
    3407  	    break;
    3408  	  }
    3409      }
    3410  
    3411    re_node_set_free (&follows);
    3412    for (i = 0; i < ndests; ++i)
    3413      re_node_set_free (dests_node + i);
    3414    return true;
    3415  }
    3416  
    3417  /* Group all nodes belonging to STATE into several destinations.
    3418     Then for all destinations, set the nodes belonging to the destination
    3419     to DESTS_NODE[i] and set the characters accepted by the destination
    3420     to DEST_CH[i].  Return the number of destinations if successful,
    3421     -1 on internal error.  */
    3422  
    3423  static Idx
    3424  group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
    3425  			    re_node_set *dests_node, bitset_t *dests_ch)
    3426  {
    3427    reg_errcode_t err;
    3428    bool ok;
    3429    Idx i, j, k;
    3430    Idx ndests; /* Number of the destinations from 'state'.  */
    3431    bitset_t accepts; /* Characters a node can accept.  */
    3432    const re_node_set *cur_nodes = &state->nodes;
    3433    bitset_empty (accepts);
    3434    ndests = 0;
    3435  
    3436    /* For all the nodes belonging to 'state',  */
    3437    for (i = 0; i < cur_nodes->nelem; ++i)
    3438      {
    3439        re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
    3440        re_token_type_t type = node->type;
    3441        unsigned int constraint = node->constraint;
    3442  
    3443        /* Enumerate all single byte character this node can accept.  */
    3444        if (type == CHARACTER)
    3445  	bitset_set (accepts, node->opr.c);
    3446        else if (type == SIMPLE_BRACKET)
    3447  	{
    3448  	  bitset_merge (accepts, node->opr.sbcset);
    3449  	}
    3450        else if (type == OP_PERIOD)
    3451  	{
    3452  	  if (dfa->mb_cur_max > 1)
    3453  	    bitset_merge (accepts, dfa->sb_char);
    3454  	  else
    3455  	    bitset_set_all (accepts);
    3456  	  if (!(dfa->syntax & RE_DOT_NEWLINE))
    3457  	    bitset_clear (accepts, '\n');
    3458  	  if (dfa->syntax & RE_DOT_NOT_NULL)
    3459  	    bitset_clear (accepts, '\0');
    3460  	}
    3461        else if (type == OP_UTF8_PERIOD)
    3462  	{
    3463  	  if (ASCII_CHARS % BITSET_WORD_BITS == 0)
    3464  	    memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
    3465  	  else
    3466  	    bitset_merge (accepts, utf8_sb_map);
    3467  	  if (!(dfa->syntax & RE_DOT_NEWLINE))
    3468  	    bitset_clear (accepts, '\n');
    3469  	  if (dfa->syntax & RE_DOT_NOT_NULL)
    3470  	    bitset_clear (accepts, '\0');
    3471  	}
    3472        else
    3473  	continue;
    3474  
    3475        /* Check the 'accepts' and sift the characters which are not
    3476  	 match it the context.  */
    3477        if (constraint)
    3478  	{
    3479  	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
    3480  	    {
    3481  	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
    3482  	      bitset_empty (accepts);
    3483  	      if (accepts_newline)
    3484  		bitset_set (accepts, NEWLINE_CHAR);
    3485  	      else
    3486  		continue;
    3487  	    }
    3488  	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
    3489  	    {
    3490  	      bitset_empty (accepts);
    3491  	      continue;
    3492  	    }
    3493  
    3494  	  if (constraint & NEXT_WORD_CONSTRAINT)
    3495  	    {
    3496  	      bitset_word_t any_set = 0;
    3497  	      if (type == CHARACTER && !node->word_char)
    3498  		{
    3499  		  bitset_empty (accepts);
    3500  		  continue;
    3501  		}
    3502  	      if (dfa->mb_cur_max > 1)
    3503  		for (j = 0; j < BITSET_WORDS; ++j)
    3504  		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
    3505  	      else
    3506  		for (j = 0; j < BITSET_WORDS; ++j)
    3507  		  any_set |= (accepts[j] &= dfa->word_char[j]);
    3508  	      if (!any_set)
    3509  		continue;
    3510  	    }
    3511  	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
    3512  	    {
    3513  	      bitset_word_t any_set = 0;
    3514  	      if (type == CHARACTER && node->word_char)
    3515  		{
    3516  		  bitset_empty (accepts);
    3517  		  continue;
    3518  		}
    3519  	      if (dfa->mb_cur_max > 1)
    3520  		for (j = 0; j < BITSET_WORDS; ++j)
    3521  		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
    3522  	      else
    3523  		for (j = 0; j < BITSET_WORDS; ++j)
    3524  		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
    3525  	      if (!any_set)
    3526  		continue;
    3527  	    }
    3528  	}
    3529  
    3530        /* Then divide 'accepts' into DFA states, or create a new
    3531  	 state.  Above, we make sure that accepts is not empty.  */
    3532        for (j = 0; j < ndests; ++j)
    3533  	{
    3534  	  bitset_t intersec; /* Intersection sets, see below.  */
    3535  	  bitset_t remains;
    3536  	  /* Flags, see below.  */
    3537  	  bitset_word_t has_intersec, not_subset, not_consumed;
    3538  
    3539  	  /* Optimization, skip if this state doesn't accept the character.  */
    3540  	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
    3541  	    continue;
    3542  
    3543  	  /* Enumerate the intersection set of this state and 'accepts'.  */
    3544  	  has_intersec = 0;
    3545  	  for (k = 0; k < BITSET_WORDS; ++k)
    3546  	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
    3547  	  /* And skip if the intersection set is empty.  */
    3548  	  if (!has_intersec)
    3549  	    continue;
    3550  
    3551  	  /* Then check if this state is a subset of 'accepts'.  */
    3552  	  not_subset = not_consumed = 0;
    3553  	  for (k = 0; k < BITSET_WORDS; ++k)
    3554  	    {
    3555  	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
    3556  	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
    3557  	    }
    3558  
    3559  	  /* If this state isn't a subset of 'accepts', create a
    3560  	     new group state, which has the 'remains'. */
    3561  	  if (not_subset)
    3562  	    {
    3563  	      bitset_copy (dests_ch[ndests], remains);
    3564  	      bitset_copy (dests_ch[j], intersec);
    3565  	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
    3566  	      if (__glibc_unlikely (err != REG_NOERROR))
    3567  		goto error_return;
    3568  	      ++ndests;
    3569  	    }
    3570  
    3571  	  /* Put the position in the current group. */
    3572  	  ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
    3573  	  if (__glibc_unlikely (! ok))
    3574  	    goto error_return;
    3575  
    3576  	  /* If all characters are consumed, go to next node. */
    3577  	  if (!not_consumed)
    3578  	    break;
    3579  	}
    3580        /* Some characters remain, create a new group. */
    3581        if (j == ndests)
    3582  	{
    3583  	  bitset_copy (dests_ch[ndests], accepts);
    3584  	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
    3585  	  if (__glibc_unlikely (err != REG_NOERROR))
    3586  	    goto error_return;
    3587  	  ++ndests;
    3588  	  bitset_empty (accepts);
    3589  	}
    3590      }
    3591    assume (ndests <= SBC_MAX);
    3592    return ndests;
    3593   error_return:
    3594    for (j = 0; j < ndests; ++j)
    3595      re_node_set_free (dests_node + j);
    3596    return -1;
    3597  }
    3598  
    3599  /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
    3600     Return the number of the bytes the node accepts.
    3601     STR_IDX is the current index of the input string.
    3602  
    3603     This function handles the nodes which can accept one character, or
    3604     one collating element like '.', '[a-z]', opposite to the other nodes
    3605     can only accept one byte.  */
    3606  
    3607  #ifdef _LIBC
    3608  # include <locale/weight.h>
    3609  #endif
    3610  
    3611  static int
    3612  check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
    3613  			 const re_string_t *input, Idx str_idx)
    3614  {
    3615    const re_token_t *node = dfa->nodes + node_idx;
    3616    int char_len, elem_len;
    3617    Idx i;
    3618  
    3619    if (__glibc_unlikely (node->type == OP_UTF8_PERIOD))
    3620      {
    3621        unsigned char c = re_string_byte_at (input, str_idx), d;
    3622        if (__glibc_likely (c < 0xc2))
    3623  	return 0;
    3624  
    3625        if (str_idx + 2 > input->len)
    3626  	return 0;
    3627  
    3628        d = re_string_byte_at (input, str_idx + 1);
    3629        if (c < 0xe0)
    3630  	return (d < 0x80 || d > 0xbf) ? 0 : 2;
    3631        else if (c < 0xf0)
    3632  	{
    3633  	  char_len = 3;
    3634  	  if (c == 0xe0 && d < 0xa0)
    3635  	    return 0;
    3636  	}
    3637        else if (c < 0xf8)
    3638  	{
    3639  	  char_len = 4;
    3640  	  if (c == 0xf0 && d < 0x90)
    3641  	    return 0;
    3642  	}
    3643        else if (c < 0xfc)
    3644  	{
    3645  	  char_len = 5;
    3646  	  if (c == 0xf8 && d < 0x88)
    3647  	    return 0;
    3648  	}
    3649        else if (c < 0xfe)
    3650  	{
    3651  	  char_len = 6;
    3652  	  if (c == 0xfc && d < 0x84)
    3653  	    return 0;
    3654  	}
    3655        else
    3656  	return 0;
    3657  
    3658        if (str_idx + char_len > input->len)
    3659  	return 0;
    3660  
    3661        for (i = 1; i < char_len; ++i)
    3662  	{
    3663  	  d = re_string_byte_at (input, str_idx + i);
    3664  	  if (d < 0x80 || d > 0xbf)
    3665  	    return 0;
    3666  	}
    3667        return char_len;
    3668      }
    3669  
    3670    char_len = re_string_char_size_at (input, str_idx);
    3671    if (node->type == OP_PERIOD)
    3672      {
    3673        if (char_len <= 1)
    3674  	return 0;
    3675        /* FIXME: I don't think this if is needed, as both '\n'
    3676  	 and '\0' are char_len == 1.  */
    3677        /* '.' accepts any one character except the following two cases.  */
    3678        if ((!(dfa->syntax & RE_DOT_NEWLINE)
    3679  	   && re_string_byte_at (input, str_idx) == '\n')
    3680  	  || ((dfa->syntax & RE_DOT_NOT_NULL)
    3681  	      && re_string_byte_at (input, str_idx) == '\0'))
    3682  	return 0;
    3683        return char_len;
    3684      }
    3685  
    3686    elem_len = re_string_elem_size_at (input, str_idx);
    3687    if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
    3688      return 0;
    3689  
    3690    if (node->type == COMPLEX_BRACKET)
    3691      {
    3692        const re_charset_t *cset = node->opr.mbcset;
    3693  #ifdef _LIBC
    3694        const unsigned char *pin
    3695  	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
    3696        Idx j;
    3697        uint32_t nrules;
    3698  #endif
    3699        int match_len = 0;
    3700        wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
    3701  		    ? re_string_wchar_at (input, str_idx) : 0);
    3702  
    3703        /* match with multibyte character?  */
    3704        for (i = 0; i < cset->nmbchars; ++i)
    3705  	if (wc == cset->mbchars[i])
    3706  	  {
    3707  	    match_len = char_len;
    3708  	    goto check_node_accept_bytes_match;
    3709  	  }
    3710        /* match with character_class?  */
    3711        for (i = 0; i < cset->nchar_classes; ++i)
    3712  	{
    3713  	  wctype_t wt = cset->char_classes[i];
    3714  	  if (__iswctype (wc, wt))
    3715  	    {
    3716  	      match_len = char_len;
    3717  	      goto check_node_accept_bytes_match;
    3718  	    }
    3719  	}
    3720  
    3721  #ifdef _LIBC
    3722        nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
    3723        if (nrules != 0)
    3724  	{
    3725  	  unsigned int in_collseq = 0;
    3726  	  const int32_t *table, *indirect;
    3727  	  const unsigned char *weights, *extra;
    3728  	  const char *collseqwc;
    3729  
    3730  	  /* match with collating_symbol?  */
    3731  	  if (cset->ncoll_syms)
    3732  	    extra = (const unsigned char *)
    3733  	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
    3734  	  for (i = 0; i < cset->ncoll_syms; ++i)
    3735  	    {
    3736  	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
    3737  	      /* Compare the length of input collating element and
    3738  		 the length of current collating element.  */
    3739  	      if (*coll_sym != elem_len)
    3740  		continue;
    3741  	      /* Compare each bytes.  */
    3742  	      for (j = 0; j < *coll_sym; j++)
    3743  		if (pin[j] != coll_sym[1 + j])
    3744  		  break;
    3745  	      if (j == *coll_sym)
    3746  		{
    3747  		  /* Match if every bytes is equal.  */
    3748  		  match_len = j;
    3749  		  goto check_node_accept_bytes_match;
    3750  		}
    3751  	    }
    3752  
    3753  	  if (cset->nranges)
    3754  	    {
    3755  	      if (elem_len <= char_len)
    3756  		{
    3757  		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
    3758  		  in_collseq = __collseq_table_lookup (collseqwc, wc);
    3759  		}
    3760  	      else
    3761  		in_collseq = find_collation_sequence_value (pin, elem_len);
    3762  	    }
    3763  	  /* match with range expression?  */
    3764  	  /* FIXME: Implement rational ranges here, too.  */
    3765  	  for (i = 0; i < cset->nranges; ++i)
    3766  	    if (cset->range_starts[i] <= in_collseq
    3767  		&& in_collseq <= cset->range_ends[i])
    3768  	      {
    3769  		match_len = elem_len;
    3770  		goto check_node_accept_bytes_match;
    3771  	      }
    3772  
    3773  	  /* match with equivalence_class?  */
    3774  	  if (cset->nequiv_classes)
    3775  	    {
    3776  	      const unsigned char *cp = pin;
    3777  	      table = (const int32_t *)
    3778  		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
    3779  	      weights = (const unsigned char *)
    3780  		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
    3781  	      extra = (const unsigned char *)
    3782  		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
    3783  	      indirect = (const int32_t *)
    3784  		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
    3785  	      int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
    3786  	      int32_t rule = idx >> 24;
    3787  	      idx &= 0xffffff;
    3788  	      if (idx > 0)
    3789  		{
    3790  		  size_t weight_len = weights[idx];
    3791  		  for (i = 0; i < cset->nequiv_classes; ++i)
    3792  		    {
    3793  		      int32_t equiv_class_idx = cset->equiv_classes[i];
    3794  		      int32_t equiv_class_rule = equiv_class_idx >> 24;
    3795  		      equiv_class_idx &= 0xffffff;
    3796  		      if (weights[equiv_class_idx] == weight_len
    3797  			  && equiv_class_rule == rule
    3798  			  && memcmp (weights + idx + 1,
    3799  				     weights + equiv_class_idx + 1,
    3800  				     weight_len) == 0)
    3801  			{
    3802  			  match_len = elem_len;
    3803  			  goto check_node_accept_bytes_match;
    3804  			}
    3805  		    }
    3806  		}
    3807  	    }
    3808  	}
    3809        else
    3810  #endif /* _LIBC */
    3811  	{
    3812  	  /* match with range expression?  */
    3813  	  for (i = 0; i < cset->nranges; ++i)
    3814  	    {
    3815  	      if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
    3816  		{
    3817  		  match_len = char_len;
    3818  		  goto check_node_accept_bytes_match;
    3819  		}
    3820  	    }
    3821  	}
    3822      check_node_accept_bytes_match:
    3823        if (!cset->non_match)
    3824  	return match_len;
    3825        else
    3826  	{
    3827  	  if (match_len > 0)
    3828  	    return 0;
    3829  	  else
    3830  	    return (elem_len > char_len) ? elem_len : char_len;
    3831  	}
    3832      }
    3833    return 0;
    3834  }
    3835  
    3836  #ifdef _LIBC
    3837  static unsigned int
    3838  find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
    3839  {
    3840    uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
    3841    if (nrules == 0)
    3842      {
    3843        if (mbs_len == 1)
    3844  	{
    3845  	  /* No valid character.  Match it as a single byte character.  */
    3846  	  const unsigned char *collseq = (const unsigned char *)
    3847  	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
    3848  	  return collseq[mbs[0]];
    3849  	}
    3850        return UINT_MAX;
    3851      }
    3852    else
    3853      {
    3854        int32_t idx;
    3855        const unsigned char *extra = (const unsigned char *)
    3856  	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
    3857        int32_t extrasize = (const unsigned char *)
    3858  	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
    3859  
    3860        for (idx = 0; idx < extrasize;)
    3861  	{
    3862  	  int mbs_cnt;
    3863  	  bool found = false;
    3864  	  int32_t elem_mbs_len;
    3865  	  /* Skip the name of collating element name.  */
    3866  	  idx = idx + extra[idx] + 1;
    3867  	  elem_mbs_len = extra[idx++];
    3868  	  if (mbs_len == elem_mbs_len)
    3869  	    {
    3870  	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
    3871  		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
    3872  		  break;
    3873  	      if (mbs_cnt == elem_mbs_len)
    3874  		/* Found the entry.  */
    3875  		found = true;
    3876  	    }
    3877  	  /* Skip the byte sequence of the collating element.  */
    3878  	  idx += elem_mbs_len;
    3879  	  /* Adjust for the alignment.  */
    3880  	  idx = (idx + 3) & ~3;
    3881  	  /* Skip the collation sequence value.  */
    3882  	  idx += sizeof (uint32_t);
    3883  	  /* Skip the wide char sequence of the collating element.  */
    3884  	  idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
    3885  	  /* If we found the entry, return the sequence value.  */
    3886  	  if (found)
    3887  	    return *(uint32_t *) (extra + idx);
    3888  	  /* Skip the collation sequence value.  */
    3889  	  idx += sizeof (uint32_t);
    3890  	}
    3891        return UINT_MAX;
    3892      }
    3893  }
    3894  #endif /* _LIBC */
    3895  
    3896  /* Check whether the node accepts the byte which is IDX-th
    3897     byte of the INPUT.  */
    3898  
    3899  static bool
    3900  check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
    3901  		   Idx idx)
    3902  {
    3903    unsigned char ch;
    3904    ch = re_string_byte_at (&mctx->input, idx);
    3905    switch (node->type)
    3906      {
    3907      case CHARACTER:
    3908        if (node->opr.c != ch)
    3909          return false;
    3910        break;
    3911  
    3912      case SIMPLE_BRACKET:
    3913        if (!bitset_contain (node->opr.sbcset, ch))
    3914          return false;
    3915        break;
    3916  
    3917      case OP_UTF8_PERIOD:
    3918        if (ch >= ASCII_CHARS)
    3919          return false;
    3920        FALLTHROUGH;
    3921      case OP_PERIOD:
    3922        if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
    3923  	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
    3924  	return false;
    3925        break;
    3926  
    3927      default:
    3928        return false;
    3929      }
    3930  
    3931    if (node->constraint)
    3932      {
    3933        /* The node has constraints.  Check whether the current context
    3934  	 satisfies the constraints.  */
    3935        unsigned int context = re_string_context_at (&mctx->input, idx,
    3936  						   mctx->eflags);
    3937        if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
    3938  	return false;
    3939      }
    3940  
    3941    return true;
    3942  }
    3943  
    3944  /* Extend the buffers, if the buffers have run out.  */
    3945  
    3946  static reg_errcode_t
    3947  __attribute_warn_unused_result__
    3948  extend_buffers (re_match_context_t *mctx, int min_len)
    3949  {
    3950    reg_errcode_t ret;
    3951    re_string_t *pstr = &mctx->input;
    3952  
    3953    /* Avoid overflow.  */
    3954    if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
    3955  			<= pstr->bufs_len))
    3956      return REG_ESPACE;
    3957  
    3958    /* Double the lengths of the buffers, but allocate at least MIN_LEN.  */
    3959    ret = re_string_realloc_buffers (pstr,
    3960  				   MAX (min_len,
    3961  					MIN (pstr->len, pstr->bufs_len * 2)));
    3962    if (__glibc_unlikely (ret != REG_NOERROR))
    3963      return ret;
    3964  
    3965    if (mctx->state_log != NULL)
    3966      {
    3967        /* And double the length of state_log.  */
    3968        /* XXX We have no indication of the size of this buffer.  If this
    3969  	 allocation fail we have no indication that the state_log array
    3970  	 does not have the right size.  */
    3971        re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
    3972  					      pstr->bufs_len + 1);
    3973        if (__glibc_unlikely (new_array == NULL))
    3974  	return REG_ESPACE;
    3975        mctx->state_log = new_array;
    3976      }
    3977  
    3978    /* Then reconstruct the buffers.  */
    3979    if (pstr->icase)
    3980      {
    3981        if (pstr->mb_cur_max > 1)
    3982  	{
    3983  	  ret = build_wcs_upper_buffer (pstr);
    3984  	  if (__glibc_unlikely (ret != REG_NOERROR))
    3985  	    return ret;
    3986  	}
    3987        else
    3988  	build_upper_buffer (pstr);
    3989      }
    3990    else
    3991      {
    3992        if (pstr->mb_cur_max > 1)
    3993  	build_wcs_buffer (pstr);
    3994        else
    3995  	{
    3996  	  if (pstr->trans != NULL)
    3997  	    re_string_translate_buffer (pstr);
    3998  	}
    3999      }
    4000    return REG_NOERROR;
    4001  }
    4002  
    4003  
    4004  /* Functions for matching context.  */
    4005  
    4006  /* Initialize MCTX.  */
    4007  
    4008  static reg_errcode_t
    4009  __attribute_warn_unused_result__
    4010  match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
    4011  {
    4012    mctx->eflags = eflags;
    4013    mctx->match_last = -1;
    4014    if (n > 0)
    4015      {
    4016        /* Avoid overflow.  */
    4017        size_t max_object_size =
    4018  	MAX (sizeof (struct re_backref_cache_entry),
    4019  	     sizeof (re_sub_match_top_t *));
    4020        if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n))
    4021  	return REG_ESPACE;
    4022  
    4023        mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
    4024        mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
    4025        if (__glibc_unlikely (mctx->bkref_ents == NULL || mctx->sub_tops == NULL))
    4026  	return REG_ESPACE;
    4027      }
    4028    /* Already zero-ed by the caller.
    4029       else
    4030         mctx->bkref_ents = NULL;
    4031       mctx->nbkref_ents = 0;
    4032       mctx->nsub_tops = 0;  */
    4033    mctx->abkref_ents = n;
    4034    mctx->max_mb_elem_len = 1;
    4035    mctx->asub_tops = n;
    4036    return REG_NOERROR;
    4037  }
    4038  
    4039  /* Clean the entries which depend on the current input in MCTX.
    4040     This function must be invoked when the matcher changes the start index
    4041     of the input, or changes the input string.  */
    4042  
    4043  static void
    4044  match_ctx_clean (re_match_context_t *mctx)
    4045  {
    4046    Idx st_idx;
    4047    for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
    4048      {
    4049        Idx sl_idx;
    4050        re_sub_match_top_t *top = mctx->sub_tops[st_idx];
    4051        for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
    4052  	{
    4053  	  re_sub_match_last_t *last = top->lasts[sl_idx];
    4054  	  re_free (last->path.array);
    4055  	  re_free (last);
    4056  	}
    4057        re_free (top->lasts);
    4058        if (top->path)
    4059  	{
    4060  	  re_free (top->path->array);
    4061  	  re_free (top->path);
    4062  	}
    4063        re_free (top);
    4064      }
    4065  
    4066    mctx->nsub_tops = 0;
    4067    mctx->nbkref_ents = 0;
    4068  }
    4069  
    4070  /* Free all the memory associated with MCTX.  */
    4071  
    4072  static void
    4073  match_ctx_free (re_match_context_t *mctx)
    4074  {
    4075    /* First, free all the memory associated with MCTX->SUB_TOPS.  */
    4076    match_ctx_clean (mctx);
    4077    re_free (mctx->sub_tops);
    4078    re_free (mctx->bkref_ents);
    4079  }
    4080  
    4081  /* Add a new backreference entry to MCTX.
    4082     Note that we assume that caller never call this function with duplicate
    4083     entry, and call with STR_IDX which isn't smaller than any existing entry.
    4084  */
    4085  
    4086  static reg_errcode_t
    4087  __attribute_warn_unused_result__
    4088  match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
    4089  		     Idx to)
    4090  {
    4091    if (mctx->nbkref_ents >= mctx->abkref_ents)
    4092      {
    4093        struct re_backref_cache_entry* new_entry;
    4094        new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
    4095  			      mctx->abkref_ents * 2);
    4096        if (__glibc_unlikely (new_entry == NULL))
    4097  	{
    4098  	  re_free (mctx->bkref_ents);
    4099  	  return REG_ESPACE;
    4100  	}
    4101        mctx->bkref_ents = new_entry;
    4102        memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
    4103  	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
    4104        mctx->abkref_ents *= 2;
    4105      }
    4106    if (mctx->nbkref_ents > 0
    4107        && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
    4108      mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
    4109  
    4110    mctx->bkref_ents[mctx->nbkref_ents].node = node;
    4111    mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
    4112    mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
    4113    mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
    4114  
    4115    /* This is a cache that saves negative results of check_dst_limits_calc_pos.
    4116       If bit N is clear, means that this entry won't epsilon-transition to
    4117       an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
    4118       it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
    4119       such node.
    4120  
    4121       A backreference does not epsilon-transition unless it is empty, so set
    4122       to all zeros if FROM != TO.  */
    4123    mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
    4124      = (from == to ? -1 : 0);
    4125  
    4126    mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
    4127    if (mctx->max_mb_elem_len < to - from)
    4128      mctx->max_mb_elem_len = to - from;
    4129    return REG_NOERROR;
    4130  }
    4131  
    4132  /* Return the first entry with the same str_idx, or -1 if none is
    4133     found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
    4134  
    4135  static Idx
    4136  search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
    4137  {
    4138    Idx left, right, mid, last;
    4139    last = right = mctx->nbkref_ents;
    4140    for (left = 0; left < right;)
    4141      {
    4142        mid = (left + right) / 2;
    4143        if (mctx->bkref_ents[mid].str_idx < str_idx)
    4144  	left = mid + 1;
    4145        else
    4146  	right = mid;
    4147      }
    4148    if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
    4149      return left;
    4150    else
    4151      return -1;
    4152  }
    4153  
    4154  /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
    4155     at STR_IDX.  */
    4156  
    4157  static reg_errcode_t
    4158  __attribute_warn_unused_result__
    4159  match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
    4160  {
    4161    DEBUG_ASSERT (mctx->sub_tops != NULL);
    4162    DEBUG_ASSERT (mctx->asub_tops > 0);
    4163    if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops))
    4164      {
    4165        Idx new_asub_tops = mctx->asub_tops * 2;
    4166        re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
    4167  						   re_sub_match_top_t *,
    4168  						   new_asub_tops);
    4169        if (__glibc_unlikely (new_array == NULL))
    4170  	return REG_ESPACE;
    4171        mctx->sub_tops = new_array;
    4172        mctx->asub_tops = new_asub_tops;
    4173      }
    4174    mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
    4175    if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL))
    4176      return REG_ESPACE;
    4177    mctx->sub_tops[mctx->nsub_tops]->node = node;
    4178    mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
    4179    return REG_NOERROR;
    4180  }
    4181  
    4182  /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
    4183     at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.
    4184     Return the new entry if successful, NULL if memory is exhausted.  */
    4185  
    4186  static re_sub_match_last_t *
    4187  match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
    4188  {
    4189    re_sub_match_last_t *new_entry;
    4190    if (__glibc_unlikely (subtop->nlasts == subtop->alasts))
    4191      {
    4192        Idx new_alasts = 2 * subtop->alasts + 1;
    4193        re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
    4194  						    re_sub_match_last_t *,
    4195  						    new_alasts);
    4196        if (__glibc_unlikely (new_array == NULL))
    4197  	return NULL;
    4198        subtop->lasts = new_array;
    4199        subtop->alasts = new_alasts;
    4200      }
    4201    new_entry = calloc (1, sizeof (re_sub_match_last_t));
    4202    if (__glibc_likely (new_entry != NULL))
    4203      {
    4204        subtop->lasts[subtop->nlasts] = new_entry;
    4205        new_entry->node = node;
    4206        new_entry->str_idx = str_idx;
    4207        ++subtop->nlasts;
    4208      }
    4209    return new_entry;
    4210  }
    4211  
    4212  static void
    4213  sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
    4214  	       re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
    4215  {
    4216    sctx->sifted_states = sifted_sts;
    4217    sctx->limited_states = limited_sts;
    4218    sctx->last_node = last_node;
    4219    sctx->last_str_idx = last_str_idx;
    4220    re_node_set_init_empty (&sctx->limits);
    4221  }