(root)/
flex-2.6.4/
src/
dfa.c
       1  /* dfa - DFA construction routines */
       2  
       3  /*  Copyright (c) 1990 The Regents of the University of California. */
       4  /*  All rights reserved. */
       5  
       6  /*  This code is derived from software contributed to Berkeley by */
       7  /*  Vern Paxson. */
       8  
       9  /*  The United States Government has rights in this work pursuant */
      10  /*  to contract no. DE-AC03-76SF00098 between the United States */
      11  /*  Department of Energy and the University of California. */
      12  
      13  /*  Redistribution and use in source and binary forms, with or without */
      14  /*  modification, are permitted provided that the following conditions */
      15  /*  are met: */
      16  
      17  /*  1. Redistributions of source code must retain the above copyright */
      18  /*     notice, this list of conditions and the following disclaimer. */
      19  /*  2. Redistributions in binary form must reproduce the above copyright */
      20  /*     notice, this list of conditions and the following disclaimer in the */
      21  /*     documentation and/or other materials provided with the distribution. */
      22  
      23  /*  Neither the name of the University nor the names of its contributors */
      24  /*  may be used to endorse or promote products derived from this software */
      25  /*  without specific prior written permission. */
      26  
      27  /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
      28  /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
      29  /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
      30  /*  PURPOSE. */
      31  
      32  #include "flexdef.h"
      33  #include "tables.h"
      34  
      35  /* declare functions that have forward references */
      36  
      37  void	dump_associated_rules(FILE *, int);
      38  void	dump_transitions(FILE *, int[]);
      39  void	sympartition(int[], int, int[], int[]);
      40  int	symfollowset(int[], int, int, int[]);
      41  
      42  
      43  /* check_for_backing_up - check a DFA state for backing up
      44   *
      45   * synopsis
      46   *     void check_for_backing_up( int ds, int state[numecs] );
      47   *
      48   * ds is the number of the state to check and state[] is its out-transitions,
      49   * indexed by equivalence class.
      50   */
      51  
      52  void check_for_backing_up (int ds, int state[])
      53  {
      54  	if ((reject && !dfaacc[ds].dfaacc_set) || (!reject && !dfaacc[ds].dfaacc_state)) {	/* state is non-accepting */
      55  		++num_backing_up;
      56  
      57  		if (backing_up_report) {
      58  			fprintf (backing_up_file,
      59  				 _("State #%d is non-accepting -\n"), ds);
      60  
      61  			/* identify the state */
      62  			dump_associated_rules (backing_up_file, ds);
      63  
      64  			/* Now identify it further using the out- and
      65  			 * jam-transitions.
      66  			 */
      67  			dump_transitions (backing_up_file, state);
      68  
      69  			putc ('\n', backing_up_file);
      70  		}
      71  	}
      72  }
      73  
      74  
      75  /* check_trailing_context - check to see if NFA state set constitutes
      76   *                          "dangerous" trailing context
      77   *
      78   * synopsis
      79   *    void check_trailing_context( int nfa_states[num_states+1], int num_states,
      80   *				int accset[nacc+1], int nacc );
      81   *
      82   * NOTES
      83   *  Trailing context is "dangerous" if both the head and the trailing
      84   *  part are of variable size \and/ there's a DFA state which contains
      85   *  both an accepting state for the head part of the rule and NFA states
      86   *  which occur after the beginning of the trailing context.
      87   *
      88   *  When such a rule is matched, it's impossible to tell if having been
      89   *  in the DFA state indicates the beginning of the trailing context or
      90   *  further-along scanning of the pattern.  In these cases, a warning
      91   *  message is issued.
      92   *
      93   *    nfa_states[1 .. num_states] is the list of NFA states in the DFA.
      94   *    accset[1 .. nacc] is the list of accepting numbers for the DFA state.
      95   */
      96  
      97  void check_trailing_context (int *nfa_states, int num_states, int *accset, int nacc)
      98  {
      99  	int i, j;
     100  
     101  	for (i = 1; i <= num_states; ++i) {
     102  		int     ns = nfa_states[i];
     103  		int type = state_type[ns];
     104  		int ar = assoc_rule[ns];
     105  
     106  		if (type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE) {	/* do nothing */
     107  		}
     108  
     109  		else if (type == STATE_TRAILING_CONTEXT) {
     110  			/* Potential trouble.  Scan set of accepting numbers
     111  			 * for the one marking the end of the "head".  We
     112  			 * assume that this looping will be fairly cheap
     113  			 * since it's rare that an accepting number set
     114  			 * is large.
     115  			 */
     116  			for (j = 1; j <= nacc; ++j)
     117  				if (accset[j] & YY_TRAILING_HEAD_MASK) {
     118  					line_warning (_
     119  						      ("dangerous trailing context"),
     120  						      rule_linenum[ar]);
     121  					return;
     122  				}
     123  		}
     124  	}
     125  }
     126  
     127  
     128  /* dump_associated_rules - list the rules associated with a DFA state
     129   *
     130   * Goes through the set of NFA states associated with the DFA and
     131   * extracts the first MAX_ASSOC_RULES unique rules, sorts them,
     132   * and writes a report to the given file.
     133   */
     134  
     135  void dump_associated_rules (FILE *file, int ds)
     136  {
     137  	int i, j;
     138  	int num_associated_rules = 0;
     139  	int rule_set[MAX_ASSOC_RULES + 1];
     140  	int *dset = dss[ds];
     141  	int size = dfasiz[ds];
     142  
     143  	for (i = 1; i <= size; ++i) {
     144  		int rule_num = rule_linenum[assoc_rule[dset[i]]];
     145  
     146  		for (j = 1; j <= num_associated_rules; ++j)
     147  			if (rule_num == rule_set[j])
     148  				break;
     149  
     150  		if (j > num_associated_rules) {	/* new rule */
     151  			if (num_associated_rules < MAX_ASSOC_RULES)
     152  				rule_set[++num_associated_rules] =
     153  					rule_num;
     154  		}
     155  	}
     156  
     157  	qsort (&rule_set [1], (size_t) num_associated_rules, sizeof (rule_set [1]), intcmp);
     158  
     159  	fprintf (file, _(" associated rule line numbers:"));
     160  
     161  	for (i = 1; i <= num_associated_rules; ++i) {
     162  		if (i % 8 == 1)
     163  			putc ('\n', file);
     164  
     165  		fprintf (file, "\t%d", rule_set[i]);
     166  	}
     167  
     168  	putc ('\n', file);
     169  }
     170  
     171  
     172  /* dump_transitions - list the transitions associated with a DFA state
     173   *
     174   * synopsis
     175   *     dump_transitions( FILE *file, int state[numecs] );
     176   *
     177   * Goes through the set of out-transitions and lists them in human-readable
     178   * form (i.e., not as equivalence classes); also lists jam transitions
     179   * (i.e., all those which are not out-transitions, plus EOF).  The dump
     180   * is done to the given file.
     181   */
     182  
     183  void dump_transitions (FILE *file, int state[])
     184  {
     185  	int i, ec;
     186  	int out_char_set[CSIZE];
     187  
     188  	for (i = 0; i < csize; ++i) {
     189  		ec = ABS (ecgroup[i]);
     190  		out_char_set[i] = state[ec];
     191  	}
     192  
     193  	fprintf (file, _(" out-transitions: "));
     194  
     195  	list_character_set (file, out_char_set);
     196  
     197  	/* now invert the members of the set to get the jam transitions */
     198  	for (i = 0; i < csize; ++i)
     199  		out_char_set[i] = !out_char_set[i];
     200  
     201  	fprintf (file, _("\n jam-transitions: EOF "));
     202  
     203  	list_character_set (file, out_char_set);
     204  
     205  	putc ('\n', file);
     206  }
     207  
     208  
     209  /* epsclosure - construct the epsilon closure of a set of ndfa states
     210   *
     211   * synopsis
     212   *    int *epsclosure( int t[num_states], int *numstates_addr,
     213   *			int accset[num_rules+1], int *nacc_addr,
     214   *			int *hashval_addr );
     215   *
     216   * NOTES
     217   *  The epsilon closure is the set of all states reachable by an arbitrary
     218   *  number of epsilon transitions, which themselves do not have epsilon
     219   *  transitions going out, unioned with the set of states which have non-null
     220   *  accepting numbers.  t is an array of size numstates of nfa state numbers.
     221   *  Upon return, t holds the epsilon closure and *numstates_addr is updated.
     222   *  accset holds a list of the accepting numbers, and the size of accset is
     223   *  given by *nacc_addr.  t may be subjected to reallocation if it is not
     224   *  large enough to hold the epsilon closure.
     225   *
     226   *  hashval is the hash value for the dfa corresponding to the state set.
     227   */
     228  
     229  int    *epsclosure (int *t, int *ns_addr, int accset[], int *nacc_addr, int *hv_addr)
     230  {
     231  	int     stkpos, ns, tsp;
     232  	int     numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
     233  	int     stkend, nstate;
     234  	static int did_stk_init = false, *stk;
     235  
     236  #define MARK_STATE(state) \
     237  do{ trans1[state] = trans1[state] - MARKER_DIFFERENCE;} while(0)
     238  
     239  #define IS_MARKED(state) (trans1[state] < 0)
     240  
     241  #define UNMARK_STATE(state) \
     242  do{ trans1[state] = trans1[state] + MARKER_DIFFERENCE;} while(0)
     243  
     244  #define CHECK_ACCEPT(state) \
     245  do{ \
     246  nfaccnum = accptnum[state]; \
     247  if ( nfaccnum != NIL ) \
     248  accset[++nacc] = nfaccnum; \
     249  }while(0)
     250  
     251  #define DO_REALLOCATION() \
     252  do { \
     253  current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
     254  ++num_reallocs; \
     255  t = reallocate_integer_array( t, current_max_dfa_size ); \
     256  stk = reallocate_integer_array( stk, current_max_dfa_size ); \
     257  }while(0) \
     258  
     259  #define PUT_ON_STACK(state) \
     260  do { \
     261  if ( ++stkend >= current_max_dfa_size ) \
     262  DO_REALLOCATION(); \
     263  stk[stkend] = state; \
     264  MARK_STATE(state); \
     265  }while(0)
     266  
     267  #define ADD_STATE(state) \
     268  do { \
     269  if ( ++numstates >= current_max_dfa_size ) \
     270  DO_REALLOCATION(); \
     271  t[numstates] = state; \
     272  hashval += state; \
     273  }while(0)
     274  
     275  #define STACK_STATE(state) \
     276  do { \
     277  PUT_ON_STACK(state); \
     278  CHECK_ACCEPT(state); \
     279  if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
     280  ADD_STATE(state); \
     281  }while(0)
     282  
     283  
     284  	if (!did_stk_init) {
     285  		stk = allocate_integer_array (current_max_dfa_size);
     286  		did_stk_init = true;
     287  	}
     288  
     289  	nacc = stkend = hashval = 0;
     290  
     291  	for (nstate = 1; nstate <= numstates; ++nstate) {
     292  		ns = t[nstate];
     293  
     294  		/* The state could be marked if we've already pushed it onto
     295  		 * the stack.
     296  		 */
     297  		if (!IS_MARKED (ns)) {
     298  			PUT_ON_STACK (ns);
     299  			CHECK_ACCEPT (ns);
     300  			hashval += ns;
     301  		}
     302  	}
     303  
     304  	for (stkpos = 1; stkpos <= stkend; ++stkpos) {
     305  		ns = stk[stkpos];
     306  		transsym = transchar[ns];
     307  
     308  		if (transsym == SYM_EPSILON) {
     309  			tsp = trans1[ns] + MARKER_DIFFERENCE;
     310  
     311  			if (tsp != NO_TRANSITION) {
     312  				if (!IS_MARKED (tsp))
     313  					STACK_STATE (tsp);
     314  
     315  				tsp = trans2[ns];
     316  
     317  				if (tsp != NO_TRANSITION
     318  				    && !IS_MARKED (tsp))
     319  					STACK_STATE (tsp);
     320  			}
     321  		}
     322  	}
     323  
     324  	/* Clear out "visit" markers. */
     325  
     326  	for (stkpos = 1; stkpos <= stkend; ++stkpos) {
     327  		if (IS_MARKED (stk[stkpos]))
     328  			UNMARK_STATE (stk[stkpos]);
     329  		else
     330  			flexfatal (_
     331  				   ("consistency check failed in epsclosure()"));
     332  	}
     333  
     334  	*ns_addr = numstates;
     335  	*hv_addr = hashval;
     336  	*nacc_addr = nacc;
     337  
     338  	return t;
     339  }
     340  
     341  
     342  /* increase_max_dfas - increase the maximum number of DFAs */
     343  
     344  void increase_max_dfas (void)
     345  {
     346  	current_max_dfas += MAX_DFAS_INCREMENT;
     347  
     348  	++num_reallocs;
     349  
     350  	base = reallocate_integer_array (base, current_max_dfas);
     351  	def = reallocate_integer_array (def, current_max_dfas);
     352  	dfasiz = reallocate_integer_array (dfasiz, current_max_dfas);
     353  	accsiz = reallocate_integer_array (accsiz, current_max_dfas);
     354  	dhash = reallocate_integer_array (dhash, current_max_dfas);
     355  	dss = reallocate_int_ptr_array (dss, current_max_dfas);
     356  	dfaacc = reallocate_dfaacc_union (dfaacc, current_max_dfas);
     357  
     358  	if (nultrans)
     359  		nultrans =
     360  			reallocate_integer_array (nultrans,
     361  						  current_max_dfas);
     362  }
     363  
     364  
     365  /* ntod - convert an ndfa to a dfa
     366   *
     367   * Creates the dfa corresponding to the ndfa we've constructed.  The
     368   * dfa starts out in state #1.
     369   */
     370  
     371  void ntod (void)
     372  {
     373  	int    *accset, ds, nacc, newds;
     374  	int     sym, hashval, numstates, dsize;
     375  	int     num_full_table_rows=0;	/* used only for -f */
     376  	int    *nset, *dset;
     377  	int     targptr, totaltrans, i, comstate, comfreq, targ;
     378  	int     symlist[CSIZE + 1];
     379  	int     num_start_states;
     380  	int     todo_head, todo_next;
     381  
     382  	struct yytbl_data *yynxt_tbl = 0;
     383  	flex_int32_t *yynxt_data = 0, yynxt_curr = 0;
     384  
     385  	/* Note that the following are indexed by *equivalence classes*
     386  	 * and not by characters.  Since equivalence classes are indexed
     387  	 * beginning with 1, even if the scanner accepts NUL's, this
     388  	 * means that (since every character is potentially in its own
     389  	 * equivalence class) these arrays must have room for indices
     390  	 * from 1 to CSIZE, so their size must be CSIZE + 1.
     391  	 */
     392  	int     duplist[CSIZE + 1], state[CSIZE + 1];
     393  	int     targfreq[CSIZE + 1] = {0}, targstate[CSIZE + 1];
     394  
     395  	/* accset needs to be large enough to hold all of the rules present
     396  	 * in the input, *plus* their YY_TRAILING_HEAD_MASK variants.
     397  	 */
     398  	accset = allocate_integer_array ((num_rules + 1) * 2);
     399  	nset = allocate_integer_array (current_max_dfa_size);
     400  
     401  	/* The "todo" queue is represented by the head, which is the DFA
     402  	 * state currently being processed, and the "next", which is the
     403  	 * next DFA state number available (not in use).  We depend on the
     404  	 * fact that snstods() returns DFA's \in increasing order/, and thus
     405  	 * need only know the bounds of the dfas to be processed.
     406  	 */
     407  	todo_head = todo_next = 0;
     408  
     409  	for (i = 0; i <= csize; ++i) {
     410  		duplist[i] = NIL;
     411  		symlist[i] = false;
     412  	}
     413  
     414  	for (i = 0; i <= num_rules; ++i)
     415  		accset[i] = NIL;
     416  
     417  	if (trace) {
     418  		dumpnfa (scset[1]);
     419  		fputs (_("\n\nDFA Dump:\n\n"), stderr);
     420  	}
     421  
     422  	inittbl ();
     423  
     424  	/* Check to see whether we should build a separate table for
     425  	 * transitions on NUL characters.  We don't do this for full-speed
     426  	 * (-F) scanners, since for them we don't have a simple state
     427  	 * number lying around with which to index the table.  We also
     428  	 * don't bother doing it for scanners unless (1) NUL is in its own
     429  	 * equivalence class (indicated by a positive value of
     430  	 * ecgroup[NUL]), (2) NUL's equivalence class is the last
     431  	 * equivalence class, and (3) the number of equivalence classes is
     432  	 * the same as the number of characters.  This latter case comes
     433  	 * about when useecs is false or when it's true but every character
     434  	 * still manages to land in its own class (unlikely, but it's
     435  	 * cheap to check for).  If all these things are true then the
     436  	 * character code needed to represent NUL's equivalence class for
     437  	 * indexing the tables is going to take one more bit than the
     438  	 * number of characters, and therefore we won't be assured of
     439  	 * being able to fit it into a YY_CHAR variable.  This rules out
     440  	 * storing the transitions in a compressed table, since the code
     441  	 * for interpreting them uses a YY_CHAR variable (perhaps it
     442  	 * should just use an integer, though; this is worth pondering ...
     443  	 * ###).
     444  	 *
     445  	 * Finally, for full tables, we want the number of entries in the
     446  	 * table to be a power of two so the array references go fast (it
     447  	 * will just take a shift to compute the major index).  If
     448  	 * encoding NUL's transitions in the table will spoil this, we
     449  	 * give it its own table (note that this will be the case if we're
     450  	 * not using equivalence classes).
     451  	 */
     452  
     453  	/* Note that the test for ecgroup[0] == numecs below accomplishes
     454  	 * both (1) and (2) above
     455  	 */
     456  	if (!fullspd && ecgroup[0] == numecs) {
     457  		/* NUL is alone in its equivalence class, which is the
     458  		 * last one.
     459  		 */
     460  		int     use_NUL_table = (numecs == csize);
     461  
     462  		if (fulltbl && !use_NUL_table) {
     463  			/* We still may want to use the table if numecs
     464  			 * is a power of 2.
     465  			 */
     466  			if (numecs <= csize && is_power_of_2(numecs)) {
     467  				use_NUL_table = true;
     468  			}
     469  		}
     470  
     471  		if (use_NUL_table)
     472  			nultrans =
     473  				allocate_integer_array (current_max_dfas);
     474  
     475  		/* From now on, nultrans != nil indicates that we're
     476  		 * saving null transitions for later, separate encoding.
     477  		 */
     478  	}
     479  
     480  
     481  	if (fullspd) {
     482  		for (i = 0; i <= numecs; ++i)
     483  			state[i] = 0;
     484  
     485  		place_state (state, 0, 0);
     486  		dfaacc[0].dfaacc_state = 0;
     487  	}
     488  
     489  	else if (fulltbl) {
     490  		if (nultrans)
     491  			/* We won't be including NUL's transitions in the
     492  			 * table, so build it for entries from 0 .. numecs - 1.
     493  			 */
     494  			num_full_table_rows = numecs;
     495  
     496  		else
     497  			/* Take into account the fact that we'll be including
     498  			 * the NUL entries in the transition table.  Build it
     499  			 * from 0 .. numecs.
     500  			 */
     501  			num_full_table_rows = numecs + 1;
     502  
     503  		/* Begin generating yy_nxt[][]
     504  		 * This spans the entire LONG function.
     505  		 * This table is tricky because we don't know how big it will be.
     506  		 * So we'll have to realloc() on the way...
     507  		 * we'll wait until we can calculate yynxt_tbl->td_hilen.
     508  		 */
     509  		yynxt_tbl = calloc(1, sizeof (struct yytbl_data));
     510       
     511  		yytbl_data_init (yynxt_tbl, YYTD_ID_NXT);
     512  		yynxt_tbl->td_hilen = 1;
     513  		yynxt_tbl->td_lolen = (flex_uint32_t) num_full_table_rows;
     514  		yynxt_tbl->td_data = yynxt_data =
     515  			calloc(yynxt_tbl->td_lolen *
     516  					    yynxt_tbl->td_hilen,
     517  					    sizeof (flex_int32_t));
     518  		yynxt_curr = 0;
     519  
     520  		buf_prints (&yydmap_buf,
     521  			    "\t{YYTD_ID_NXT, (void**)&yy_nxt, sizeof(%s)},\n",
     522  			    long_align ? "flex_int32_t" : "flex_int16_t");
     523  
     524  		/* Unless -Ca, declare it "short" because it's a real
     525  		 * long-shot that that won't be large enough.
     526  		 */
     527  		if (gentables)
     528  			out_str_dec
     529  				("static const %s yy_nxt[][%d] =\n    {\n",
     530  				 long_align ? "flex_int32_t" : "flex_int16_t",
     531  				 num_full_table_rows);
     532  		else {
     533  			out_dec ("#undef YY_NXT_LOLEN\n#define YY_NXT_LOLEN (%d)\n", num_full_table_rows);
     534  			out_str ("static const %s *yy_nxt =0;\n",
     535  				 long_align ? "flex_int32_t" : "flex_int16_t");
     536  		}
     537  
     538  
     539  		if (gentables)
     540  			outn ("    {");
     541  
     542  		/* Generate 0 entries for state #0. */
     543  		for (i = 0; i < num_full_table_rows; ++i) {
     544  			mk2data (0);
     545  			yynxt_data[yynxt_curr++] = 0;
     546  		}
     547  
     548  		dataflush ();
     549  		if (gentables)
     550  			outn ("    },\n");
     551  	}
     552  
     553  	/* Create the first states. */
     554  
     555  	num_start_states = lastsc * 2;
     556  
     557  	for (i = 1; i <= num_start_states; ++i) {
     558  		numstates = 1;
     559  
     560  		/* For each start condition, make one state for the case when
     561  		 * we're at the beginning of the line (the '^' operator) and
     562  		 * one for the case when we're not.
     563  		 */
     564  		if (i % 2 == 1)
     565  			nset[numstates] = scset[(i / 2) + 1];
     566  		else
     567  			nset[numstates] =
     568  				mkbranch (scbol[i / 2], scset[i / 2]);
     569  
     570  		nset = epsclosure (nset, &numstates, accset, &nacc,
     571  				   &hashval);
     572  
     573  		if (snstods (nset, numstates, accset, nacc, hashval, &ds)) {
     574  			numas += nacc;
     575  			totnst += numstates;
     576  			++todo_next;
     577  
     578  			if (variable_trailing_context_rules && nacc > 0)
     579  				check_trailing_context (nset, numstates,
     580  							accset, nacc);
     581  		}
     582  	}
     583  
     584  	if (!fullspd) {
     585  		if (!snstods (nset, 0, accset, 0, 0, &end_of_buffer_state))
     586  			flexfatal (_
     587  				   ("could not create unique end-of-buffer state"));
     588  
     589  		++numas;
     590  		++num_start_states;
     591  		++todo_next;
     592  	}
     593  
     594  
     595  	while (todo_head < todo_next) {
     596  		targptr = 0;
     597  		totaltrans = 0;
     598  
     599  		for (i = 1; i <= numecs; ++i)
     600  			state[i] = 0;
     601  
     602  		ds = ++todo_head;
     603  
     604  		dset = dss[ds];
     605  		dsize = dfasiz[ds];
     606  
     607  		if (trace)
     608  			fprintf (stderr, _("state # %d:\n"), ds);
     609  
     610  		sympartition (dset, dsize, symlist, duplist);
     611  
     612  		for (sym = 1; sym <= numecs; ++sym) {
     613  			if (symlist[sym]) {
     614  				symlist[sym] = 0;
     615  
     616  				if (duplist[sym] == NIL) {
     617  					/* Symbol has unique out-transitions. */
     618  					numstates =
     619  						symfollowset (dset, dsize,
     620  							      sym, nset);
     621  					nset = epsclosure (nset,
     622  							   &numstates,
     623  							   accset, &nacc,
     624  							   &hashval);
     625  
     626  					if (snstods
     627  					    (nset, numstates, accset, nacc,
     628  					     hashval, &newds)) {
     629  						totnst = totnst +
     630  							numstates;
     631  						++todo_next;
     632  						numas += nacc;
     633  
     634  						if (variable_trailing_context_rules && nacc > 0)
     635  							check_trailing_context
     636  								(nset,
     637  								 numstates,
     638  								 accset,
     639  								 nacc);
     640  					}
     641  
     642  					state[sym] = newds;
     643  
     644  					if (trace)
     645  						fprintf (stderr,
     646  							 "\t%d\t%d\n", sym,
     647  							 newds);
     648  
     649  					targfreq[++targptr] = 1;
     650  					targstate[targptr] = newds;
     651  					++numuniq;
     652  				}
     653  
     654  				else {
     655  					/* sym's equivalence class has the same
     656  					 * transitions as duplist(sym)'s
     657  					 * equivalence class.
     658  					 */
     659  					targ = state[duplist[sym]];
     660  					state[sym] = targ;
     661  
     662  					if (trace)
     663  						fprintf (stderr,
     664  							 "\t%d\t%d\n", sym,
     665  							 targ);
     666  
     667  					/* Update frequency count for
     668  					 * destination state.
     669  					 */
     670  
     671  					i = 0;
     672  					while (targstate[++i] != targ) ;
     673  
     674  					++targfreq[i];
     675  					++numdup;
     676  				}
     677  
     678  				++totaltrans;
     679  				duplist[sym] = NIL;
     680  			}
     681  		}
     682  
     683  
     684  		numsnpairs += totaltrans;
     685  
     686  		if (ds > num_start_states)
     687  			check_for_backing_up (ds, state);
     688  
     689  		if (nultrans) {
     690  			nultrans[ds] = state[NUL_ec];
     691  			state[NUL_ec] = 0;	/* remove transition */
     692  		}
     693  
     694  		if (fulltbl) {
     695  
     696  			/* Each time we hit here, it's another td_hilen, so we realloc. */
     697  			yynxt_tbl->td_hilen++;
     698  			yynxt_tbl->td_data = yynxt_data =
     699  				realloc (yynxt_data,
     700  						     yynxt_tbl->td_hilen *
     701  						     yynxt_tbl->td_lolen *
     702  						     sizeof (flex_int32_t));
     703  
     704  
     705  			if (gentables)
     706  				outn ("    {");
     707  
     708  			/* Supply array's 0-element. */
     709  			if (ds == end_of_buffer_state) {
     710  				mk2data (-end_of_buffer_state);
     711  				yynxt_data[yynxt_curr++] =
     712  					-end_of_buffer_state;
     713  			}
     714  			else {
     715  				mk2data (end_of_buffer_state);
     716  				yynxt_data[yynxt_curr++] =
     717  					end_of_buffer_state;
     718  			}
     719  
     720  			for (i = 1; i < num_full_table_rows; ++i) {
     721  				/* Jams are marked by negative of state
     722  				 * number.
     723  				 */
     724  				mk2data (state[i] ? state[i] : -ds);
     725  				yynxt_data[yynxt_curr++] =
     726  					state[i] ? state[i] : -ds;
     727  			}
     728  
     729  			dataflush ();
     730  			if (gentables)
     731  				outn ("    },\n");
     732  		}
     733  
     734  		else if (fullspd)
     735  			place_state (state, ds, totaltrans);
     736  
     737  		else if (ds == end_of_buffer_state)
     738  			/* Special case this state to make sure it does what
     739  			 * it's supposed to, i.e., jam on end-of-buffer.
     740  			 */
     741  			stack1 (ds, 0, 0, JAMSTATE);
     742  
     743  		else {		/* normal, compressed state */
     744  
     745  			/* Determine which destination state is the most
     746  			 * common, and how many transitions to it there are.
     747  			 */
     748  
     749  			comfreq = 0;
     750  			comstate = 0;
     751  
     752  			for (i = 1; i <= targptr; ++i)
     753  				if (targfreq[i] > comfreq) {
     754  					comfreq = targfreq[i];
     755  					comstate = targstate[i];
     756  				}
     757  
     758  			bldtbl (state, ds, totaltrans, comstate, comfreq);
     759  		}
     760  	}
     761  
     762  	if (fulltbl) {
     763  		dataend ();
     764  		if (tablesext) {
     765  			yytbl_data_compress (yynxt_tbl);
     766  			if (yytbl_data_fwrite (&tableswr, yynxt_tbl) < 0)
     767  				flexerror (_
     768  					   ("Could not write yynxt_tbl[][]"));
     769  		}
     770  		if (yynxt_tbl) {
     771  			yytbl_data_destroy (yynxt_tbl);
     772  			yynxt_tbl = 0;
     773  		}
     774  	}
     775  
     776  	else if (!fullspd) {
     777  		cmptmps ();	/* create compressed template entries */
     778  
     779  		/* Create tables for all the states with only one
     780  		 * out-transition.
     781  		 */
     782  		while (onesp > 0) {
     783  			mk1tbl (onestate[onesp], onesym[onesp],
     784  				onenext[onesp], onedef[onesp]);
     785  			--onesp;
     786  		}
     787  
     788  		mkdeftbl ();
     789  	}
     790  
     791  	free(accset);
     792  	free(nset);
     793  }
     794  
     795  
     796  /* snstods - converts a set of ndfa states into a dfa state
     797   *
     798   * synopsis
     799   *    is_new_state = snstods( int sns[numstates], int numstates,
     800   *				int accset[num_rules+1], int nacc,
     801   *				int hashval, int *newds_addr );
     802   *
     803   * On return, the dfa state number is in newds.
     804   */
     805  
     806  int snstods (int sns[], int numstates, int accset[], int nacc, int hashval, int *newds_addr)
     807  {
     808  	int didsort = 0;
     809  	int i, j;
     810  	int newds, *oldsns;
     811  
     812  	for (i = 1; i <= lastdfa; ++i)
     813  		if (hashval == dhash[i]) {
     814  			if (numstates == dfasiz[i]) {
     815  				oldsns = dss[i];
     816  
     817  				if (!didsort) {
     818  					/* We sort the states in sns so we
     819  					 * can compare it to oldsns quickly.
     820  					 */
     821  					qsort (&sns [1], (size_t) numstates, sizeof (sns [1]), intcmp);
     822  					didsort = 1;
     823  				}
     824  
     825  				for (j = 1; j <= numstates; ++j)
     826  					if (sns[j] != oldsns[j])
     827  						break;
     828  
     829  				if (j > numstates) {
     830  					++dfaeql;
     831  					*newds_addr = i;
     832  					return 0;
     833  				}
     834  
     835  				++hshcol;
     836  			}
     837  
     838  			else
     839  				++hshsave;
     840  		}
     841  
     842  	/* Make a new dfa. */
     843  
     844  	if (++lastdfa >= current_max_dfas)
     845  		increase_max_dfas ();
     846  
     847  	newds = lastdfa;
     848  
     849  	dss[newds] = allocate_integer_array (numstates + 1);
     850  
     851  	/* If we haven't already sorted the states in sns, we do so now,
     852  	 * so that future comparisons with it can be made quickly.
     853  	 */
     854  
     855  	if (!didsort)
     856            	qsort (&sns [1], (size_t) numstates, sizeof (sns [1]), intcmp);
     857  
     858  	for (i = 1; i <= numstates; ++i)
     859  		dss[newds][i] = sns[i];
     860  
     861  	dfasiz[newds] = numstates;
     862  	dhash[newds] = hashval;
     863  
     864  	if (nacc == 0) {
     865  		if (reject)
     866  			dfaacc[newds].dfaacc_set = NULL;
     867  		else
     868  			dfaacc[newds].dfaacc_state = 0;
     869  
     870  		accsiz[newds] = 0;
     871  	}
     872  
     873  	else if (reject) {
     874  		/* We sort the accepting set in increasing order so the
     875  		 * disambiguating rule that the first rule listed is considered
     876  		 * match in the event of ties will work.
     877  		 */
     878  
     879  		qsort (&accset [1], (size_t) nacc, sizeof (accset [1]), intcmp);
     880  
     881  		dfaacc[newds].dfaacc_set =
     882  			allocate_integer_array (nacc + 1);
     883  
     884  		/* Save the accepting set for later */
     885  		for (i = 1; i <= nacc; ++i) {
     886  			dfaacc[newds].dfaacc_set[i] = accset[i];
     887  
     888  			if (accset[i] <= num_rules)
     889  				/* Who knows, perhaps a REJECT can yield
     890  				 * this rule.
     891  				 */
     892  				rule_useful[accset[i]] = true;
     893  		}
     894  
     895  		accsiz[newds] = nacc;
     896  	}
     897  
     898  	else {
     899  		/* Find lowest numbered rule so the disambiguating rule
     900  		 * will work.
     901  		 */
     902  		j = num_rules + 1;
     903  
     904  		for (i = 1; i <= nacc; ++i)
     905  			if (accset[i] < j)
     906  				j = accset[i];
     907  
     908  		dfaacc[newds].dfaacc_state = j;
     909  
     910  		if (j <= num_rules)
     911  			rule_useful[j] = true;
     912  	}
     913  
     914  	*newds_addr = newds;
     915  
     916  	return 1;
     917  }
     918  
     919  
     920  /* symfollowset - follow the symbol transitions one step
     921   *
     922   * synopsis
     923   *    numstates = symfollowset( int ds[current_max_dfa_size], int dsize,
     924   *				int transsym, int nset[current_max_dfa_size] );
     925   */
     926  
     927  int symfollowset (int ds[], int dsize, int transsym, int nset[])
     928  {
     929  	int     ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist;
     930  
     931  	numstates = 0;
     932  
     933  	for (i = 1; i <= dsize; ++i) {	/* for each nfa state ns in the state set of ds */
     934  		ns = ds[i];
     935  		sym = transchar[ns];
     936  		tsp = trans1[ns];
     937  
     938  		if (sym < 0) {	/* it's a character class */
     939  			sym = -sym;
     940  			ccllist = cclmap[sym];
     941  			lenccl = ccllen[sym];
     942  
     943  			if (cclng[sym]) {
     944  				for (j = 0; j < lenccl; ++j) {
     945  					/* Loop through negated character
     946  					 * class.
     947  					 */
     948  					ch = ccltbl[ccllist + j];
     949  
     950  					if (ch == 0)
     951  						ch = NUL_ec;
     952  
     953  					if (ch > transsym)
     954  						/* Transsym isn't in negated
     955  						 * ccl.
     956  						 */
     957  						break;
     958  
     959  					else if (ch == transsym)
     960  						/* next 2 */
     961  						goto bottom;
     962  				}
     963  
     964  				/* Didn't find transsym in ccl. */
     965  				nset[++numstates] = tsp;
     966  			}
     967  
     968  			else
     969  				for (j = 0; j < lenccl; ++j) {
     970  					ch = ccltbl[ccllist + j];
     971  
     972  					if (ch == 0)
     973  						ch = NUL_ec;
     974  
     975  					if (ch > transsym)
     976  						break;
     977  					else if (ch == transsym) {
     978  						nset[++numstates] = tsp;
     979  						break;
     980  					}
     981  				}
     982  		}
     983  
     984  		else if (sym == SYM_EPSILON) {	/* do nothing */
     985  		}
     986  
     987  		else if (ABS (ecgroup[sym]) == transsym)
     988  			nset[++numstates] = tsp;
     989  
     990  	      bottom:;
     991  	}
     992  
     993  	return numstates;
     994  }
     995  
     996  
     997  /* sympartition - partition characters with same out-transitions
     998   *
     999   * synopsis
    1000   *    sympartition( int ds[current_max_dfa_size], int numstates,
    1001   *			int symlist[numecs], int duplist[numecs] );
    1002   */
    1003  
    1004  void sympartition (int ds[], int numstates, int symlist[], int duplist[])
    1005  {
    1006  	int     tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
    1007  
    1008  	/* Partitioning is done by creating equivalence classes for those
    1009  	 * characters which have out-transitions from the given state.  Thus
    1010  	 * we are really creating equivalence classes of equivalence classes.
    1011  	 */
    1012  
    1013  	for (i = 1; i <= numecs; ++i) {	/* initialize equivalence class list */
    1014  		duplist[i] = i - 1;
    1015  		dupfwd[i] = i + 1;
    1016  	}
    1017  
    1018  	duplist[1] = NIL;
    1019  	dupfwd[numecs] = NIL;
    1020  
    1021  	for (i = 1; i <= numstates; ++i) {
    1022  		ns = ds[i];
    1023  		tch = transchar[ns];
    1024  
    1025  		if (tch != SYM_EPSILON) {
    1026  			if (tch < -lastccl || tch >= csize) {
    1027  				flexfatal (_
    1028  					   ("bad transition character detected in sympartition()"));
    1029  			}
    1030  
    1031  			if (tch >= 0) {	/* character transition */
    1032  				int     ec = ecgroup[tch];
    1033  
    1034  				mkechar (ec, dupfwd, duplist);
    1035  				symlist[ec] = 1;
    1036  			}
    1037  
    1038  			else {	/* character class */
    1039  				tch = -tch;
    1040  
    1041  				lenccl = ccllen[tch];
    1042  				cclp = cclmap[tch];
    1043  				mkeccl (ccltbl + cclp, lenccl, dupfwd,
    1044  					duplist, numecs, NUL_ec);
    1045  
    1046  				if (cclng[tch]) {
    1047  					j = 0;
    1048  
    1049  					for (k = 0; k < lenccl; ++k) {
    1050  						ich = ccltbl[cclp + k];
    1051  
    1052  						if (ich == 0)
    1053  							ich = NUL_ec;
    1054  
    1055  						for (++j; j < ich; ++j)
    1056  							symlist[j] = 1;
    1057  					}
    1058  
    1059  					for (++j; j <= numecs; ++j)
    1060  						symlist[j] = 1;
    1061  				}
    1062  
    1063  				else
    1064  					for (k = 0; k < lenccl; ++k) {
    1065  						ich = ccltbl[cclp + k];
    1066  
    1067  						if (ich == 0)
    1068  							ich = NUL_ec;
    1069  
    1070  						symlist[ich] = 1;
    1071  					}
    1072  			}
    1073  		}
    1074  	}
    1075  }