(root)/
bison-3.8.2/
src/
lr0.c
       1  /* Generate the LR(0) parser states for Bison.
       2  
       3     Copyright (C) 1984, 1986, 1989, 2000-2002, 2004-2015, 2018-2021 Free
       4     Software Foundation, Inc.
       5  
       6     This file is part of Bison, the GNU Compiler Compiler.
       7  
       8     This program is free software: you can redistribute it and/or modify
       9     it under the terms of the GNU General Public License as published by
      10     the Free Software Foundation, either version 3 of the License, or
      11     (at your option) any later version.
      12  
      13     This program is distributed in the hope that it will be useful,
      14     but WITHOUT ANY WARRANTY; without even the implied warranty of
      15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      16     GNU General Public License for more details.
      17  
      18     You should have received a copy of the GNU General Public License
      19     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      20  
      21  
      22  /* See comments in state.h for the data structures that represent it.
      23     The entry point is generate_states.  */
      24  
      25  #include <config.h>
      26  #include "system.h"
      27  
      28  #include <bitset.h>
      29  
      30  #include "closure.h"
      31  #include "complain.h"
      32  #include "getargs.h"
      33  #include "gram.h"
      34  #include "lalr.h"
      35  #include "lr0.h"
      36  #include "reader.h"
      37  #include "reduce.h"
      38  #include "state.h"
      39  #include "symtab.h"
      40  
      41  typedef struct state_list
      42  {
      43    struct state_list *next;
      44    state *state;
      45  } state_list;
      46  
      47  static state_list *first_state = NULL;
      48  static state_list *last_state = NULL;
      49  
      50  /* Print CORE for debugging. */
      51  static void
      52  core_print (size_t core_size, item_index *core, FILE *out)
      53  {
      54    for (int i = 0; i < core_size; ++i)
      55      {
      56        item_print (ritem + core[i], NULL, out);
      57        fputc ('\n', out);
      58      }
      59  }
      60  
      61  /*-----------------------------------------------------------------.
      62  | A state was just discovered by transitioning on SYM from another |
      63  | state.  Queue this state for later examination, in order to find |
      64  | its outgoing transitions.  Return it.                            |
      65  `-----------------------------------------------------------------*/
      66  
      67  static state *
      68  state_list_append (symbol_number sym, size_t core_size, item_index *core)
      69  {
      70    state_list *node = xmalloc (sizeof *node);
      71    state *res = state_new (sym, core_size, core);
      72  
      73    if (trace_flag & trace_automaton)
      74      fprintf (stderr, "state_list_append (state = %d, symbol = %d (%s))\n",
      75               nstates, sym, symbols[sym]->tag);
      76  
      77    node->next = NULL;
      78    node->state = res;
      79  
      80    if (!first_state)
      81      first_state = node;
      82    if (last_state)
      83      last_state->next = node;
      84    last_state = node;
      85  
      86    return res;
      87  }
      88  
      89  /* Symbols that can be "shifted" (including nonterminals) from the
      90     current state.  */
      91  bitset shift_symbol;
      92  
      93  static rule **redset;
      94  /* For the current state, the list of pointers to states that can be
      95     reached via a shift/goto.  Could be indexed by the reaching symbol,
      96     but labels of incoming transitions can be recovered by the state
      97     itself.  */
      98  static state **shiftset;
      99  
     100  
     101  /* KERNEL_BASE[symbol-number] -> list of item indices (offsets inside
     102     RITEM) of length KERNEL_SIZE[symbol-number]. */
     103  static item_index **kernel_base;
     104  static int *kernel_size;
     105  
     106  /* A single dimension array that serves as storage for
     107     KERNEL_BASE.  */
     108  static item_index *kernel_items;
     109  
     110  
     111  static void
     112  allocate_itemsets (void)
     113  {
     114    /* Count the number of occurrences of all the symbols in RITEMS.
     115       Note that useless productions (hence useless nonterminals) are
     116       browsed too, hence we need to allocate room for _all_ the
     117       symbols.  */
     118    size_t count = 0;
     119    size_t *symbol_count = xcalloc (nsyms + nuseless_nonterminals,
     120                                    sizeof *symbol_count);
     121  
     122    for (rule_number r = 0; r < nrules; ++r)
     123      for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
     124        {
     125          symbol_number sym = item_number_as_symbol_number (*rhsp);
     126          count += 1;
     127          symbol_count[sym] += 1;
     128        }
     129  
     130    /* See comments before new_itemsets.  All the vectors of items
     131       live inside KERNEL_ITEMS.  The number of active items after
     132       some symbol S cannot be more than the number of times that S
     133       appears as an item, which is SYMBOL_COUNT[S].
     134       We allocate that much space for each symbol.  */
     135  
     136    kernel_base = xnmalloc (nsyms, sizeof *kernel_base);
     137    kernel_items = xnmalloc (count, sizeof *kernel_items);
     138  
     139    count = 0;
     140    for (symbol_number i = 0; i < nsyms; i++)
     141      {
     142        kernel_base[i] = kernel_items + count;
     143        count += symbol_count[i];
     144      }
     145  
     146    free (symbol_count);
     147    kernel_size = xnmalloc (nsyms, sizeof *kernel_size);
     148  }
     149  
     150  /* Print the current kernel (in KERNEL_BASE). */
     151  static void
     152  kernel_print (FILE *out)
     153  {
     154    for (symbol_number i = 0; i < nsyms; ++i)
     155      if (kernel_size[i])
     156        {
     157          fprintf (out, "kernel[%s] =\n", symbols[i]->tag);
     158          core_print (kernel_size[i], kernel_base[i], out);
     159        }
     160  }
     161  
     162  /* Make sure the kernel is in sane state. */
     163  static void
     164  kernel_check (void)
     165  {
     166    for (symbol_number i = 0; i < nsyms - 1; ++i)
     167      assert (kernel_base[i] + kernel_size[i] <= kernel_base[i + 1]);
     168  }
     169  
     170  static void
     171  allocate_storage (void)
     172  {
     173    allocate_itemsets ();
     174  
     175    shiftset = xnmalloc (nsyms, sizeof *shiftset);
     176    redset = xnmalloc (nrules, sizeof *redset);
     177    state_hash_new ();
     178    shift_symbol = bitset_create (nsyms, BITSET_FIXED);
     179  }
     180  
     181  
     182  static void
     183  free_storage (void)
     184  {
     185    bitset_free (shift_symbol);
     186    free (redset);
     187    free (shiftset);
     188    free (kernel_base);
     189    free (kernel_size);
     190    free (kernel_items);
     191    state_hash_free ();
     192  }
     193  
     194  
     195  
     196  
     197  /*------------------------------------------------------------------.
     198  | Find which term/nterm symbols can be "shifted" in S, and for each |
     199  | one record which items would be active after that transition.     |
     200  | Uses the contents of itemset.                                     |
     201  |                                                                   |
     202  | shift_symbol is a bitset of the term/nterm symbols that can be    |
     203  | shifted.  For each symbol in the grammar, kernel_base[symbol]     |
     204  | points to a vector of item numbers activated if that symbol is    |
     205  | shifted, and kernel_size[symbol] is their numbers.                |
     206  |                                                                   |
     207  | itemset is sorted on item index in ritem, which is sorted on rule |
     208  | number.  Compute each kernel_base[symbol] with the same sort.     |
     209  `------------------------------------------------------------------*/
     210  
     211  static void
     212  new_itemsets (state *s)
     213  {
     214    if (trace_flag & trace_automaton)
     215      fprintf (stderr, "new_itemsets: begin: state = %d\n", s->number);
     216  
     217    memset (kernel_size, 0, nsyms * sizeof *kernel_size);
     218  
     219    bitset_zero (shift_symbol);
     220  
     221    if (trace_flag & trace_automaton)
     222      {
     223        fprintf (stderr, "initial kernel:\n");
     224        kernel_print (stderr);
     225      }
     226  
     227    for (size_t i = 0; i < nitemset; ++i)
     228      if (item_number_is_symbol_number (ritem[itemset[i]]))
     229        {
     230          if (trace_flag & trace_automaton)
     231            {
     232              fputs ("working on: ", stderr);
     233              item_print (ritem + itemset[i], NULL, stderr);
     234              fputc ('\n', stderr);
     235            }
     236          symbol_number sym = item_number_as_symbol_number (ritem[itemset[i]]);
     237          bitset_set (shift_symbol, sym);
     238          kernel_base[sym][kernel_size[sym]] = itemset[i] + 1;
     239          kernel_size[sym]++;
     240        }
     241  
     242    if (trace_flag & trace_automaton)
     243      {
     244        fprintf (stderr, "final kernel:\n");
     245        kernel_print (stderr);
     246        fprintf (stderr, "new_itemsets: end: state = %d\n\n", s->number);
     247      }
     248    kernel_check ();
     249  }
     250  
     251  
     252  
     253  /*--------------------------------------------------------------.
     254  | Find the state we would get to (from the current state) by    |
     255  | shifting SYM.  Create a new state if no equivalent one exists |
     256  | already.  Used by append_states.                              |
     257  `--------------------------------------------------------------*/
     258  
     259  static state *
     260  get_state (symbol_number sym, size_t core_size, item_index *core)
     261  {
     262    if (trace_flag & trace_automaton)
     263      {
     264        fprintf (stderr, "Entering get_state, symbol = %d (%s), core:\n",
     265                 sym, symbols[sym]->tag);
     266        core_print (core_size, core, stderr);
     267        fputc ('\n', stderr);
     268      }
     269  
     270    state *s = state_hash_lookup (core_size, core);
     271    if (!s)
     272      s = state_list_append (sym, core_size, core);
     273  
     274    if (trace_flag & trace_automaton)
     275      fprintf (stderr, "Exiting get_state => %d\n", s->number);
     276  
     277    return s;
     278  }
     279  
     280  /*---------------------------------------------------------------.
     281  | Use the information computed by new_itemsets to find the state |
     282  | numbers reached by each shift transition from S.               |
     283  |                                                                |
     284  | SHIFTSET is set up as a vector of those states.                |
     285  `---------------------------------------------------------------*/
     286  
     287  static void
     288  append_states (state *s)
     289  {
     290    if (trace_flag & trace_automaton)
     291      fprintf (stderr, "append_states: begin: state = %d\n", s->number);
     292  
     293    bitset_iterator iter;
     294    symbol_number sym;
     295    int i = 0;
     296    BITSET_FOR_EACH (iter, shift_symbol, sym, 0)
     297      {
     298        shiftset[i] = get_state (sym, kernel_size[sym], kernel_base[sym]);
     299        ++i;
     300      }
     301  
     302    if (trace_flag & trace_automaton)
     303      fprintf (stderr, "append_states: end: state = %d\n", s->number);
     304  }
     305  
     306  
     307  /*----------------------------------------------------------------.
     308  | Find which rules can be used for reduction transitions from the |
     309  | current state and make a reductions structure for the state to  |
     310  | record their rule numbers.                                      |
     311  `----------------------------------------------------------------*/
     312  
     313  static void
     314  save_reductions (state *s)
     315  {
     316    int count = 0;
     317  
     318    /* Find and count the active items that represent ends of rules. */
     319    for (size_t i = 0; i < nitemset; ++i)
     320      {
     321        item_number item = ritem[itemset[i]];
     322        if (item_number_is_rule_number (item))
     323          {
     324            rule_number r = item_number_as_rule_number (item);
     325            redset[count++] = &rules[r];
     326            if (r == 0)
     327              {
     328                /* This is "reduce 0", i.e., accept. */
     329                aver (!final_state);
     330                final_state = s;
     331              }
     332          }
     333      }
     334  
     335    if (trace_flag & trace_automaton)
     336      {
     337        fprintf (stderr, "reduction[%d] = {\n", s->number);
     338        for (int i = 0; i < count; ++i)
     339          {
     340            rule_print (redset[i], NULL, stderr);
     341            fputc ('\n', stderr);
     342          }
     343        fputs ("}\n", stderr);
     344      }
     345  
     346    /* Make a reductions structure and copy the data into it.  */
     347    state_reductions_set (s, count, redset);
     348  }
     349  
     350  
     351  /*---------------.
     352  | Build STATES.  |
     353  `---------------*/
     354  
     355  static void
     356  set_states (void)
     357  {
     358    states = xcalloc (nstates, sizeof *states);
     359  
     360    while (first_state)
     361      {
     362        state_list *this = first_state;
     363  
     364        /* Pessimization, but simplification of the code: make sure all
     365           the states have valid transitions and reductions members,
     366           even if reduced to 0.  It is too soon for errs, which are
     367           computed later, but set_conflicts.  */
     368        state *s = this->state;
     369        if (!s->transitions)
     370          state_transitions_set (s, 0, 0);
     371        if (!s->reductions)
     372          state_reductions_set (s, 0, 0);
     373  
     374        states[s->number] = s;
     375  
     376        first_state = this->next;
     377        free (this);
     378      }
     379    first_state = NULL;
     380    last_state = NULL;
     381  }
     382  
     383  
     384  /*-------------------------------------------------------------------.
     385  | Compute the LR(0) parser states (see state.h for details) from the |
     386  | grammar.                                                           |
     387  `-------------------------------------------------------------------*/
     388  
     389  void
     390  generate_states (void)
     391  {
     392    allocate_storage ();
     393    closure_new (nritems);
     394  
     395    /* Create the initial state, whose accessing symbol (by convention)
     396       is 0, aka $end.  */
     397    {
     398      /* The items of its core: beginning of all the rules of $accept.  */
     399      kernel_size[0] = 0;
     400      for (rule_number r = 0; r < nrules && rules[r].lhs->symbol == acceptsymbol; ++r)
     401        kernel_base[0][kernel_size[0]++] = rules[r].rhs - ritem;
     402      state_list_append (0, kernel_size[0], kernel_base[0]);
     403    }
     404  
     405    /* States are queued when they are created; process them all.  */
     406    for (state_list *list = first_state; list; list = list->next)
     407      {
     408        state *s = list->state;
     409        if (trace_flag & trace_automaton)
     410          fprintf (stderr, "Processing state %d (reached by %s)\n",
     411                   s->number,
     412                   symbols[s->accessing_symbol]->tag);
     413        /* Set up itemset for the transitions out of this state.  itemset gets a
     414           vector of all the items that could be accepted next.  */
     415        closure (s->items, s->nitems);
     416        /* Record the reductions allowed out of this state.  */
     417        save_reductions (s);
     418        /* Find the itemsets of the states that shifts/gotos can reach.  */
     419        new_itemsets (s);
     420        /* Find or create the core structures for those states.  */
     421        append_states (s);
     422  
     423        /* Create the shifts structures for the shifts to those states,
     424           now that the state numbers transitioning to are known.  */
     425        state_transitions_set (s, bitset_count (shift_symbol), shiftset);
     426      }
     427  
     428    /* discard various storage */
     429    free_storage ();
     430  
     431    /* Set up STATES. */
     432    set_states ();
     433  }