(root)/
bison-3.8.2/
src/
lalr.c
       1  /* Compute lookahead criteria for Bison.
       2  
       3     Copyright (C) 1984, 1986, 1989, 2000-2015, 2018-2021 Free Software
       4     Foundation, Inc.
       5  
       6     This file is part of Bison, the GNU Compiler Compiler.
       7  
       8     This program is free software: you can redistribute it and/or modify
       9     it under the terms of the GNU General Public License as published by
      10     the Free Software Foundation, either version 3 of the License, or
      11     (at your option) any later version.
      12  
      13     This program is distributed in the hope that it will be useful,
      14     but WITHOUT ANY WARRANTY; without even the implied warranty of
      15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      16     GNU General Public License for more details.
      17  
      18     You should have received a copy of the GNU General Public License
      19     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      20  
      21  
      22  /* Find which rules need lookahead in each state, and which lookahead
      23     tokens they accept.  */
      24  
      25  #include <config.h>
      26  #include "system.h"
      27  
      28  #include <bitset.h>
      29  #include <bitsetv.h>
      30  
      31  #include "complain.h"
      32  #include "derives.h"
      33  #include "getargs.h"
      34  #include "gram.h"
      35  #include "lalr.h"
      36  #include "lr0.h"
      37  #include "muscle-tab.h"
      38  #include "nullable.h"
      39  #include "reader.h"
      40  #include "relation.h"
      41  #include "symtab.h"
      42  
      43  goto_number *goto_map = NULL;
      44  goto_number ngotos = 0;
      45  state_number *from_state = NULL;
      46  state_number *to_state = NULL;
      47  bitsetv goto_follows = NULL;
      48  
      49  /* Linked list of goto numbers.  */
      50  typedef struct goto_list
      51  {
      52    struct goto_list *next;
      53    goto_number value;
      54  } goto_list;
      55  
      56  static goto_list *
      57  goto_list_new (goto_number value, struct goto_list *next)
      58  {
      59    goto_list *res = xmalloc (sizeof *res);
      60    res->next = next;
      61    res->value = value;
      62    return res;
      63  }
      64  
      65  /* LA is an nLA by NTOKENS matrix of bits.  LA[l, i] is 1 if the rule
      66     LArule[l] is applicable in the appropriate state when the next
      67     token is symbol i.  If LA[l, i] and LA[l, j] are both 1 for i != j,
      68     it is a conflict.  */
      69  
      70  static bitsetv LA = NULL;
      71  size_t nLA;
      72  
      73  
      74  /* "(p, A) includes (p', B)" iff
      75     B → βAγ, γ nullable, and p'-- β --> p (i.e., state p' reaches p on label β).
      76  
      77     Definition p.621 [DeRemer 1982].
      78  
      79     INCLUDES[(p, A)] = [(p', B),...] */
      80  static goto_number **includes;
      81  
      82  /* "(q, A → ω) lookback (p, A)" iff state p reaches state q on label ω.
      83  
      84     Definition p.621 [DeRemer 1982]. */
      85  static goto_list **lookback;
      86  
      87  static void
      88  goto_print (goto_number i, FILE *out)
      89  {
      90    const state_number src = from_state[i];
      91    const state_number dst = to_state[i];
      92    symbol_number var = states[dst]->accessing_symbol;
      93    fprintf (out,
      94             "goto[%zu] = (%d, %s, %d)", i, src, symbols[var]->tag, dst);
      95  }
      96  
      97  void
      98  set_goto_map (void)
      99  {
     100    /* Count the number of gotos (ngotos) per nterm (goto_map). */
     101    if (trace_flag & trace_automaton)
     102      fprintf (stderr, "nnterms: %d\n", nnterms);
     103    goto_map = xcalloc (nnterms + 1, sizeof *goto_map);
     104    ngotos = 0;
     105    for (state_number s = 0; s < nstates; ++s)
     106      {
     107        transitions *trans = states[s]->transitions;
     108        for (int i = trans->num - 1; 0 <= i && TRANSITION_IS_GOTO (trans, i); --i)
     109          {
     110            ngotos++;
     111            /* Abort if (ngotos + 1) would overflow.  */
     112            aver (ngotos != GOTO_NUMBER_MAXIMUM);
     113            goto_map[TRANSITION_SYMBOL (trans, i) - ntokens]++;
     114          }
     115      }
     116  
     117    goto_number *temp_map = xnmalloc (nnterms + 1, sizeof *temp_map);
     118    {
     119      goto_number k = 0;
     120      for (symbol_number i = ntokens; i < nsyms; ++i)
     121        {
     122          temp_map[i - ntokens] = k;
     123          k += goto_map[i - ntokens];
     124        }
     125  
     126      for (symbol_number i = ntokens; i < nsyms; ++i)
     127        goto_map[i - ntokens] = temp_map[i - ntokens];
     128  
     129      goto_map[nsyms - ntokens] = ngotos;
     130      temp_map[nsyms - ntokens] = ngotos;
     131    }
     132  
     133    from_state = xcalloc (ngotos, sizeof *from_state);
     134    to_state = xcalloc (ngotos, sizeof *to_state);
     135  
     136    for (state_number s = 0; s < nstates; ++s)
     137      {
     138        const transitions *trans = states[s]->transitions;
     139        for (int i = trans->num - 1; 0 <= i && TRANSITION_IS_GOTO (trans, i); --i)
     140          {
     141            goto_number k = temp_map[TRANSITION_SYMBOL (trans, i) - ntokens]++;
     142            from_state[k] = s;
     143            to_state[k] = trans->states[i]->number;
     144          }
     145      }
     146  
     147    free (temp_map);
     148  
     149    if (trace_flag & trace_automaton)
     150      {
     151        for (int i = 0; i < nnterms; ++i)
     152          fprintf (stderr, "goto_map[%d (%s)] = %ld .. %ld\n",
     153                   i, symbols[ntokens + i]->tag,
     154                   goto_map[i], goto_map[i+1] - 1);
     155        for (int i = 0; i < ngotos; ++i)
     156          {
     157            goto_print (i, stderr);
     158            fputc ('\n', stderr);
     159          }
     160      }
     161  }
     162  
     163  
     164  goto_number
     165  map_goto (state_number src, symbol_number sym)
     166  {
     167    goto_number low = goto_map[sym - ntokens];
     168    assert (goto_map[sym - ntokens] != goto_map[sym - ntokens + 1]);
     169    goto_number high = goto_map[sym - ntokens + 1] - 1;
     170  
     171    for (;;)
     172      {
     173        aver (low <= high);
     174        goto_number middle = (low + high) / 2;
     175        state_number s = from_state[middle];
     176        if (s == src)
     177          return middle;
     178        else if (s < src)
     179          low = middle + 1;
     180        else
     181          high = middle - 1;
     182      }
     183  }
     184  
     185  /* Print FOLLOWS for debugging.  */
     186  static void
     187  follows_print (const char* title, FILE *out)
     188  {
     189    fprintf (out, "%s:\n", title);
     190    for (goto_number i = 0; i < ngotos; ++i)
     191      {
     192        fputs ("    FOLLOWS[", out);
     193        goto_print (i, out);
     194        fputs ("] =", out);
     195        bitset_iterator iter;
     196        symbol_number sym;
     197        BITSET_FOR_EACH (iter, goto_follows[i], sym, 0)
     198          fprintf (out, " %s", symbols[sym]->tag);
     199        fputc ('\n', out);
     200      }
     201    fputc ('\n', out);
     202  }
     203  
     204  /* Build goto_follows. */
     205  static void
     206  initialize_goto_follows (void)
     207  {
     208    goto_number **reads = xnmalloc (ngotos, sizeof *reads);
     209    goto_number *edge = xnmalloc (ngotos, sizeof *edge);
     210  
     211    goto_follows = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
     212  
     213    for (goto_number i = 0; i < ngotos; ++i)
     214      {
     215        state_number dst = to_state[i];
     216        const transitions *trans = states[dst]->transitions;
     217  
     218        int j;
     219        FOR_EACH_SHIFT (trans, j)
     220          bitset_set (goto_follows[i], TRANSITION_SYMBOL (trans, j));
     221  
     222        /* Gotos outgoing from DST. */
     223        goto_number nedges = 0;
     224        for (; j < trans->num; ++j)
     225          {
     226            symbol_number sym = TRANSITION_SYMBOL (trans, j);
     227            if (nullable[sym - ntokens])
     228              {
     229                assert (nedges < ngotos);
     230                edge[nedges++] = map_goto (dst, sym);
     231              }
     232          }
     233  
     234        if (nedges == 0)
     235          reads[i] = NULL;
     236        else
     237          {
     238            reads[i] = xnmalloc (nedges + 1, sizeof reads[i][0]);
     239            memcpy (reads[i], edge, nedges * sizeof edge[0]);
     240            reads[i][nedges] = END_NODE;
     241          }
     242      }
     243    if (trace_flag & trace_automaton)
     244      {
     245        follows_print ("follows after shifts", stderr);
     246        relation_print ("reads", reads, ngotos, goto_print, stderr);
     247      }
     248  
     249    relation_digraph (reads, ngotos, goto_follows);
     250    if (trace_flag & trace_automaton)
     251      follows_print ("follows after read", stderr);
     252  
     253    for (goto_number i = 0; i < ngotos; ++i)
     254      free (reads[i]);
     255    free (reads);
     256    free (edge);
     257  }
     258  
     259  
     260  /* Find the state which LOOKBACK[LOOKBACK_INDEX] is about.  */
     261  static const state *
     262  lookback_find_state (int lookback_index)
     263  {
     264    state *res = NULL;
     265    for (int j = 0; j < nstates; ++j)
     266      if (states[j]->reductions
     267          && states[j]->reductions->lookaheads)
     268        {
     269          if (states[j]->reductions->lookaheads - LA > lookback_index)
     270            /* Went too far. */
     271            break;
     272          else
     273            res = states[j];
     274        }
     275    /* Pacify "potential null pointer dereference" warning.  */
     276    if (!res)
     277      abort ();
     278    return res;
     279  }
     280  
     281  /* Print LOOKBACK for debugging.  */
     282  static void
     283  lookback_print (FILE *out)
     284  {
     285    fputs ("lookback:\n", out);
     286    for (int i = 0; i < nLA; ++i)
     287      if (lookback[i])
     288      {
     289        fprintf (out, "   %3d = ", i);
     290        const state *s = lookback_find_state (i);
     291        int rnum = i - (s->reductions->lookaheads - LA);
     292        const rule *r = s->reductions->rules[rnum];
     293        fprintf (out, "(%3d, ", s->number);
     294        rule_print (r, NULL, out);
     295        fputs (") ->", out);
     296        for (goto_list *sp = lookback[i]; sp; sp = sp->next)
     297          {
     298            fputc (' ', out);
     299            goto_print (sp->value, out);
     300          }
     301        fputc ('\n', out);
     302      }
     303    fputc ('\n', out);
     304  }
     305  
     306  /* Add (S, R) -> GOTONO to LOOKBACK.
     307  
     308     "(q, A → ω) lookback (p, A)" iff state p reaches state q on label ω.
     309  
     310     The goto number GOTONO, whose source is S (which is
     311     inconsistent), */
     312  static void
     313  add_lookback_edge (state *s, rule const *r, goto_number gotono)
     314  {
     315    int ri = state_reduction_find (s, r);
     316    int idx = (s->reductions->lookaheads - LA) + ri;
     317    lookback[idx] = goto_list_new (gotono, lookback[idx]);
     318  }
     319  
     320  
     321  /* Compute INCLUDES and LOOKBACK.  Corresponds to step E in Sec. 6 of
     322     [DeRemer 1982].  */
     323  static void
     324  build_relations (void)
     325  {
     326    goto_number *edge = xnmalloc (ngotos, sizeof *edge);
     327    state_number *path = xnmalloc (ritem_longest_rhs () + 1, sizeof *path);
     328  
     329    includes = xnmalloc (ngotos, sizeof *includes);
     330  
     331    /* For each goto (from SRC to DST labeled by nterm VAR), iterate
     332       over each rule with VAR as LHS, and find the path PATH from SRC
     333       labeled with the RHS of the rule. */
     334    for (goto_number i = 0; i < ngotos; ++i)
     335      {
     336        const state_number src = from_state[i];
     337        const state_number dst = to_state[i];
     338        symbol_number var = states[dst]->accessing_symbol;
     339  
     340        /* Size of EDGE.  */
     341        int nedges = 0;
     342        for (rule **rulep = derives[var - ntokens]; *rulep; ++rulep)
     343          {
     344            rule const *r = *rulep;
     345            state *s = states[src];
     346            path[0] = s->number;
     347  
     348            /* Length of PATH.  */
     349            int length = 1;
     350            for (item_number const *rp = r->rhs; 0 <= *rp; rp++)
     351              {
     352                symbol_number sym = item_number_as_symbol_number (*rp);
     353                s = transitions_to (s, sym);
     354                path[length++] = s->number;
     355              }
     356  
     357            /* S is the end of PATH.  */
     358            if (!s->consistent)
     359              add_lookback_edge (s, r, i);
     360  
     361            /* Walk back PATH from penultimate to beginning.
     362  
     363               The "0 <= p" part is actually useless: each rhs ends in a
     364               rule number (for which ISVAR(...) is false), and there is
     365               a sentinel (ritem[-1]=0) before the first rhs.  */
     366            for (int p = length - 2; 0 <= p && ISVAR (r->rhs[p]); --p)
     367              {
     368                symbol_number sym = item_number_as_symbol_number (r->rhs[p]);
     369                goto_number g = map_goto (path[p], sym);
     370                /* Insert G if not already in EDGE.
     371                   FIXME: linear search.  A bitset instead?  */
     372                {
     373                  bool found = false;
     374                  for (int j = 0; !found && j < nedges; ++j)
     375                    found = edge[j] == g;
     376                  if (!found)
     377                    {
     378                      assert (nedges < ngotos);
     379                      edge[nedges++] = g;
     380                    }
     381                }
     382                if (!nullable[sym - ntokens])
     383                  break;
     384              }
     385          }
     386  
     387        if (trace_flag & trace_automaton)
     388          {
     389            goto_print (i, stderr);
     390            fputs (" edges = ", stderr);
     391            for (int j = 0; j < nedges; ++j)
     392              {
     393                fputc (' ', stderr);
     394                goto_print (edge[j], stderr);
     395              }
     396            fputc ('\n', stderr);
     397          }
     398  
     399        if (nedges == 0)
     400          includes[i] = NULL;
     401        else
     402          {
     403            includes[i] = xnmalloc (nedges + 1, sizeof includes[i][0]);
     404            for (int j = 0; j < nedges; ++j)
     405              includes[i][j] = edge[j];
     406            includes[i][nedges] = END_NODE;
     407          }
     408      }
     409  
     410    free (edge);
     411    free (path);
     412  
     413    relation_transpose (&includes, ngotos);
     414    if (trace_flag & trace_automaton)
     415      relation_print ("includes", includes, ngotos, goto_print, stderr);
     416  }
     417  
     418  /* Compute FOLLOWS from INCLUDES, and free INCLUDES.  */
     419  static void
     420  compute_follows (void)
     421  {
     422    relation_digraph (includes, ngotos, goto_follows);
     423    if (trace_flag & trace_sets)
     424      follows_print ("follows after includes", stderr);
     425    for (goto_number i = 0; i < ngotos; ++i)
     426      free (includes[i]);
     427    free (includes);
     428  }
     429  
     430  
     431  static void
     432  compute_lookaheads (void)
     433  {
     434    if (trace_flag & trace_automaton)
     435        lookback_print (stderr);
     436  
     437    for (size_t i = 0; i < nLA; ++i)
     438      for (goto_list *sp = lookback[i]; sp; sp = sp->next)
     439        bitset_or (LA[i], LA[i], goto_follows[sp->value]);
     440  
     441    /* Free LOOKBACK. */
     442    for (size_t i = 0; i < nLA; ++i)
     443      LIST_FREE (goto_list, lookback[i]);
     444    free (lookback);
     445  }
     446  
     447  
     448  /*------------------------------------------------------.
     449  | Count the number of lookahead tokens required for S.  |
     450  `------------------------------------------------------*/
     451  
     452  static int
     453  state_lookaheads_count (state *s, bool default_reduction_only_for_accept)
     454  {
     455    const reductions *reds = s->reductions;
     456    const transitions *trans = s->transitions;
     457  
     458    /* Transitions are only disabled during conflict resolution, and that
     459       hasn't happened yet, so there should be no need to check that
     460       transition 0 hasn't been disabled before checking if it is a shift.
     461       However, this check was performed at one time, so we leave it as an
     462       aver.  */
     463    aver (trans->num == 0 || !TRANSITION_IS_DISABLED (trans, 0));
     464  
     465    /* We need a lookahead either to distinguish different reductions
     466       (i.e., there are two or more), or to distinguish a reduction from a
     467       shift.  Otherwise, it is straightforward, and the state is
     468       'consistent'.  However, do not treat a state with any reductions as
     469       consistent unless it is the accepting state (because there is never
     470       a lookahead token that makes sense there, and so no lookahead token
     471       should be read) if the user has otherwise disabled default
     472       reductions.  */
     473    s->consistent =
     474      !(reds->num > 1
     475        || (reds->num == 1 && trans->num && TRANSITION_IS_SHIFT (trans, 0))
     476        || (reds->num == 1 && !rule_is_initial (reds->rules[0])
     477            && default_reduction_only_for_accept));
     478  
     479    return s->consistent ? 0 : reds->num;
     480  }
     481  
     482  
     483  /*----------------------------------------------.
     484  | Compute LA, NLA, and the lookaheads members.  |
     485  `----------------------------------------------*/
     486  
     487  void
     488  initialize_LA (void)
     489  {
     490    bool default_reduction_only_for_accept;
     491    {
     492      char *default_reductions =
     493        muscle_percent_define_get ("lr.default-reduction");
     494      default_reduction_only_for_accept = STREQ (default_reductions, "accepting");
     495      free (default_reductions);
     496    }
     497  
     498    /* Compute the total number of reductions requiring a lookahead.  */
     499    nLA = 0;
     500    for (state_number i = 0; i < nstates; ++i)
     501      nLA += state_lookaheads_count (states[i],
     502                                     default_reduction_only_for_accept);
     503    /* Avoid having to special case 0.  */
     504    if (!nLA)
     505      nLA = 1;
     506  
     507    bitsetv pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED);
     508  
     509    /* Initialize the members LOOKAHEADS for each state whose reductions
     510       require lookahead tokens.  */
     511    for (state_number i = 0; i < nstates; ++i)
     512      {
     513        int count = state_lookaheads_count (states[i],
     514                                            default_reduction_only_for_accept);
     515        if (count)
     516          {
     517            states[i]->reductions->lookaheads = pLA;
     518            pLA += count;
     519          }
     520      }
     521  }
     522  
     523  
     524  /*---------------------------------------------.
     525  | Output the lookahead tokens for each state.  |
     526  `---------------------------------------------*/
     527  
     528  static void
     529  lookaheads_print (FILE *out)
     530  {
     531    fputs ("Lookaheads:\n", out);
     532    for (state_number i = 0; i < nstates; ++i)
     533      {
     534        const reductions *reds = states[i]->reductions;
     535        if (reds->num)
     536          {
     537            fprintf (out, "  State %d:\n", i);
     538            for (int j = 0; j < reds->num; ++j)
     539              {
     540                fprintf (out, "    rule %d:", reds->rules[j]->number);
     541                if (reds->lookaheads)
     542                {
     543                  bitset_iterator iter;
     544                  int k;
     545                  BITSET_FOR_EACH (iter, reds->lookaheads[j], k, 0)
     546                    fprintf (out, " %s", symbols[k]->tag);
     547                }
     548                fputc ('\n', out);
     549              }
     550          }
     551      }
     552    fputc ('\n', out);
     553  }
     554  
     555  void
     556  lalr (void)
     557  {
     558    if (trace_flag & trace_automaton)
     559      {
     560        fputc ('\n', stderr);
     561        begin_use_class ("trace0", stderr);
     562        fprintf (stderr, "lalr: begin");
     563        end_use_class ("trace0", stderr);
     564        fputc ('\n', stderr);
     565      }
     566    initialize_LA ();
     567    set_goto_map ();
     568    initialize_goto_follows ();
     569    lookback = xcalloc (nLA, sizeof *lookback);
     570    build_relations ();
     571    compute_follows ();
     572    compute_lookaheads ();
     573  
     574    if (trace_flag & trace_sets)
     575      lookaheads_print (stderr);
     576    if (trace_flag & trace_automaton)
     577      {
     578        begin_use_class ("trace0", stderr);
     579        fprintf (stderr, "lalr: done");
     580        end_use_class ("trace0", stderr);
     581        fputc ('\n', stderr);
     582      }
     583  }
     584  
     585  
     586  void
     587  lalr_update_state_numbers (state_number old_to_new[], state_number nstates_old)
     588  {
     589    goto_number ngotos_reachable = 0;
     590    symbol_number nonterminal = 0;
     591    aver (nsyms == nnterms + ntokens);
     592  
     593    for (goto_number i = 0; i < ngotos; ++i)
     594      {
     595        while (i == goto_map[nonterminal])
     596          goto_map[nonterminal++] = ngotos_reachable;
     597        /* If old_to_new[from_state[i]] = nstates_old, remove this goto
     598           entry.  */
     599        if (old_to_new[from_state[i]] != nstates_old)
     600          {
     601            /* from_state[i] is not removed, so it and thus to_state[i] are
     602               reachable, so to_state[i] != nstates_old.  */
     603            aver (old_to_new[to_state[i]] != nstates_old);
     604            from_state[ngotos_reachable] = old_to_new[from_state[i]];
     605            to_state[ngotos_reachable] = old_to_new[to_state[i]];
     606            ++ngotos_reachable;
     607          }
     608      }
     609    while (nonterminal <= nnterms)
     610      {
     611        aver (ngotos == goto_map[nonterminal]);
     612        goto_map[nonterminal++] = ngotos_reachable;
     613      }
     614    ngotos = ngotos_reachable;
     615  }
     616  
     617  
     618  void
     619  lalr_free (void)
     620  {
     621    for (state_number s = 0; s < nstates; ++s)
     622      states[s]->reductions->lookaheads = NULL;
     623    bitsetv_free (LA);
     624  }