(root)/
bison-3.8.2/
src/
reduce.c
       1  /* Grammar reduction for Bison.
       2  
       3     Copyright (C) 1988-1989, 2000-2003, 2005-2015, 2018-2021 Free
       4     Software Foundation, Inc.
       5  
       6     This file is part of Bison, the GNU Compiler Compiler.
       7  
       8     This program is free software: you can redistribute it and/or modify
       9     it under the terms of the GNU General Public License as published by
      10     the Free Software Foundation, either version 3 of the License, or
      11     (at your option) any later version.
      12  
      13     This program is distributed in the hope that it will be useful,
      14     but WITHOUT ANY WARRANTY; without even the implied warranty of
      15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      16     GNU General Public License for more details.
      17  
      18     You should have received a copy of the GNU General Public License
      19     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      20  
      21  
      22  /* Reduce the grammar: Find and eliminate unreachable terminals,
      23     nonterminals, and productions.  David S. Bakin.  */
      24  
      25  /* Don't eliminate unreachable terminals: They may be used by the
      26     user's parser.  */
      27  
      28  #include <config.h>
      29  #include "system.h"
      30  
      31  #include <bitset.h>
      32  
      33  #include "complain.h"
      34  #include "files.h"
      35  #include "getargs.h"
      36  #include "gram.h"
      37  #include "print-xml.h"
      38  #include "reader.h"
      39  #include "reduce.h"
      40  #include "symtab.h"
      41  
      42  /* Set of nonterminals whose language is not empty.  */
      43  static bitset N;
      44  
      45  /* Set of rules that have no useless nonterminals in their RHS.  */
      46  static bitset P;
      47  
      48  /* Set of accessible symbols.  */
      49  static bitset V;
      50  
      51  /* Set of symbols used to define rule precedence (so they are
      52     'useless', but no warning should be issued).  */
      53  static bitset V1;
      54  
      55  int nuseless_productions;
      56  int nuseless_nonterminals;
      57  
      58  #define bitset_swap(Lhs, Rhs)                   \
      59    do {                                          \
      60      bitset lhs__ = Lhs;                         \
      61      Lhs = Rhs;                                  \
      62      Rhs = lhs__;                                \
      63    } while (0)
      64  
      65  /*-------------------------------------------------------------------.
      66  | Another way to do this would be with a set for each production and |
      67  | then do subset tests against N0, but even for the C grammar the    |
      68  | whole reducing process takes only 2 seconds on my 8Mhz AT.         |
      69  `-------------------------------------------------------------------*/
      70  
      71  static bool
      72  useful_production (rule_number r, bitset N0)
      73  {
      74    /* A production is useful if all of the nonterminals in its appear
      75       in the set of useful nonterminals.  */
      76  
      77    for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
      78      if (ISVAR (*rhsp) && !bitset_test (N0, *rhsp - ntokens))
      79        return false;
      80    return true;
      81  }
      82  
      83  
      84  /*-----------------------------------------------------------------.
      85  | Compute N, the set of nonterminals whose language is not empty.  |
      86  |                                                                  |
      87  | Remember that rules are 1-origin, symbols are 0-origin.          |
      88  `-----------------------------------------------------------------*/
      89  
      90  static void
      91  useless_nonterminals (void)
      92  {
      93    /* N is set as built.  Np is set being built this iteration. P is
      94       set of all productions which have a RHS all in N.  */
      95  
      96    bitset Np = bitset_create (nnterms, BITSET_FIXED);
      97  
      98    /* The set being computed is a set of nonterminals which can derive
      99       the empty string or strings consisting of all terminals. At each
     100       iteration a nonterminal is added to the set if there is a
     101       production with that nonterminal as its LHS for which all the
     102       nonterminals in its RHS are already in the set.  Iterate until
     103       the set being computed remains unchanged.  Any nonterminals not
     104       in the set at that point are useless in that they will never be
     105       used in deriving a sentence of the language.
     106  
     107       This iteration doesn't use any special traversal over the
     108       productions.  A set is kept of all productions for which all the
     109       nonterminals in the RHS are in useful.  Only productions not in
     110       this set are scanned on each iteration.  At the end, this set is
     111       saved to be used when finding useful productions: only
     112       productions in this set will appear in the final grammar.  */
     113  
     114    while (1)
     115      {
     116        bitset_copy (Np, N);
     117        for (rule_number r = 0; r < nrules; ++r)
     118          if (!bitset_test (P, r)
     119              && useful_production (r, N))
     120            {
     121              bitset_set (Np, rules[r].lhs->number - ntokens);
     122              bitset_set (P, r);
     123            }
     124        if (bitset_equal_p (N, Np))
     125          break;
     126        bitset_swap (N, Np);
     127      }
     128    bitset_free (N);
     129    N = Np;
     130  }
     131  
     132  
     133  static void
     134  inaccessable_symbols (void)
     135  {
     136    /* Find out which productions are reachable and which symbols are
     137       used.  Starting with an empty set of productions and a set of
     138       symbols which only has the start symbol in it, iterate over all
     139       productions until the set of productions remains unchanged for an
     140       iteration.  For each production which has a LHS in the set of
     141       reachable symbols, add the production to the set of reachable
     142       productions, and add all of the nonterminals in the RHS of the
     143       production to the set of reachable symbols.
     144  
     145       Consider only the (partially) reduced grammar which has only
     146       nonterminals in N and productions in P.
     147  
     148       The result is the set P of productions in the reduced grammar,
     149       and the set V of symbols in the reduced grammar.
     150  
     151       Although this algorithm also computes the set of terminals which
     152       are reachable, no terminal will be deleted from the grammar. Some
     153       terminals might not be in the grammar but might be generated by
     154       semantic routines, and so the user might want them available with
     155       specified numbers.  (Is this true?)  However, the nonreachable
     156       terminals are printed (if running in verbose mode) so that the
     157       user can know.  */
     158  
     159    bitset Vp = bitset_create (nsyms, BITSET_FIXED);
     160    bitset Pp = bitset_create (nrules, BITSET_FIXED);
     161  
     162    /* If the start symbol isn't useful, then nothing will be useful. */
     163    if (bitset_test (N, acceptsymbol->content->number - ntokens))
     164      {
     165        bitset_set (V, acceptsymbol->content->number);
     166  
     167        while (1)
     168          {
     169            bitset_copy (Vp, V);
     170            for (rule_number r = 0; r < nrules; ++r)
     171              if (!bitset_test (Pp, r)
     172                  && bitset_test (P, r)
     173                  && bitset_test (V, rules[r].lhs->number))
     174                {
     175                  for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
     176                    if (ISTOKEN (*rhsp) || bitset_test (N, *rhsp - ntokens))
     177                      bitset_set (Vp, *rhsp);
     178                  bitset_set (Pp, r);
     179                }
     180            if (bitset_equal_p (V, Vp))
     181              break;
     182            bitset_swap (V, Vp);
     183          }
     184      }
     185  
     186    bitset_free (V);
     187    V = Vp;
     188  
     189    /* These tokens (numbered 0, 1, and 2) are internal to Bison.
     190       Consider them useful. */
     191    bitset_set (V, eoftoken->content->number);   /* end-of-input token */
     192    bitset_set (V, errtoken->content->number);   /* error token */
     193    bitset_set (V, undeftoken->content->number); /* some undefined token */
     194  
     195    bitset_free (P);
     196    P = Pp;
     197  
     198    int nuseful_productions = bitset_count (P);
     199    nuseless_productions = nrules - nuseful_productions;
     200  
     201    int nuseful_nonterminals = 0;
     202    for (symbol_number i = ntokens; i < nsyms; ++i)
     203      nuseful_nonterminals += bitset_test (V, i);
     204    nuseless_nonterminals = nnterms - nuseful_nonterminals;
     205  
     206    /* A token that was used in %prec should not be warned about.  */
     207    for (rule_number r = 0; r < nrules; ++r)
     208      if (rules[r].precsym != 0)
     209        bitset_set (V1, rules[r].precsym->number);
     210  }
     211  
     212  
     213  /*-------------------------------------------------------------------.
     214  | Put the useless productions at the end of RULES, and adjust NRULES |
     215  | accordingly.                                                       |
     216  `-------------------------------------------------------------------*/
     217  
     218  static void
     219  reduce_grammar_tables (void)
     220  {
     221    /* Report and flag useless productions.  */
     222    {
     223      for (rule_number r = 0; r < nrules; ++r)
     224        rules[r].useful = bitset_test (P, r);
     225      grammar_rules_useless_report (_("rule useless in grammar"));
     226    }
     227  
     228    /* Map the nonterminals to their new index: useful first, useless
     229       afterwards.  Kept for later report.  */
     230    {
     231      int useful = 0;
     232      int useless = nrules - nuseless_productions;
     233      rule *rules_sorted = xnmalloc (nrules, sizeof *rules_sorted);
     234      for (rule_number r = 0; r < nrules; ++r)
     235        rules_sorted[rules[r].useful ? useful++ : useless++] = rules[r];
     236      free (rules);
     237      rules = rules_sorted;
     238  
     239      /* Renumber the rules markers in RITEMS.  */
     240      for (rule_number r = 0; r < nrules; ++r)
     241        {
     242          item_number *rhsp = rules[r].rhs;
     243          for (/* Nothing. */; 0 <= *rhsp; ++rhsp)
     244            continue;
     245          *rhsp = rule_number_as_item_number (r);
     246          rules[r].number = r;
     247        }
     248      nrules -= nuseless_productions;
     249    }
     250  
     251    /* Adjust NRITEMS.  */
     252    for (rule_number r = nrules; r < nrules + nuseless_productions; ++r)
     253      nritems -= rule_rhs_length (&rules[r]) + 1;
     254  }
     255  
     256  
     257  /*------------------------------.
     258  | Remove useless nonterminals.  |
     259  `------------------------------*/
     260  
     261  symbol_number *nterm_map = NULL;
     262  
     263  static void
     264  nonterminals_reduce (void)
     265  {
     266    nterm_map = xnmalloc (nnterms, sizeof *nterm_map);
     267    /* Map the nonterminals to their new index: useful first, useless
     268       afterwards.  Kept for later report.  */
     269    {
     270      symbol_number n = ntokens;
     271      for (symbol_number i = ntokens; i < nsyms; ++i)
     272        if (bitset_test (V, i))
     273          nterm_map[i - ntokens] = n++;
     274      for (symbol_number i = ntokens; i < nsyms; ++i)
     275        if (!bitset_test (V, i))
     276          {
     277            nterm_map[i - ntokens] = n++;
     278            if (symbols[i]->content->status != used
     279                && symbols[i] != acceptsymbol)
     280              complain (&symbols[i]->location, Wother,
     281                        _("nonterminal useless in grammar: %s"),
     282                        symbols[i]->tag);
     283          }
     284    }
     285  
     286    /* Shuffle elements of tables indexed by symbol number.  */
     287    {
     288      symbol **symbols_sorted = xnmalloc (nnterms, sizeof *symbols_sorted);
     289      for (symbol_number i = ntokens; i < nsyms; ++i)
     290        symbols[i]->content->number = nterm_map[i - ntokens];
     291      for (symbol_number i = ntokens; i < nsyms; ++i)
     292        symbols_sorted[nterm_map[i - ntokens] - ntokens] = symbols[i];
     293      for (symbol_number i = ntokens; i < nsyms; ++i)
     294        symbols[i] = symbols_sorted[i - ntokens];
     295      free (symbols_sorted);
     296    }
     297  
     298    /* Update nonterminal numbers in the RHS of the rules.  LHS are
     299       pointers to the symbol structure, they don't need renumbering. */
     300    {
     301      for (rule_number r = 0; r < nrules; ++r)
     302        for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
     303          if (ISVAR (*rhsp))
     304            *rhsp = symbol_number_as_item_number (nterm_map[*rhsp - ntokens]);
     305      acceptsymbol->content->number = nterm_map[acceptsymbol->content->number - ntokens];
     306    }
     307  
     308    nsyms -= nuseless_nonterminals;
     309    nnterms -= nuseless_nonterminals;
     310  }
     311  
     312  
     313  /*------------------------------------------------------------------.
     314  | Output the detailed results of the reductions.  For FILE.output.  |
     315  `------------------------------------------------------------------*/
     316  
     317  void
     318  reduce_output (FILE *out)
     319  {
     320    if (nuseless_nonterminals)
     321      {
     322        fprintf (out, "%s\n\n", _("Nonterminals useless in grammar"));
     323        for (int i = 0; i < nuseless_nonterminals; ++i)
     324          fprintf (out, "    %s\n", symbols[nsyms + i]->tag);
     325        fputs ("\n\n", out);
     326      }
     327  
     328    {
     329      bool b = false;
     330      for (int i = 0; i < ntokens; ++i)
     331        if (reduce_token_unused_in_grammar (i))
     332          {
     333            if (!b)
     334              fprintf (out, "%s\n\n", _("Terminals unused in grammar"));
     335            b = true;
     336            fprintf (out, "    %s\n", symbols[i]->tag);
     337          }
     338      if (b)
     339        fputs ("\n\n", out);
     340    }
     341  
     342    if (nuseless_productions)
     343      grammar_rules_partial_print (out, _("Rules useless in grammar"),
     344                                   rule_useless_in_grammar_p);
     345  }
     346  
     347  
     348  /*-------------------------------.
     349  | Report the results to STDERR.  |
     350  `-------------------------------*/
     351  
     352  static void
     353  reduce_print (void)
     354  {
     355    if (nuseless_nonterminals)
     356      complain (NULL, Wother, ngettext ("%d nonterminal useless in grammar",
     357                                        "%d nonterminals useless in grammar",
     358                                        nuseless_nonterminals),
     359                nuseless_nonterminals);
     360    if (nuseless_productions)
     361      complain (NULL, Wother, ngettext ("%d rule useless in grammar",
     362                                        "%d rules useless in grammar",
     363                                        nuseless_productions),
     364                nuseless_productions);
     365  }
     366  
     367  void
     368  reduce_grammar (void)
     369  {
     370    /* Allocate the global sets used to compute the reduced grammar */
     371  
     372    N = bitset_create (nnterms, BITSET_FIXED);
     373    P =  bitset_create (nrules, BITSET_FIXED);
     374    V = bitset_create (nsyms, BITSET_FIXED);
     375    V1 = bitset_create (nsyms, BITSET_FIXED);
     376  
     377    useless_nonterminals ();
     378    inaccessable_symbols ();
     379  
     380    /* Did we reduce something? */
     381    if (nuseless_nonterminals || nuseless_productions)
     382      {
     383        reduce_print ();
     384  
     385        // Check that start symbols have non-empty languages.
     386        bool failure = false;
     387        for (symbol_list *list = start_symbols; list; list = list->next)
     388          if (!bitset_test (N, list->content.sym->content->number - ntokens))
     389            {
     390              failure = true;
     391              complain (&list->sym_loc, complaint,
     392                        _("start symbol %s does not derive any sentence"),
     393                        list->content.sym->tag);
     394            }
     395        if (failure)
     396          exit (EXIT_FAILURE);
     397  
     398        /* First reduce the nonterminals, as they renumber themselves in the
     399           whole grammar.  If you change the order, nonterms would be
     400           renumbered only in the reduced grammar.  */
     401        if (nuseless_nonterminals)
     402          nonterminals_reduce ();
     403        if (nuseless_productions)
     404          reduce_grammar_tables ();
     405      }
     406  
     407    if (trace_flag & trace_grammar)
     408      {
     409        grammar_dump (stderr, "Reduced Grammar");
     410  
     411        fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals"
     412                 ", and %d productions.\n",
     413                 grammar_file, ntokens, nnterms, nrules);
     414      }
     415  }
     416  
     417  bool
     418  reduce_token_unused_in_grammar (symbol_number i)
     419  {
     420    aver (i < ntokens);
     421    return !bitset_test (V, i) && !bitset_test (V1, i);
     422  }
     423  
     424  bool
     425  reduce_nonterminal_useless_in_grammar (const sym_content *sym)
     426  {
     427    symbol_number n = sym->number;
     428    aver (ntokens <= n && n < nsyms + nuseless_nonterminals);
     429    return nsyms <= n;
     430  }
     431  
     432  /*-----------------------------------------------------------.
     433  | Free the global sets used to compute the reduced grammar.  |
     434  `-----------------------------------------------------------*/
     435  
     436  void
     437  reduce_free (void)
     438  {
     439    bitset_free (N);
     440    bitset_free (V);
     441    bitset_free (V1);
     442    bitset_free (P);
     443    free (nterm_map);
     444    nterm_map = NULL;
     445  }