(root)/
bison-3.8.2/
src/
reader.c
       1  /* Input parser for Bison
       2  
       3     Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000-2003, 2005-2007,
       4     2009-2015, 2018-2021 Free Software Foundation, Inc.
       5  
       6     This file is part of Bison, the GNU Compiler Compiler.
       7  
       8     This program is free software: you can redistribute it and/or modify
       9     it under the terms of the GNU General Public License as published by
      10     the Free Software Foundation, either version 3 of the License, or
      11     (at your option) any later version.
      12  
      13     This program is distributed in the hope that it will be useful,
      14     but WITHOUT ANY WARRANTY; without even the implied warranty of
      15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      16     GNU General Public License for more details.
      17  
      18     You should have received a copy of the GNU General Public License
      19     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      20  
      21  #include <config.h>
      22  #include "system.h"
      23  
      24  #include <quote.h>
      25  #include <vasnprintf.h>
      26  
      27  #include "complain.h"
      28  #include "conflicts.h"
      29  #include "files.h"
      30  #include "fixits.h"
      31  #include "getargs.h"
      32  #include "gram.h"
      33  #include "muscle-tab.h"
      34  #include "reader.h"
      35  #include "symlist.h"
      36  #include "symtab.h"
      37  #include "scan-gram.h"
      38  #include "scan-code.h"
      39  
/* Forward declarations of file-local helpers that are defined
   elsewhere in this file.  */
static void prepare_percent_define_front_end_variables (void);
static void check_and_convert_grammar (void);

/* Head of the list of nodes making up all the rules read so far.  It
   is grown by grammar_symbol_append and walked by packgram.  */
static symbol_list *grammar = NULL;
/* The start symbols, in the order they were added and without
   duplicates; filled by grammar_start_symbols_add.  */
symbol_list *start_symbols = NULL;
/* The merge functions registered by get_merge_function; released by
   free_merger_functions.  */
merger_list *merge_functions = NULL;

/* Was %union seen?  */
bool union_seen = false;

/* Should rules have a default precedence?  When true, packgram gives
   each rule the precedence of the last token of its right-hand side.  */
bool default_prec = true;
      52  
      53  
      54  void
      55  grammar_start_symbols_add (symbol_list *syms)
      56  {
      57    /* Report and ignore duplicates.  Append the others to START_SYMBOLS.  */
      58    symbol_list *last = symbol_list_last (start_symbols);
      59    for (symbol_list *l = syms; l && l->content.sym; /* nothing */)
      60      {
      61        /* Is there a previous definition?  */
      62        symbol_list *first = symbol_list_find_symbol (start_symbols, l->content.sym);
      63        if (first)
      64          {
      65            duplicate_directive ("%start", first->sym_loc, l->sym_loc);
      66            symbol_list *dupl = l;
      67            l = l->next;
      68            dupl->next = NULL;
      69            symbol_list_free (dupl);
      70          }
      71        else
      72          {
      73            if (last)
      74              {
      75                last->next = l;
      76                last = l;
      77              }
      78            else
      79              {
      80                last = l;
      81                start_symbols = last;
      82              }
      83            symbol_list *next = l->next;
      84            l->next = NULL;
      85            l = next;
      86          }
      87      }
      88  }
      89  
      90  
      91  
      92  /*------------------------------------------------------------------------.
      93  | Return the merger index for a merging function named NAME.  Records the |
      94  | function, if new, in MERGER_LIST.                                       |
      95  `------------------------------------------------------------------------*/
      96  
      97  static int
      98  get_merge_function (uniqstr name)
      99  {
     100    if (! glr_parser)
     101      return 0;
     102  
     103    merger_list *syms;
     104    merger_list head;
     105    int n;
     106  
     107    head.next = merge_functions;
     108    for (syms = &head, n = 1; syms->next; syms = syms->next, n += 1)
     109      if (UNIQSTR_EQ (name, syms->next->name))
     110        break;
     111    if (syms->next == NULL)
     112      {
     113        syms->next = xmalloc (sizeof syms->next[0]);
     114        syms->next->name = uniqstr_new (name);
     115        /* After all symbol type declarations have been parsed, packgram invokes
     116           record_merge_function_type to set the type.  */
     117        syms->next->sym = NULL;
     118        syms->next->next = NULL;
     119        merge_functions = head.next;
     120      }
     121    return n;
     122  }
     123  
     124  /*-------------------------------------------------------------------.
     125  | For the existing merging function with index MERGER, record that   |
     126  | the result type is that of SYM, as required by the lhs (i.e., SYM) |
     127  | of the rule whose %merge declaration is at DECLARATION_LOC.        |
     128  `-------------------------------------------------------------------*/
     129  
     130  static void
     131  record_merge_function_type (int merger, symbol *sym, location declaration_loc)
     132  {
     133    if (merger <= 0)
     134      return;
     135  
     136    uniqstr type
     137      = sym->content->type_name ? sym->content->type_name : uniqstr_new ("");
     138  
     139    merger_list *merge_function;
     140    int merger_find = 1;
     141    for (merge_function = merge_functions;
     142         merge_function != NULL && merger_find != merger;
     143         merge_function = merge_function->next)
     144      merger_find += 1;
     145    aver (merge_function != NULL && merger_find == merger);
     146    if (merge_function->sym && merge_function->sym->content->type_name)
     147      {
     148        if (!UNIQSTR_EQ (merge_function->sym->content->type_name, type))
     149          {
     150            complain (&declaration_loc, complaint,
     151                      _("result type clash on merge function %s: "
     152                        "<%s> != <%s>"),
     153                      quote (merge_function->name), type,
     154                      merge_function->sym->content->type_name);
     155            subcomplain (&merge_function->type_declaration_loc, complaint,
     156                         _("previous declaration"));
     157          }
     158      }
     159    else
     160      {
     161        merge_function->sym = sym;
     162        merge_function->type_declaration_loc = declaration_loc;
     163      }
     164  }
     165  
     166  /*--------------------------------------.
     167  | Free all merge-function definitions.  |
     168  `--------------------------------------*/
     169  
     170  void
     171  free_merger_functions (void)
     172  {
     173    merger_list *L0 = merge_functions;
     174    while (L0)
     175      {
     176        merger_list *L1 = L0->next;
     177        free (L0);
     178        L0 = L1;
     179      }
     180  }
     181  
     182  
/*-------------------------------------------------------------------.
| Parse the input grammar into a single symbol_list structure.  Each |
| rule is represented by a sequence of symbols: the left hand side   |
| followed by the contents of the right hand side, followed by a     |
| null pointer instead of a symbol to terminate the rule.  The next  |
| symbol is the lhs of the following rule.                           |
|                                                                    |
| All actions are copied out, labelled by the rule number they apply |
| to.                                                                |
`-------------------------------------------------------------------*/
     193  
/* The (currently) last node of GRAMMAR, or NULL while GRAMMAR is
   still empty.  Updated by grammar_symbol_append on every append, so
   it may also be an end-of-rule marker (a node with a null symbol).  */
static symbol_list *grammar_end = NULL;
     196  
     197  /* Append SYM to the grammar.  */
     198  static symbol_list *
     199  grammar_symbol_append (symbol *sym, location loc)
     200  {
     201    symbol_list *p = symbol_list_sym_new (sym, loc);
     202  
     203    if (grammar_end)
     204      grammar_end->next = p;
     205    else
     206      grammar = p;
     207  
     208    grammar_end = p;
     209  
     210    /* A null SYM stands for an end of rule; it is not an actual
     211       part of it.  */
     212    if (sym)
     213      ++nritems;
     214  
     215    return p;
     216  }
     217  
     218  static void
     219  assign_named_ref (symbol_list *p, named_ref *name)
     220  {
     221    symbol *sym = p->content.sym;
     222  
     223    if (name->id == sym->tag)
     224      {
     225        complain (&name->loc, Wother,
     226                  _("duplicated symbol name for %s ignored"),
     227                  quote (sym->tag));
     228        named_ref_free (name);
     229      }
     230    else
     231      p->named_ref = name;
     232  }
     233  
     234  
/* The rule currently being defined, and the previous rule.
   CURRENT_RULE points to the LHS node of the current rule; it is set
   by grammar_current_rule_begin.  PREVIOUS_RULE_END points to the
   end-of-rule marker that precedes the current rule in GRAMMAR, or is
   NULL when the current rule is the first one.
   grammar_midrule_action uses it to insert a midrule's rule right
   before the current rule, and then advances it to the marker of the
   rule it inserted.  */
static symbol_list *current_rule = NULL;
static symbol_list *previous_rule_end = NULL;
     240  
     241  
     242  /*----------------------------------------------.
     243  | Create a new rule for LHS in to the GRAMMAR.  |
     244  `----------------------------------------------*/
     245  
     246  void
     247  grammar_current_rule_begin (symbol *lhs, location loc,
     248                              named_ref *lhs_name)
     249  {
     250    /* Start a new rule and record its lhs.  */
     251    ++nrules;
     252    previous_rule_end = grammar_end;
     253  
     254    current_rule = grammar_symbol_append (lhs, loc);
     255    if (lhs_name)
     256      assign_named_ref (current_rule, named_ref_copy (lhs_name));
     257  
     258    /* Mark the rule's lhs as a nonterminal if not already so.  */
     259    if (lhs->content->class == unknown_sym || lhs->content->class == pct_type_sym)
     260      symbol_class_set (lhs, nterm_sym, empty_loc, false);
     261    else if (lhs->content->class == token_sym)
     262      complain (&loc, complaint, _("rule given for %s, which is a token"),
     263                lhs->tag);
     264  }
     265  
     266  
     267  /*----------------------------------------------------------------------.
     268  | A symbol should be used if either:                                    |
     269  |   1. It has a destructor.                                             |
     270  |   2. The symbol is a midrule symbol (i.e., the generated LHS          |
     271  |      replacing a midrule action) that was assigned to or used, as in  |
     272  |      "exp: { $$ = 1; } { $$ = $1; }".                                 |
     273  `----------------------------------------------------------------------*/
     274  
     275  static bool
     276  symbol_should_be_used (symbol_list const *s, bool *midrule_warning)
     277  {
     278    if (symbol_code_props_get (s->content.sym, destructor)->code)
     279      return true;
     280    if ((s->midrule && s->midrule->action_props.is_value_used)
     281        || (s->midrule_parent_rule
     282            && (symbol_list_n_get (s->midrule_parent_rule,
     283                                   s->midrule_parent_rhs_index)
     284                ->action_props.is_value_used)))
     285      {
     286        *midrule_warning = true;
     287        return true;
     288      }
     289    return false;
     290  }
     291  
/*-----------------------------------------------------------------.
| Check that the rule R is properly defined.  For instance, there  |
| should be no type clash on the default action.  Possibly install |
| the default action.                                              |
`-----------------------------------------------------------------*/

/* R is the lhs node of the rule; the rhs follows in R->next, up to a
   node whose symbol is null.  All the problems are reported through
   complain; nothing is returned.  The order of the checks matters:
   the first two steps may give R an action, which the later checks
   then take into account.  */

static void
grammar_rule_check_and_complete (symbol_list *r)
{
  const symbol *lhs = r->content.sym;
  /* Null when the rule is empty: R->next is then the end-of-rule
     marker.  */
  const symbol *first_rhs = r->next->content.sym;

  /* Type check.

     If there is an action, then there is nothing we can do: the user
     is allowed to shoot herself in the foot.

     Don't worry about the default action if $$ is untyped, since $$'s
     value can't be used.  */
  if (!r->action_props.code && lhs->content->type_name)
    {
      /* If $$ is being set in default way, report if any type mismatch.  */
      if (first_rhs)
        {
          char const *lhs_type = lhs->content->type_name;
          /* An untyped $1 is compared as the empty type name.  */
          char const *rhs_type =
            first_rhs->content->type_name ? first_rhs->content->type_name : "";
          if (!UNIQSTR_EQ (lhs_type, rhs_type))
            complain (&r->rhs_loc, Wother,
                      _("type clash on default action: <%s> != <%s>"),
                      lhs_type, rhs_type);
          else
            {
              /* Install the default action only for C++.  */
              const bool is_cxx =
                STREQ (language->language, "c++")
                || (skeleton && (STREQ (skeleton, "glr.cc")
                                 || STREQ (skeleton, "glr2.cc")
                                 || STREQ (skeleton, "lalr1.cc")));
              if (is_cxx)
                {
                  /* NOTE(review): this presumably makes
                     r->action_props.code non-null, in which case the
                     "unused value" check below treats the rule as
                     having an explicit action -- confirm.  */
                  code_props_rule_action_init (&r->action_props, "{ $$ = $1; }",
                                               r->rhs_loc, r,
                                               /* name */ NULL,
                                               /* type */ NULL,
                                               /* is_predicate */ false);
                  code_props_translate_code (&r->action_props);
                }
            }
        }
      /* Warn if there is no default for $$ but we need one.  */
      else
        complain (&r->rhs_loc, Wother,
                  _("empty rule for typed nonterminal, and no action"));
    }

  /* For each start symbol, build the action of its start rule.  Use
     the same obstack as the one used by scan-code, which is in charge
     of actions. */
  /* True when at least two start symbols were declared.  */
  const bool multistart = start_symbols && start_symbols->next;
  if (multistart && lhs == acceptsymbol)
    {
      /* NOTE(review): the start symbol is taken from the second rhs
         position; presumably the first one is a token selecting the
         start symbol -- confirm where these rules are generated.  */
      const symbol *start = r->next->next->content.sym;
      if (start->content->type_name)
        obstack_printf (obstack_for_actions,
                        "{ ]b4_accept""([%s%d])[; }",
                        start->content->class == nterm_sym ? "orig " : "",
                        start->content->number);
      else
        obstack_printf (obstack_for_actions,
                        "{ ]b4_accept[; }");
      code_props_rule_action_init (&r->action_props,
                                   obstack_finish0 (obstack_for_actions),
                                   r->rhs_loc, r,
                                   /* name */ NULL,
                                   /* type */ NULL,
                                   /* is_predicate */ false);
    }


  /* Check that symbol values that should be used are in fact used.
     Don't check the generated start rules.  It has no action, so some
     rhs symbols may appear unused, but the parsing algorithm ensures
     that %destructor's are invoked appropriately.  */
  if (lhs != acceptsymbol)
    {
      /* N is the position in the rule: 0 is the lhs ($$), and N >= 1
         is the rhs symbol $N.  */
      int n = 0;
      for (symbol_list const *l = r; l && l->content.sym; l = l->next, ++n)
        {
          bool midrule_warning = false;
          if (!l->action_props.is_value_used
              && symbol_should_be_used (l, &midrule_warning)
              /* The default action, $$ = $1, 'uses' both.  */
              && (r->action_props.code || (n != 0 && n != 1)))
            {
              warnings warn_flag = midrule_warning ? Wmidrule_values : Wother;
              if (n)
                complain (&l->sym_loc, warn_flag, _("unused value: $%d"), n);
              else
                complain (&l->rhs_loc, warn_flag, _("unset value: $$"));
            }
        }
    }

  /* Check that %empty => empty rule.  */
  if (r->percent_empty_loc.start.file
      && r->next && r->next->content.sym)
    {
      complain (&r->percent_empty_loc, complaint,
                _("%%empty on non-empty rule"));
      /* Suggest removing the directive: replace it with nothing.  */
      fixits_register (&r->percent_empty_loc, "");
    }

  /* Check that empty rule => %empty.  */
  if (!(r->next && r->next->content.sym)
      && !r->midrule_parent_rule
      && !r->percent_empty_loc.start.file
      && warning_is_enabled (Wempty_rule))
    {
      complain (&r->rhs_loc, Wempty_rule, _("empty rule without %%empty"));
      if (feature_flag & feature_caret)
        location_caret_suggestion (r->rhs_loc, "%empty", stderr);
      /* Register the insertion at the very start of the rhs: an empty
         location whose start and end are both the start of the rhs.  */
      location loc = r->rhs_loc;
      loc.end = loc.start;
      fixits_register (&loc, " %empty ");
    }

  /* See comments in grammar_current_rule_prec_set for how POSIX
     mandates this complaint.  It's only for identifiers, so skip
     it for char literals and strings, which are always tokens.  */
  if (r->ruleprec
      && r->ruleprec->tag[0] != '\'' && r->ruleprec->tag[0] != '"'
      && r->ruleprec->content->status != declared
      && !r->ruleprec->content->prec)
    complain (&r->rhs_loc, Wother,
              _("token for %%prec is not defined: %s"), r->ruleprec->tag);

  /* Check that the (main) action was not typed.  */
  if (r->action_props.type)
    complain (&r->rhs_loc, Wother,
              _("only midrule actions can be typed: %s"), r->action_props.type);
}
     434  
     435  
     436  /*-------------------------------------.
     437  | End the currently being grown rule.  |
     438  `-------------------------------------*/
     439  
     440  void
     441  grammar_current_rule_end (location loc)
     442  {
     443    /* Put an empty link in the list to mark the end of this rule  */
     444    grammar_symbol_append (NULL, grammar_end->rhs_loc);
     445    current_rule->rhs_loc = loc;
     446  }
     447  
     448  
/*-------------------------------------------------------------------.
| The previous action turns out to be a midrule action.  Attach it   |
| to the current rule, i.e., create a dummy symbol, attach it this   |
| midrule action, and append this dummy nonterminal to the current   |
| rule.                                                              |
`-------------------------------------------------------------------*/

/* On entry the action is still stored in CURRENT_RULE->action_props.
   On exit it belongs to a new, empty rule "DUMMY: %empty" inserted in
   GRAMMAR right before the current rule, and DUMMY has been appended
   to the rhs of the current rule.  */

void
grammar_midrule_action (void)
{
  /* Since the action was written out with this rule's number, we must
     give the new rule this number by inserting the new rule before
     it.  */

  /* Make a DUMMY nonterminal, whose location is that of the midrule
     action.  Create the MIDRULE.  */
  location dummy_loc = current_rule->action_props.location;
  symbol *dummy = dummy_symbol_get (dummy_loc);
  /* The type given to the action, if any, becomes that of DUMMY.  */
  symbol_type_set (dummy,
                   current_rule->action_props.type, current_rule->action_props.location);
  symbol_list *midrule = symbol_list_sym_new (dummy, dummy_loc);

  /* Remember named_ref of previous action. */
  named_ref *action_name = current_rule->action_props.named_ref;

  /* Make a new rule, whose body is empty, before the current one, so
     that the action just read can belong to it.  */
  ++nrules;
  /* MIDRULE was not created by grammar_symbol_append, so count by
     hand the item that packgram emits to end this rule.  */
  ++nritems;
  /* Attach its location and actions to that of the DUMMY.  */
  midrule->rhs_loc = dummy_loc;
  code_props_rule_action_init (&midrule->action_props,
                               current_rule->action_props.code,
                               current_rule->action_props.location,
                               midrule,
                               /* name_ref */ NULL,
                               /* type */ NULL,
                               current_rule->action_props.is_predicate);
  /* The current rule no longer owns the action.  NOTE(review): this
     presumably clears action_props.code, which is what keeps the call
     to grammar_current_rule_symbol_append below from re-entering this
     function -- confirm.  */
  code_props_none_init (&current_rule->action_props);

  /* Move the expected conflict counts to the midrule; -1 marks the
     current rule as having none.  */
  midrule->expected_sr_conflicts = current_rule->expected_sr_conflicts;
  midrule->expected_rr_conflicts = current_rule->expected_rr_conflicts;
  current_rule->expected_sr_conflicts = -1;
  current_rule->expected_rr_conflicts = -1;

  /* Link MIDRULE after the previous rule, or make it the head of
     GRAMMAR when the current rule was the first one.  */
  if (previous_rule_end)
    previous_rule_end->next = midrule;
  else
    grammar = midrule;

  /* End the dummy's rule.  */
  midrule->next = symbol_list_sym_new (NULL, dummy_loc);
  midrule->next->next = current_rule;

  /* Any further midrule of the current rule goes after this one.  */
  previous_rule_end = midrule->next;

  /* Insert the dummy nonterminal replacing the midrule action into
     the current rule.  Bind it to its dedicated rule.  */
  grammar_current_rule_symbol_append (dummy, dummy_loc,
                                      action_name);
  /* GRAMMAR_END is now the node just appended for DUMMY.  */
  grammar_end->midrule = midrule;
  midrule->midrule_parent_rule = current_rule;
  /* DUMMY is the last rhs symbol so far, hence its index is the
     current length of the rhs.  */
  midrule->midrule_parent_rhs_index = symbol_list_length (current_rule->next);
}
     513  
     514  /* Set the precedence symbol of the current rule to PRECSYM. */
     515  
     516  void
     517  grammar_current_rule_prec_set (symbol *precsym, location loc)
     518  {
     519    /* POSIX says that any identifier is a nonterminal if it does not
     520       appear on the LHS of a grammar rule and is not defined by %token
     521       or by one of the directives that assigns precedence to a token.
     522       We ignore this here because the only kind of identifier that
     523       POSIX allows to follow a %prec is a token and because assuming
     524       it's a token now can produce more logical error messages.
     525       Nevertheless, grammar_rule_check_and_complete does obey what we
     526       believe is the real intent of POSIX here: that an error be
     527       reported for any identifier that appears after %prec but that is
     528       not defined separately as a token.  */
     529    symbol_class_set (precsym, token_sym, loc, false);
     530    if (current_rule->ruleprec)
     531      duplicate_rule_directive ("%prec",
     532                                current_rule->ruleprec->location, loc);
     533    else
     534      current_rule->ruleprec = precsym;
     535  }
     536  
     537  /* Set %empty for the current rule. */
     538  
     539  void
     540  grammar_current_rule_empty_set (location loc)
     541  {
     542    /* If %empty is used and -Wno-empty-rule is not, then enable
     543       -Wempty-rule.  */
     544    if (warning_is_unset (Wempty_rule))
     545      warning_argmatch ("empty-rule", 0, 0);
     546    if (current_rule->percent_empty_loc.start.file)
     547      duplicate_rule_directive ("%empty",
     548                                current_rule->percent_empty_loc, loc);
     549    else
     550      current_rule->percent_empty_loc = loc;
     551  }
     552  
     553  /* Attach dynamic precedence DPREC to the current rule. */
     554  
     555  void
     556  grammar_current_rule_dprec_set (int dprec, location loc)
     557  {
     558    if (! glr_parser)
     559      complain (&loc, Wother, _("%s affects only GLR parsers"),
     560                "%dprec");
     561    if (dprec <= 0)
     562      complain (&loc, complaint, _("%s must be followed by positive number"),
     563                "%dprec");
     564    else if (current_rule->dprec != 0)
     565      duplicate_rule_directive ("%dprec",
     566                                current_rule->dprec_loc, loc);
     567    else
     568      {
     569        current_rule->dprec = dprec;
     570        current_rule->dprec_loc = loc;
     571      }
     572  }
     573  
     574  /* Attach a merge function NAME with argument type TYPE to current
     575     rule. */
     576  
     577  void
     578  grammar_current_rule_merge_set (uniqstr name, location loc)
     579  {
     580    if (! glr_parser)
     581      complain (&loc, Wother, _("%s affects only GLR parsers"),
     582                "%merge");
     583    if (current_rule->merger != 0)
     584      duplicate_rule_directive ("%merge",
     585                                current_rule->merger_declaration_loc, loc);
     586    else
     587      {
     588        current_rule->merger = get_merge_function (name);
     589        current_rule->merger_declaration_loc = loc;
     590      }
     591  }
     592  
     593  /* Attach SYM to the current rule.  If needed, move the previous
     594     action as a midrule action.  */
     595  
     596  void
     597  grammar_current_rule_symbol_append (symbol *sym, location loc,
     598                                      named_ref *name)
     599  {
     600    if (current_rule->action_props.code)
     601      grammar_midrule_action ();
     602    symbol_list *p = grammar_symbol_append (sym, loc);
     603    if (name)
     604      assign_named_ref (p, name);
     605    if (sym->content->status == undeclared || sym->content->status == used)
     606      sym->content->status = needed;
     607  }
     608  
     609  void
     610  grammar_current_rule_action_append (const char *action, location loc,
     611                                      named_ref *name, uniqstr type)
     612  {
     613    if (current_rule->action_props.code)
     614      grammar_midrule_action ();
     615    if (type)
     616      complain (&loc, Wyacc,
     617                _("POSIX Yacc does not support typed midrule actions"));
     618    /* After all symbol declarations have been parsed, packgram invokes
     619       code_props_translate_code.  */
     620    code_props_rule_action_init (&current_rule->action_props, action, loc,
     621                                 current_rule,
     622                                 name, type,
     623                                 /* is_predicate */ false);
     624  }
     625  
     626  void
     627  grammar_current_rule_predicate_append (const char *pred, location loc)
     628  {
     629    if (current_rule->action_props.code)
     630      grammar_midrule_action ();
     631    code_props_rule_action_init (&current_rule->action_props, pred, loc,
     632                                 current_rule,
     633                                 NULL, NULL,
     634                                 /* is_predicate */ true);
     635  }
     636  
     637  /* Set the expected number of shift/reduce (reduce/reduce) conflicts
     638   * for the current rule.  If a midrule is encountered later, the count
     639   * is transferred to it and reset in the current rule to -1. */
     640  
     641  void
     642  grammar_current_rule_expect_sr (int count, location loc)
     643  {
     644    (void) loc;
     645    current_rule->expected_sr_conflicts = count;
     646  }
     647  
     648  void
     649  grammar_current_rule_expect_rr (int count, location loc)
     650  {
     651    if (! glr_parser)
     652      complain (&loc, Wother, _("%s affects only GLR parsers"),
     653                "%expect-rr");
     654    else
     655      current_rule->expected_rr_conflicts = count;
     656  }
     657  
     658  
/*---------------------------------------------.
| Build RULES and RITEM from what was parsed.  |
`---------------------------------------------*/

/* Walk GRAMMAR once, rule by rule.  For each rule, check and complete
   it, fill the next entry of RULES, and append to RITEM the item
   numbers of its rhs symbols followed by the item standing for the
   rule itself.  NRULES and NRITEMS must already be accurate: they
   size the allocations and are checked at the end.  */

static void
packgram (void)
{
  int itemno = 0;
  /* One extra slot for the sentinel stored in front of the items.  */
  ritem = xnmalloc (nritems + 1, sizeof *ritem);
  /* This sentinel is used by build_relations() in lalr.c.  */
  /* After this statement RITEM designates the first real item, and
     the sentinel sits at ritem[-1].  */
  *ritem++ = 0;

  rule_number ruleno = 0;
  rules = xnmalloc (nrules, sizeof *rules);

  /* At the top of each iteration P is the lhs node of a rule.  The
     inner loop below leaves P on the end-of-rule marker, so the
     "p = p->next" of this loop steps to the lhs of the next rule.  */
  for (symbol_list *p = grammar; p; p = p->next)
    {
      symbol_list *lhs = p;
      /* Does nothing when the rule has no merge function.  */
      record_merge_function_type (lhs->merger, lhs->content.sym,
                                  lhs->merger_declaration_loc);
      /* If the midrule's $$ is set or its $n is used, remove the '$' from the
         symbol name so that it's a user-defined symbol so that the default
         %destructor and %printer apply.  */
      if (lhs->midrule_parent_rule /* i.e., symbol_is_dummy (lhs->content.sym).  */
          && (lhs->action_props.is_value_used
              || (symbol_list_n_get (lhs->midrule_parent_rule,
                                     lhs->midrule_parent_rhs_index)
                  ->action_props.is_value_used)))
        lhs->content.sym->tag += 1;

      /* Must come before the action is copied below: this may install
         an action on the rule.  */
      grammar_rule_check_and_complete (lhs);

      rules[ruleno].code = ruleno;
      rules[ruleno].number = ruleno;
      rules[ruleno].lhs = lhs->content.sym->content;
      /* The rhs of this rule starts at the next free item.  */
      rules[ruleno].rhs = ritem + itemno;
      rules[ruleno].prec = NULL;
      rules[ruleno].dprec = lhs->dprec;
      rules[ruleno].merger = lhs->merger;
      rules[ruleno].precsym = NULL;
      rules[ruleno].location = lhs->rhs_loc;
      rules[ruleno].useful = true;
      rules[ruleno].action = lhs->action_props.code;
      rules[ruleno].action_loc = lhs->action_props.location;
      rules[ruleno].is_predicate = lhs->action_props.is_predicate;
      rules[ruleno].expected_sr_conflicts = lhs->expected_sr_conflicts;
      rules[ruleno].expected_rr_conflicts = lhs->expected_rr_conflicts;

      /* Traverse the rhs.  */
      {
        size_t rule_length = 0;
        /* Stops on the end-of-rule marker, whose symbol is null.  */
        for (p = lhs->next; p->content.sym; p = p->next)
          {
            ++rule_length;

            /* Don't allow rule_length == INT_MAX, since that might
               cause confusion with strtol if INT_MAX == LONG_MAX.  */
            if (rule_length == INT_MAX)
              complain (&rules[ruleno].location, fatal, _("rule is too long"));

            /* item_number = symbol_number.
               But the former needs to contain more: negative rule numbers. */
            ritem[itemno++] =
              symbol_number_as_item_number (p->content.sym->content->number);
            /* A rule gets by default the precedence and associativity
               of its last token.  */
            if (p->content.sym->content->class == token_sym && default_prec)
              rules[ruleno].prec = p->content.sym->content;
          }
      }

      /* If this rule has a %prec,
         the specified symbol's precedence replaces the default.  */
      if (lhs->ruleprec)
        {
          rules[ruleno].precsym = lhs->ruleprec->content;
          rules[ruleno].prec = lhs->ruleprec->content;
        }

      /* An item ends by the rule number (negated).  */
      ritem[itemno++] = rule_number_as_item_number (ruleno);
      aver (itemno < ITEM_NUMBER_MAX);
      ++ruleno;
      aver (ruleno < RULE_NUMBER_MAX);
    }

  /* Every rhs symbol and every rule end was counted in NRITEMS while
     the grammar was being read.  */
  aver (itemno == nritems);

  if (trace_flag & trace_sets)
    ritem_print (stderr);
}
     750  
     751  
     752  /*--------------------------------------------------------------.
     753  | Read in the grammar specification and record it in the format |
     754  | described in gram.h.                                          |
     755  `--------------------------------------------------------------*/
     756  
     757  void
     758  reader (const char *gram)
     759  {
     760    /* Set up symbol_table, semantic_type_table, and the built-in
     761       symbols.  */
     762    symbols_new ();
     763  
     764    gram_scanner_open (gram);
     765    parser_init ();
     766    gram_parse ();
     767    gram_scanner_close ();
     768  
     769    prepare_percent_define_front_end_variables ();
     770  
     771    if (complaint_status  < status_complaint)
     772      check_and_convert_grammar ();
     773  }
     774  
     775  static void
     776  prepare_percent_define_front_end_variables (void)
     777  {
     778    /* Set %define front-end variable defaults.  */
     779    muscle_percent_define_default ("lr.keep-unreachable-state", "false");
     780    {
     781      /* IELR would be a better default, but LALR is historically the
     782         default.  */
     783      muscle_percent_define_default ("lr.type", "lalr");
     784      char *lr_type = muscle_percent_define_get ("lr.type");
     785      if (STRNEQ (lr_type, "canonical-lr"))
     786        muscle_percent_define_default ("lr.default-reduction", "most");
     787      else
     788        muscle_percent_define_default ("lr.default-reduction", "accepting");
     789      free (lr_type);
     790    }
     791    muscle_percent_define_default ("tool.xsltproc", "xsltproc");
     792  
     793    /* Check %define front-end variables.  */
     794    {
     795      static char const * const values[] =
     796        {
     797         "lr.type", "lr""(0)", "lalr", "ielr", "canonical-lr", NULL,
     798         "lr.default-reduction", "most", "consistent", "accepting", NULL,
     799         NULL
     800        };
     801      muscle_percent_define_check_values (values);
     802    }
     803  }
     804  
     805  /* Find the first LHS which is not a dummy.  */
     806  
     807  static symbol *
     808  find_start_symbol (void)
     809  {
     810    symbol_list *res = grammar;
     811    /* Skip all the possible dummy rules of the first rule.  */
     812    for (; symbol_is_dummy (res->content.sym); res = res->next)
     813      /* Skip the LHS, and then all the RHS of the dummy rule.  */
     814      for (res = res->next; res->content.sym; res = res->next)
     815        continue;
     816    return res->content.sym;
     817  }
     818  
     819  
     820  /* Insert an initial rule, whose location is that of the first rule
     821     (not that of the start symbol):
     822  
     823     $accept: SWITCHING_TOKEN START $end.  */
     824  static void
     825  create_start_rule (symbol *swtok, symbol *start)
     826  {
     827    symbol_list *initial_rule = symbol_list_sym_new (acceptsymbol, empty_loc);
     828    initial_rule->rhs_loc = grammar->rhs_loc;
     829    symbol_list *p = initial_rule;
     830    if (swtok)
     831      {
     832        // Cannot create the action now, as the symbols have not yet
     833        // been assigned their number (by symbol_pack), which we need to
     834        // know the type name.  So the action is created in
     835        // grammar_rule_check_and_complete, which is run after
     836        // symbol_pack.
     837        p->next = symbol_list_sym_new (swtok, empty_loc);
     838        p = p->next;
     839      }
     840    p->next = symbol_list_sym_new (start, empty_loc);
     841    p = p->next;
     842    p->next = symbol_list_sym_new (eoftoken, empty_loc);
     843    p = p->next;
     844    p->next = symbol_list_sym_new (NULL, empty_loc);
     845    p = p->next;
     846    p->next = grammar;
     847    nrules += 1;
     848    nritems += 3 + !!swtok;
     849    grammar = initial_rule;
     850  }
     851  
     852  /* Fetch (or create) a token "YY_PARSE_foo" for start symbol "foo".
     853  
     854     We don't use the simple "YY_FOO" because (i) we might get clashes
     855     with some of our symbols (e.g., cast => YY_CAST), and (ii) upcasing
     856     introduces possible clashes between terminal FOO and nonterminal
     857     foo.  */
     858  symbol *
     859  switching_token (const symbol *start)
     860  {
     861    char buf[100];
     862    size_t len = sizeof buf;
     863    char *name = asnprintf (buf, &len, "YY_PARSE_%s", symbol_id_get (start));
     864    if (!name)
     865      xalloc_die ();
     866    // Setting the location ensures deterministic symbol numbers.
     867    symbol *res = symbol_get (name, start->location);
     868    if (name != buf)
     869      free (name);
     870    symbol_class_set (res, token_sym, start->location, false);
     871    return res;
     872  }
     873  
     874  /* Create the start rules in reverse order, since they are inserted at
     875     the top of the grammar.  That way the rules follow the order of
     876     declaration to %start.  */
     877  
     878  static void
     879  create_multiple_start_rules (symbol_list *start_syms)
     880  {
     881    if (start_syms)
     882      {
     883        create_multiple_start_rules (start_syms->next);
     884        assert (start_syms->content_type == SYMLIST_SYMBOL);
     885        symbol *start = start_syms->content.sym;
     886        symbol *swtok = switching_token (start);
     887        create_start_rule (swtok, start);
     888      }
     889  }
     890  
     891  /* For each start symbol "foo", create the rule "$accept: YY_FOO
     892     foo $end". */
     893  static void
     894  create_start_rules (void)
     895  {
     896    if (!start_symbols)
     897      {
     898        symbol *start = find_start_symbol ();
     899        start_symbols = symbol_list_sym_new (start, start->location);
     900      }
     901  
     902    const bool several = start_symbols->next;
     903    if (several)
     904      create_multiple_start_rules (start_symbols);
     905    else
     906      {
     907        symbol *start = start_symbols->content.sym;
     908        create_start_rule (NULL, start);
     909      }
     910  }
     911  
     912  static void
     913  check_start_symbols (void)
     914  {
     915    const bool multistart = start_symbols && start_symbols->next;
     916    // Sanity checks on the start symbols.
     917    for (symbol_list *list = start_symbols; list; list = list->next)
     918      {
     919        const symbol *start = list->content.sym;
     920        if (start->content->class == unknown_sym)
     921          {
     922            complain (&start->location, complaint,
     923                      _("the start symbol %s is undefined"),
     924                      start->tag);
     925            // I claim this situation is unreachable.  This is caught
     926            // before, and we get "symbol 'foo' is used, but is not
     927            // defined as a token and has no rules".
     928            abort ();
     929          }
     930        // If your only start symbol is a token, you're weird.
     931        if (!multistart && start->content->class == token_sym)
     932          complain (&start->location, complaint,
     933                    _("the start symbol %s is a token"),
     934                    start->tag);
     935      }
     936    if (complaint_status == status_complaint)
     937      exit (EXIT_FAILURE);
     938  }
     939  
     940  
     941  /*-------------------------------------------------------------.
     942  | Check the grammar that has just been read, and convert it to |
     943  | internal form.                                               |
     944  `-------------------------------------------------------------*/
     945  
     946  static void
     947  check_and_convert_grammar (void)
     948  {
     949    /* Grammar has been read.  Do some checking.  */
     950    if (nrules == 0)
     951      complain (NULL, fatal, _("no rules in the input grammar"));
     952  
     953    /* If the user did not define her EOFTOKEN, do it now. */
     954    if (!eoftoken)
     955      {
     956        eoftoken = symbol_get ("YYEOF", empty_loc);
     957        eoftoken->content->class = token_sym;
     958        eoftoken->content->number = 0;
     959        /* Value specified by POSIX.  */
     960        eoftoken->content->code = 0;
     961        {
     962          symbol *alias = symbol_get ("$end", empty_loc);
     963          symbol_class_set (alias, token_sym, empty_loc, false);
     964          symbol_make_alias (eoftoken, alias, empty_loc);
     965        }
     966      }
     967  
     968    /* Insert the initial rule(s).  */
     969    create_start_rules ();
     970  
     971    /* Report any undefined symbols and consider them nonterminals.  */
     972    symbols_check_defined ();
     973  
     974    if (SYMBOL_NUMBER_MAXIMUM - nnterms < ntokens)
     975      complain (NULL, fatal, "too many symbols in input grammar (limit is %d)",
     976                SYMBOL_NUMBER_MAXIMUM);
     977  
     978    nsyms = ntokens + nnterms;
     979  
     980    /* Assign the symbols their symbol numbers.  */
     981    symbols_pack ();
     982  
     983    check_start_symbols ();
     984  
     985    /* Scan rule actions after invoking symbol_check_alias_consistency
     986       (in symbols_pack above) so that token types are set correctly
     987       before the rule action type checking.
     988  
     989       Before invoking grammar_rule_check_and_complete (in packgram
     990       below) on any rule, make sure all actions have already been
     991       scanned in order to set 'used' flags.  Otherwise, checking that a
     992       midrule's $$ should be set will not always work properly because
     993       the check must forward-reference the midrule's parent rule.  For
     994       the same reason, all the 'used' flags must be set before checking
     995       whether to remove '$' from any midrule symbol name (also in
     996       packgram).  */
     997    for (symbol_list *sym = grammar; sym; sym = sym->next)
     998      code_props_translate_code (&sym->action_props);
     999  
    1000    /* Convert the grammar into the format described in gram.h.  */
    1001    packgram ();
    1002  
    1003    /* The grammar as a symbol_list is no longer needed. */
    1004    symbol_list_free (grammar);
    1005  }