1  /* Data definitions for internal representation of Bison's input.
       2  
       3     Copyright (C) 1984, 1986, 1989, 1992, 2001-2007, 2009-2015, 2018-2021
       4     Free Software Foundation, Inc.
       5  
       6     This file is part of Bison, the GNU Compiler Compiler.
       7  
       8     This program is free software: you can redistribute it and/or modify
       9     it under the terms of the GNU General Public License as published by
      10     the Free Software Foundation, either version 3 of the License, or
      11     (at your option) any later version.
      12  
      13     This program is distributed in the hope that it will be useful,
      14     but WITHOUT ANY WARRANTY; without even the implied warranty of
      15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      16     GNU General Public License for more details.
      17  
      18     You should have received a copy of the GNU General Public License
      19     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      20  
      21  #ifndef GRAM_H_
      22  # define GRAM_H_
      23  
      24  /* Representation of the grammar rules:
      25  
      26     NTOKENS is the number of tokens, and NNTERMS is the number of
      27     nonterminals (aka variables).  NSYMS is the total number, NTOKENS +
      28     NNTERMS.
      29  
      30     Each symbol (either token or nterm) receives a symbol number.
      31     Numbers 0 to NTOKENS - 1 are for tokens, and NTOKENS to NSYMS - 1
      32     are for nterms.  Symbol number zero is the end-of-input token.
      33     This token is counted in ntokens.  The true number of token values
      34     assigned is NTOKENS reduced by one for each alias declaration.
      35  
      36     The rules receive rule numbers 1 to NRULES in the order they are
      37     written.  More precisely Bison augments the grammar with the
      38     initial rule, '$accept: START-SYMBOL $end', which is numbered 1,
      39     all the user rules are 2, 3 etc.  Each time a rule number is
      40     presented to the user, we subtract 1, so *displayed* rule numbers
      41     are 0, 1, 2...
      42  
   Internally, RITEM stores both symbols (the RHS) and rule numbers:
   the symbols are integers >= 0, and rule numbers are stored
   negative.  A rule number R cannot simply be stored as -R, since
   rule number 0 would then be indistinguishable from the token $end
   (symbol number 0); it is stored as -1 - R instead (see
   rule_number_as_item_number).
      48  
      49     Actions are accessed via the rule number.
      50  
      51     The rules themselves are described by several arrays: amongst which
      52     RITEM, and RULES.
      53  
      54     RULES is an array of rules, whose members are:
      55  
      56     RULES[R].lhs -- the symbol of the left hand side of rule R.
      57  
      58     RULES[R].rhs -- the beginning of the portion of RITEM for rule R.
      59  
      60     RULES[R].prec -- the symbol providing the precedence level of R.
      61  
      62     RULES[R].precsym -- the symbol attached (via %prec) to give its
      63     precedence to R.  Of course, if set, it is equal to 'prec', but we
      64     need to distinguish one from the other when reducing: a symbol used
      65     in a %prec is not useless.
      66  
   The associativity of R is not a member of its own: the
   RULES[R].prec symbol provides both the associativity and the
   precedence.
      68  
      69     RULES[R].dprec -- the dynamic precedence level of R (for GLR
      70     parsing).
      71  
      72     RULES[R].merger -- index of merging function for R (for GLR
      73     parsing).
      74  
   RULES[R].location -- the location of the right hand side of R.
      76  
      77     RULES[R].useful -- whether the rule is used.  False if thrown away
      78     by reduce().
      79  
      80     The right hand side of rules is stored as symbol numbers in a
      81     portion of RITEM.
      82  
   The length of the portion is one greater than the number of symbols
   in the rule's right hand side.  The last element in the portion
   contains -1 - R (see rule_number_as_item_number), which identifies
   it as the end of a portion and says which rule it is for.
      87  
      88     The portions of RITEM come in order of increasing rule number.
      89     NRITEMS is the total length of RITEM.  Each element of RITEM is
      90     called an "item" of type item_number and its index in RITEM is an
      91     item_index.
      92  
      93     Item numbers are used in the finite state machine to represent
      94     places that parsing can get to.
      95  
      96     SYMBOLS[I]->prec records the precedence level of each symbol.
      97  
      98     Precedence levels are assigned in increasing order starting with 1
      99     so that numerically higher precedence values mean tighter binding
     100     as they ought to.  Zero as a symbol or rule's precedence means none
     101     is assigned.
     102  
     103     Associativities are recorded similarly in SYMBOLS[I]->assoc.  */
     104  
     105  # include "system.h"
     106  
     107  # include "location.h"
     108  # include "symtab.h"
     109  
     110  # define ISTOKEN(i)     ((i) < ntokens)
     111  # define ISVAR(i)       ((i) >= ntokens)
     112  
/* Total number of symbols: NTOKENS + NNTERMS.  */
extern int nsyms;
/* Number of tokens; the end-of-input token is counted.  */
extern int ntokens;
/* Number of nonterminals (aka variables).  */
extern int nnterms;
     116  
/* Elements of ritem: either a symbol number (stored as-is, hence
   >= 0), or an encoded rule number (stored negative).  */
typedef int item_number;
# define ITEM_NUMBER_MAX INT_MAX
/* The right hand sides of the rules, one portion per rule.  */
extern item_number *ritem;
/* The total length of RITEM.  */
extern int nritems;

/* Indices into ritem. */
typedef unsigned int item_index;
     125  
/* There is a weird relationship between, on the one hand,
   item_number and, on the other hand, symbol_number and rule_number:
   we store the latter two in item_number.  symbol_number values are
   stored as-is, while for a rule_number the negation of
   (rule_number + 1) is stored.

   Therefore, a symbol_number must be a valid item_number, and we
   sometimes have to perform the converse transformation.  */
     133  
/* View symbol number SYM as an item number.  Symbol numbers are
   stored as-is in items, so the value is returned unchanged.  */
static inline item_number
symbol_number_as_item_number (symbol_number sym)
{
  return sym;
}
     139  
/* View item number I as a symbol number.  The value is returned
   unchanged, without checking that I denotes a symbol; use
   item_number_is_symbol_number for that.  */
static inline symbol_number
item_number_as_symbol_number (item_number i)
{
  return i;
}
     145  
     146  static inline bool
     147  item_number_is_symbol_number (item_number i)
     148  {
     149    return i >= 0;
     150  }
     151  
/* Rule numbers.  Used both for the index of a rule in RULES and for
   its number in the source (see the 'number' and 'code' members of
   rule).  */
typedef int rule_number;
# define RULE_NUMBER_MAX INT_MAX
     155  
/* Encode rule number R as an item number.  Rule numbers are stored
   negative to tell them from symbol numbers: R becomes -1 - R, i.e.,
   rule 0 is stored as -1, rule 1 as -2, etc.  */
static inline item_number
rule_number_as_item_number (rule_number r)
{
  return -1 - r;
}
     161  
/* Decode the rule number stored in item number I.  This is the
   converse of rule_number_as_item_number: -1 is rule 0, -2 is
   rule 1, etc.  */
static inline rule_number
item_number_as_rule_number (item_number i)
{
  return -1 - i;
}
     167  
     168  static inline bool
     169  item_number_is_rule_number (item_number i)
     170  {
     171    return i < 0;
     172  }
     173  
     174  
     175  /*--------.
     176  | Rules.  |
     177  `--------*/
     178  
/* A grammar rule, as stored in RULES.  */
typedef struct
{
  /* The number of the rule in the source.  It is usually the index in
     RULES too, except if there are useless rules.  */
  rule_number code;

  /* The index in RULES.  Usually the rule number in the source,
     except if some rules are useless.  */
  rule_number number;

  /* The symbol of the left hand side.  */
  sym_content *lhs;
  /* The beginning of the portion of RITEM for this rule.  */
  item_number *rhs;

  /* This symbol provides both the associativity, and the precedence. */
  sym_content *prec;

  /* The dynamic precedence level, and the index of the merging
     function (both for GLR parsing).  */
  int dprec;
  int merger;

  /* This symbol was attached to the rule via %prec. */
  sym_content *precsym;

  /* Location of the rhs.  */
  location location;
  /* Whether the rule is used.  False if thrown away by reduce().  */
  bool useful;
  /* NOTE(review): presumably whether the action is a semantic
     predicate rather than a regular action -- confirm.  */
  bool is_predicate;

  /* Counts of the numbers of expected conflicts for this rule, or -1 if none
     given. */
  int expected_sr_conflicts;
  int expected_rr_conflicts;

  /* NOTE(review): presumably the code of the user action, and where
     it was written in the grammar file -- confirm.  */
  const char *action;
  location action_loc;
} rule;
     214  
/* The used rules (size NRULES).  */
extern rule *rules;
/* The number of rules in RULES.  */
extern rule_number nrules;
     218  
     219  /* Get the rule associated to this item.  ITEM points inside RITEM.  */
     220  static inline rule const *
     221  item_rule (item_number const *item)
     222  {
     223    item_number const *sp = item;
     224    while (!item_number_is_rule_number (*sp))
     225      ++sp;
     226    rule_number r = item_number_as_rule_number (*sp);
     227    return &rules[r];
     228  }
     229  
/* Pretty-print this ITEM (as in the report) on OUT.  ITEM points
   inside RITEM.  PREVIOUS_RULE is used to see if the lhs is common,
   in which case LHS is factored.  Passing NULL is fine.  */
void item_print (item_number *item, rule const *previous_rule,
                 FILE *out);
     235  
     236  /*--------.
     237  | Rules.  |
     238  `--------*/
     239  
/* A function that selects a rule: a predicate on rules, such as the
   FILTER that grammar_rules_partial_print takes to choose the rules
   to print.  */
typedef bool (*rule_filter) (rule const *);
     242  
     243  /* Whether is an accepting rule (i.e., its reduction terminates
     244     parsing with success). */
     245  static inline bool
     246  rule_is_initial (rule const *r)
     247  {
     248    /* In the case of multistart, we need to check whether the LHS is
     249       $accept.  In the case of "unistart", it would suffice to
     250       check whether this is rule number 0.  */
     251    return r->lhs == acceptsymbol->content;
     252  }
     253  
/* The following predicates on rules have the signature of a
   rule_filter.  */

/* Whether the rule has a 'number' smaller than NRULES.  That is, it
   is useful in the grammar.  */
bool rule_useful_in_grammar_p (rule const *r);

/* Whether the rule has a 'number' higher than NRULES.  That is, it is
   useless in the grammar.  */
bool rule_useless_in_grammar_p (rule const *r);

/* Whether the rule is not flagged as useful but is useful in the
   grammar.  In other words, it was discarded because of conflicts.  */
bool rule_useless_in_parser_p (rule const *r);

/* Whether the rule has a single RHS, and no user action. */
bool rule_useless_chain_p (rule const *r);
     268  
/* Print this rule's number and lhs on OUT.  If a PREVIOUS_LHS was
   already displayed (by a previous call for another rule), avoid
   useless repetitions.  */
void rule_lhs_print (rule const *r, sym_content const *previous_lhs,
                     FILE *out);
/* XML counterpart of rule_lhs_print.  NOTE(review): LEVEL is
   presumably the indentation level of the output -- confirm.  */
void rule_lhs_print_xml (rule const *r, FILE *out, int level);

/* The length of the RHS.  */
size_t rule_rhs_length (rule const *r);

/* Print this rule's RHS on OUT.  */
void rule_rhs_print (rule const *r, FILE *out);

/* Print this rule on OUT.  If a PREV_RULE was already displayed,
   avoid useless repetitions of their LHS. */
void rule_print (rule const *r, rule const *prev_rule, FILE *out);
     285  
     286  
     287  
/* Table of the symbols, indexed by the symbol number. */
extern symbol **symbols;

/* TOKEN_TRANSLATIONS -- a table indexed by a token number as returned
   by the user's yylex routine, it yields the internal token number
   used by the parser and throughout bison.  */
extern symbol_number *token_translations;
/* NOTE(review): presumably the largest token number that
   TOKEN_TRANSLATIONS handles -- confirm against its definition.  */
extern int max_code;
     296  
     297  
     298  
/* Dump RITEM on OUT, for traces. */
void ritem_print (FILE *out);

/* The size of the longest rule RHS.  */
size_t ritem_longest_rhs (void);
     304  
/* Print the grammar's rules that match FILTER on OUT under TITLE.  */
void grammar_rules_partial_print (FILE *out, const char *title,
                                  rule_filter filter);

/* Print the grammar's useful rules on OUT.  */
void grammar_rules_print (FILE *out);
/* Print all of the grammar's rules with a "usefulness" attribute on
   OUT.  NOTE(review): LEVEL is presumably the indentation level of
   the XML output -- confirm.  */
void grammar_rules_print_xml (FILE *out, int level);

/* Dump the grammar on OUT, under TITLE. */
void grammar_dump (FILE *out, const char *title);
     316  
/* Report on STDERR the rules that are not flagged USEFUL, using the
   MESSAGE (which can be 'rule useless in grammar' when invoked after grammar
   reduction, or 'rule useless in parser due to conflicts' after conflicts
   were taken into account).  */
void grammar_rules_useless_report (const char *message);

/* Free the packed grammar. */
void grammar_free (void);
     325  
/* The version %required by the grammar file, as an int (100 * major +
   minor; e.g., 305 for 3.5).  0 if unspecified.  */
extern int required_version;
     329  
     330  #endif /* !GRAM_H_ */