(root)/
bison-3.8.2/
src/
lssi.c
       1  /* Lookahead sensitive state item searches for counterexample generation
       2  
       3     Copyright (C) 2020-2021 Free Software Foundation, Inc.
       4  
       5     This file is part of Bison, the GNU Compiler Compiler.
       6  
       7     This program is free software: you can redistribute it and/or modify
       8     it under the terms of the GNU General Public License as published by
       9     the Free Software Foundation, either version 3 of the License, or
      10     (at your option) any later version.
      11  
      12     This program is distributed in the hope that it will be useful,
      13     but WITHOUT ANY WARRANTY; without even the implied warranty of
      14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15     GNU General Public License for more details.
      16  
      17     You should have received a copy of the GNU General Public License
      18     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      19  
      20  #include <config.h>
      21  
      22  #include "lssi.h"
      23  
      24  #include <gl_linked_list.h>
      25  #include <gl_xlist.h>
      26  #include <stdlib.h>
      27  
      28  #include "getargs.h"
      29  #include "nullable.h"
      30  
      31  // Lookahead sensitive state item.
      32  typedef struct lssi
      33  {
      34    state_item_number si;
      35    struct lssi *parent;
      36    // this is the precise lookahead set (follow_L from the CupEx paper)
      37    bitset lookahead;
      38    bool free_lookahead;
      39  } lssi;
      40  
      41  static lssi *
      42  new_lssi (state_item_number si, lssi *p, bitset l, bool free_l)
      43  {
      44    lssi *res = xmalloc (sizeof *res);
      45    res->si = si;
      46    res->parent = p;
      47    res->lookahead = l;
      48    res->free_lookahead = free_l;
      49    return res;
      50  }
      51  
      52  static void
      53  lssi_free (lssi *sn)
      54  {
      55    if (sn == NULL)
      56      return;
      57    if (sn->free_lookahead)
      58      bitset_free (sn->lookahead);
      59    free (sn);
      60  }
      61  
      62  static size_t
      63  lssi_hasher (lssi *sn, size_t max)
      64  {
      65    size_t hash = sn->si;
      66    bitset_iterator biter;
      67    symbol_number syn;
      68    BITSET_FOR_EACH (biter, sn->lookahead, syn, 0)
      69      hash += syn;
      70    return hash % max;
      71  }
      72  
      73  static bool
      74  lssi_comparator (lssi *s1, lssi *s2)
      75  {
      76    if (s1->si == s2->si)
      77      {
      78        if (s1->lookahead == s2->lookahead)
      79          return true;
      80        return bitset_equal_p (s1->lookahead, s2->lookahead);
      81      }
      82    return false;
      83  }
      84  
      85  typedef gl_list_t lssi_list;
      86  
      87  static inline bool
      88  append_lssi (lssi *sn, Hash_table *visited, lssi_list queue)
      89  {
      90    if (hash_lookup (visited, sn))
      91      {
      92        sn->free_lookahead = false;
      93        lssi_free (sn);
      94        return false;
      95      }
      96    hash_xinsert (visited, sn);
      97    gl_list_add_last (queue, sn);
      98    return true;
      99  }
     100  
     101  #if 0
     102  static void
     103  lssi_print (lssi *l)
     104  {
     105    FILE *out = stderr;
     106    print_state_item (&state_items[l->si], out);
     107    if (l->lookahead)
     108      {
     109        fprintf (out, "FOLLOWL = { ");
     110        bitset_iterator biter;
     111        symbol_number sin;
     112        BITSET_FOR_EACH (biter, l->lookahead, sin, 0)
     113          fprintf (out, "%s, \n", symbols[sin]->tag);
     114        fprintf (out, "}\n");
     115      }
     116  }
     117  #endif
     118  
     119  /**
     120   * Compute the set of state-items that can reach the given conflict item via
     121   * a combination of transitions or production steps.
     122   */
     123  static bitset
     124  eligible_state_items (state_item *target)
     125  {
     126    bitset result = bitset_create (nstate_items, BITSET_FIXED);
     127    state_item_list queue =
     128      gl_list_create (GL_LINKED_LIST, NULL, NULL, NULL, true, 1,
     129                      (const void **) &target);
     130    while (gl_list_size (queue) > 0)
     131      {
     132        state_item *si = (state_item *) gl_list_get_at (queue, 0);
     133        gl_list_remove_at (queue, 0);
     134        if (bitset_test (result, si - state_items))
     135          continue;
     136        bitset_set (result, si - state_items);
     137        // search all reverse edges.
     138        bitset rsi = si->revs;
     139        bitset_iterator biter;
     140        state_item_number sin;
     141        BITSET_FOR_EACH (biter, rsi, sin, 0)
     142          gl_list_add_last (queue, &state_items[sin]);
     143      }
     144    gl_list_free (queue);
     145    return result;
     146  }
     147  
     148  /**
     149   * Compute the shortest lookahead-sensitive path from the start state to
     150   * this conflict. If optimized is true, only consider parser states
     151   * that can reach the conflict state.
     152   */
     153  state_item_list
     154  shortest_path_from_start (state_item_number target, symbol_number next_sym)
     155  {
     156    bitset eligible = eligible_state_items (&state_items[target]);
     157    Hash_table *visited = hash_initialize (32,
     158                                           NULL,
     159                                           (Hash_hasher) lssi_hasher,
     160                                           (Hash_comparator) lssi_comparator,
     161                                           (Hash_data_freer) lssi_free);
     162    bitset il = bitset_create (nsyms, BITSET_FIXED);
     163    bitset_set (il, 0);
     164    lssi *init = new_lssi (0, NULL, il, true);
     165    lssi_list queue = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL,
     166                                            NULL, true);
     167    append_lssi (init, visited, queue);
     168    // breadth-first search
     169    bool finished = false;
     170    lssi *n;
     171    while (gl_list_size (queue) > 0)
     172      {
     173        n = (lssi *) gl_list_get_at (queue, 0);
     174        gl_list_remove_at (queue, 0);
     175        state_item_number last = n->si;
     176        if (target == last && bitset_test (n->lookahead, next_sym))
     177          {
     178            finished = true;
     179            break;
     180          }
     181        state_item *si = &state_items[last];
     182        // Transitions don't change follow_L
     183        if (si->trans >= 0)
     184          {
     185            if (bitset_test (eligible, si->trans))
     186              {
     187                lssi *next = new_lssi (si->trans, n, n->lookahead, false);
     188                append_lssi (next, visited, queue);
     189              }
     190          }
     191        // For production steps, follow_L is based on the symbol after the
     192        // nonterminal being produced.
     193        // if no such symbol exists, follow_L is unchanged
     194        // if the symbol is a terminal, follow_L only contains that terminal
     195        // if the symbol is not nullable, follow_L is its FIRSTS set
     196        // if the symbol is nullable, follow_L is its FIRSTS set unioned with
     197        // this logic applied to the next symbol in the rule
     198        if (si->prods)
     199          {
     200            // Compute follow_L as above
     201            bitset lookahead = bitset_create (nsyms, BITSET_FIXED);
     202            item_number *pos = si->item + 1;
     203            for (; !item_number_is_rule_number (*pos); ++pos)
     204              {
     205                item_number it = *pos;
     206                if (ISTOKEN (it))
     207                  {
     208                    bitset_set (lookahead, it);
     209                    break;
     210                  }
     211                else
     212                  {
     213                    bitset_union (lookahead, lookahead, FIRSTS (it));
     214                    if (!nullable[it - ntokens])
     215                      break;
     216                  }
     217              }
     218            if (item_number_is_rule_number (*pos))
     219              bitset_union (lookahead, n->lookahead, lookahead);
     220  
     221            bool lookahead_used = false;
     222            // Try all possible production steps within this parser state.
     223            bitset_iterator biter;
     224            state_item_number nextSI;
     225            BITSET_FOR_EACH (biter, si->prods, nextSI, 0)
     226              {
     227                if (!bitset_test (eligible, nextSI))
     228                  continue;
     229                lssi *next = new_lssi (nextSI, n, lookahead,
     230                                       !lookahead_used);
     231                lookahead_used = append_lssi (next, visited, queue)
     232                                 || lookahead_used;
     233              }
     234            if (!lookahead_used)
     235              bitset_free (lookahead);
     236          }
     237      }
     238  
     239    bitset_free (eligible);
     240    if (!finished)
     241      {
     242        gl_list_free (queue);
     243        fputs ("Cannot find shortest path to conflict state.", stderr);
     244        abort ();
     245      }
     246    state_item_list res =
     247      gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
     248    for (lssi *sn = n; sn != NULL; sn = sn->parent)
     249      gl_list_add_first (res, &state_items[sn->si]);
     250  
     251    hash_free (visited);
     252    gl_list_free (queue);
     253  
     254    if (trace_flag & trace_cex)
     255      {
     256        fputs ("REDUCE ITEM PATH:\n", stderr);
     257        gl_list_iterator_t it = gl_list_iterator (res);
     258        const void *sip;
     259        while (gl_list_iterator_next (&it, &sip, NULL))
     260          state_item_print ((state_item *) sip, stderr, "");
     261      }
     262    return res;
     263  }
     264  
     265  /**
     266   * Determine if the given terminal is in the given symbol set or can begin
     267   * a nonterminal in the given symbol set.
     268   */
     269  bool
     270  intersect_symbol (symbol_number sym, bitset syms)
     271  {
     272    if (!syms)
     273      return true;
     274    bitset_iterator biter;
     275    symbol_number sn;
     276    BITSET_FOR_EACH (biter, syms, sn, 0)
     277      {
     278        if (sym == sn)
     279          return true;
     280        if (ISVAR (sn) && bitset_test (FIRSTS (sn), sym))
     281          return true;
     282      }
     283    return false;
     284  }
     285  
     286  /**
     287   * Determine if any symbol in ts is in syms
     288   * or can begin a nonterminal syms.
     289   */
     290  bool
     291  intersect (bitset ts, bitset syms)
     292  {
     293    if (!syms || !ts)
     294      return true;
     295    bitset_iterator biter;
     296    symbol_number sn;
     297    BITSET_FOR_EACH (biter, syms, sn, 0)
     298      {
     299        if (bitset_test (ts, sn))
     300          return true;
     301        if (ISVAR (sn) && !bitset_disjoint_p (ts, FIRSTS (sn)))
     302          return true;
     303      }
     304    return false;
     305  }
     306  
     307  
     308  /**
     309   * Compute a list of state_items that have a production to n with respect
     310   * to its lookahead
     311   */
     312  state_item_list
     313  lssi_reverse_production (const state_item *si, bitset lookahead)
     314  {
     315    state_item_list result =
     316      gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
     317    if (SI_TRANSITION (si))
     318      return result;
     319    // A production step was made to the current lalr_item.
     320    // Check that the next symbol in the parent lalr_item is
     321    // compatible with the lookahead.
     322    bitset_iterator biter;
     323    state_item_number sin;
     324    BITSET_FOR_EACH (biter, si->revs, sin, 0)
     325    {
     326      state_item *prevsi = &state_items[sin];
     327      if (!production_allowed (prevsi, si))
     328        continue;
     329      bitset prev_lookahead = prevsi->lookahead;
     330      if (item_number_is_rule_number (*(prevsi->item)))
     331        {
     332          // reduce item
     333          // Check that some lookaheads can be preserved.
     334          if (!intersect (prev_lookahead, lookahead))
     335            continue;
     336        }
     337      else
     338        {
     339          // shift item
     340          if (lookahead)
     341            {
     342              // Check that lookahead is compatible with the first
     343              // possible symbols in the rest of the production.
     344              // Alternatively, if the rest of the production is
     345              // nullable, the lookahead must be compatible with
     346              // the lookahead of the corresponding item.
     347              bool applicable = false;
     348              bool nlable = true;
     349              for (item_number *pos = prevsi->item + 1;
     350                   !applicable && nlable && item_number_is_symbol_number (*pos);
     351                   ++pos)
     352                {
     353                  symbol_number next_sym = item_number_as_symbol_number (*pos);
     354                  if (ISTOKEN (next_sym))
     355                    {
     356                      applicable = intersect_symbol (next_sym, lookahead);
     357                      nlable = false;
     358                    }
     359                  else
     360                    {
     361                      applicable = intersect (FIRSTS (next_sym), lookahead);
     362                      if (!applicable)
     363                        nlable = nullable[next_sym - ntokens];
     364                    }
     365                }
     366              if (!applicable && !nlable)
     367                continue;
     368            }
     369        }
     370      gl_list_add_last (result, prevsi);
     371    }
     372    return result;
     373  }