(root)/
bison-3.8.2/
src/
parse-simulation.c
       1  /* Parser simulator for unifying counterexample search
       2  
       3     Copyright (C) 2020-2021 Free Software Foundation, Inc.
       4  
       5     This file is part of Bison, the GNU Compiler Compiler.
       6  
       7     This program is free software: you can redistribute it and/or modify
       8     it under the terms of the GNU General Public License as published by
       9     the Free Software Foundation, either version 3 of the License, or
      10     (at your option) any later version.
      11  
      12     This program is distributed in the hope that it will be useful,
      13     but WITHOUT ANY WARRANTY; without even the implied warranty of
      14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15     GNU General Public License for more details.
      16  
      17     You should have received a copy of the GNU General Public License
      18     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      19  
      20  #include <config.h>
      21  
      22  #include "parse-simulation.h"
      23  
      24  #include <gl_linked_list.h>
      25  #include <gl_xlist.h>
      26  #include <stdlib.h>
      27  
      28  #include "lssi.h"
      29  #include "nullable.h"
      30  
      31  struct parse_state
      32  {
      33    // Path of state-items the parser has traversed.
      34    struct si_chunk
      35    {
      36      // Elements newly added in this chunk.
      37      state_item_list contents;
      38      // Properties of the linked list this chunk represents.
      39      const state_item *head_elt;
      40      const state_item *tail_elt;
      41      size_t total_size;
      42    } state_items;
      43    // List of derivations of the symbols.
      44    struct deriv_chunk
      45    {
      46      derivation_list contents;
      47      const derivation *head_elt;
      48      const derivation *tail_elt;
      49      size_t total_size;
      50    } derivs;
      51    struct parse_state *parent;
      52    int reference_count;
      53    // Incremented during productions, decremented during reductions.
      54    int depth;
      55    // Whether the contents of the chunks should be prepended or
      56    // appended to the list the chunks represent.
      57    bool prepend;
      58    // Causes chunk contents to be freed when the reference count is
      59    // one. Used when only the chunk metadata will be needed.
      60    bool free_contents_early;
      61  };
      62  
      63  
      64  static void
      65  ps_si_prepend (parse_state *ps, const state_item *si)
      66  {
      67    struct si_chunk *sic = &ps->state_items;
      68    gl_list_add_first (sic->contents, si);
      69    sic->head_elt = si;
      70    ++sic->total_size;
      71    if (!sic->tail_elt)
      72      sic->tail_elt = si;
      73  }
      74  
      75  static void
      76  ps_si_append (parse_state *ps, const state_item *si)
      77  {
      78    struct si_chunk *sic = &ps->state_items;
      79    gl_list_add_last (sic->contents, si);
      80    sic->tail_elt = si;
      81    ++sic->total_size;
      82    if (!sic->head_elt)
      83      sic->head_elt = si;
      84  }
      85  
      86  static void
      87  ps_derivs_prepend (parse_state *ps, derivation *d)
      88  {
      89    struct deriv_chunk *dc = &ps->derivs;
      90    derivation_list_prepend (dc->contents, d);
      91    dc->head_elt = d;
      92    ++dc->total_size;
      93    if (!dc->tail_elt)
      94      dc->tail_elt = d;
      95  }
      96  
      97  static void
      98  ps_derivs_append (parse_state *ps, derivation *d)
      99  {
     100    struct deriv_chunk *dc = &ps->derivs;
     101    derivation_list_append (dc->contents, d);
     102    dc->tail_elt = d;
     103    ++dc->total_size;
     104    if (!dc->head_elt)
     105      dc->head_elt = d;
     106  }
     107  
     108  static int allocs = 0;
     109  static int frees = 0;
     110  
     111  static parse_state *
     112  empty_parse_state (void)
     113  {
     114    parse_state *res = xcalloc (1, sizeof *res);
     115    res->state_items.contents
     116      = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
     117    res->derivs.contents = derivation_list_new ();
     118    ++allocs;
     119    return res;
     120  }
     121  
     122  parse_state *
     123  new_parse_state (const state_item *si)
     124  {
     125    parse_state *res = empty_parse_state ();
     126    ps_si_append (res, si);
     127    ps_derivs_append (res, derivation_dot ());
     128    return res;
     129  }
     130  
     131  static parse_state *
     132  copy_parse_state (bool prepend, parse_state *parent)
     133  {
     134    parse_state *res = xmalloc (sizeof *res);
     135    *res = *parent;
     136    res->state_items.contents
     137      = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
     138    res->derivs.contents = derivation_list_new ();
     139    res->parent = parent;
     140    res->prepend = prepend;
     141    res->reference_count = 0;
     142    res->free_contents_early = false;
     143    parse_state_retain (parent);
     144    ++allocs;
     145    return res;
     146  }
     147  
     148  bool
     149  parse_state_derivation_completed (const parse_state *ps)
     150  {
     151    return ps->derivs.total_size == 1;
     152  }
     153  
     154  derivation *
     155  parse_state_derivation (const parse_state *ps)
     156  {
     157    return (derivation *) ps->derivs.head_elt;
     158  }
     159  
     160  const state_item *
     161  parse_state_head (const parse_state *ps)
     162  {
     163    return ps->state_items.head_elt;
     164  }
     165  
     166  const state_item *
     167  parse_state_tail (const parse_state *ps)
     168  {
     169    return ps->state_items.tail_elt;
     170  }
     171  
     172  int
     173  parse_state_length (const parse_state *ps)
     174  {
     175    return ps->state_items.total_size;
     176  }
     177  
     178  int
     179  parse_state_depth (const parse_state *ps)
     180  {
     181    return ps->depth;
     182  }
     183  
     184  void
     185  parse_state_retain (parse_state *ps)
     186  {
     187    ++ps->reference_count;
     188  }
     189  
     190  void
     191  parse_state_free_contents_early (parse_state *ps)
     192  {
     193    ps->free_contents_early = true;
     194  }
     195  
     196  void
     197  free_parse_state (parse_state *original_ps)
     198  {
     199    bool free_contents = true;
     200    parse_state *parent_ps = NULL;
     201    for (parse_state *ps = original_ps; ps && free_contents; ps = parent_ps)
     202      {
     203        --ps->reference_count;
     204        free_contents = (ps->reference_count == 1 && ps->free_contents_early)
     205          || (ps->reference_count == 0 && !ps->free_contents_early);
     206        // need to keep the parse state around for visited hash set,
     207        // but its contents and parent can be freed
     208        if (free_contents)
     209          {
     210            if (ps->state_items.contents)
     211              gl_list_free (ps->state_items.contents);
     212            if (ps->derivs.contents)
     213              derivation_list_free (ps->derivs.contents);
     214          }
     215        parent_ps = ps->parent;
     216        if (ps->reference_count <= 0)
     217          {
     218            free (ps);
     219            ++frees;
     220          }
     221      }
     222  }
     223  
     224  size_t
     225  parse_state_hasher (const parse_state *ps, size_t max)
     226  {
     227    const struct si_chunk *sis = &ps->state_items;
     228    return ((state_item *) sis->head_elt - state_items +
     229            (state_item *) sis->tail_elt - state_items + sis->total_size) % max;
     230  }
     231  
     232  bool
     233  parse_state_comparator (const parse_state *ps1, const parse_state *ps2)
     234  {
     235    const struct si_chunk *sis1 = &ps1->state_items;
     236    const struct si_chunk *sis2 = &ps2->state_items;
     237    return sis1->head_elt == sis2->head_elt
     238      && sis1->tail_elt == sis2->tail_elt
     239      && sis1->total_size == sis2->total_size;
     240  }
     241  
     242  
     243  void
     244  parse_state_completed_steps (const parse_state *ps, int *shifts, int *productions)
     245  {
     246    // traverse to the root parse_state,
     247    // which will have a list of all completed productions.
     248    const parse_state *root_ps = ps;
     249    while (root_ps->parent)
     250      root_ps = root_ps->parent;
     251  
     252    state_item_list sis = root_ps->state_items.contents;
     253    int count = 0;
     254  
     255    state_item *last = NULL;
     256    state_item *next = NULL;
     257    for (gl_list_iterator_t it = gl_list_iterator (sis);
     258         state_item_list_next (&it, &next);
     259         )
     260      {
     261        if (last && last->state == next->state)
     262          ++count;
     263        last = next;
     264      }
     265    *productions = count;
     266    *shifts = root_ps->state_items.total_size - count;
     267  }
     268  
     269  typedef void (*chunk_append_fn) (gl_list_t, const void *);
     270  
     271  // A version of gl_list_add_last which has the chunk_append_fn
     272  // signature.
     273  static void
     274  list_add_last (gl_list_t list, const void *elt)
     275  {
     276    gl_list_add_last (list, elt);
     277  }
     278  
     279  // takes an array of n gl_lists and flattens them into two list
     280  // based off of the index split
     281  static void
     282  list_flatten_and_split (gl_list_t *list, gl_list_t *rets, int split, int n,
     283                          chunk_append_fn append_fn)
     284  {
     285    int ret_index = 0;
     286    int ret_array = 0;
     287    for (int i = 0; i < n; ++i)
     288      {
     289        const void *p = NULL;
     290        gl_list_iterator_t it = gl_list_iterator (list[i]);
     291        while (gl_list_iterator_next (&it, &p, NULL))
     292          if (p)
     293            {
     294              gl_list_t l = (gl_list_t) p;
     295              const void *si = NULL;
     296              gl_list_iterator_t it2 = gl_list_iterator (l);
     297              while (gl_list_iterator_next (&it2, &si, NULL))
     298                {
     299                  if (ret_index++ == split)
     300                    ++ret_array;
     301                  if (rets[ret_array])
     302                    append_fn (rets[ret_array], si);
     303                }
     304              gl_list_iterator_free (&it2);
     305            }
     306        gl_list_iterator_free (&it);
     307      }
     308  }
     309  
     310  static parse_state_list
     311  parse_state_list_new (void)
     312  {
     313    return gl_list_create_empty (GL_LINKED_LIST, NULL, NULL,
     314                                 (gl_listelement_dispose_fn)free_parse_state,
     315                                 true);
     316  }
     317  
     318  static void
     319  parse_state_list_append (parse_state_list pl, parse_state *ps)
     320  {
     321    parse_state_retain (ps);
     322    gl_list_add_last (pl, ps);
     323  }
     324  
     325  // Emulates a reduction on a parse state by popping some amount of
     326  // derivations and state_items off of the parse_state and returning
     327  // the result in ret. Returns the derivation of what's popped.
     328  static derivation_list
     329  parser_pop (parse_state *ps, int deriv_index,
     330              int si_index, parse_state *ret)
     331  {
     332    // prepend sis, append sis, prepend derivs, append derivs
     333    gl_list_t chunks[4];
     334    for (int i = 0; i < 4; ++i)
     335      chunks[i] = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
     336    for (parse_state *pn = ps; pn != NULL; pn = pn->parent)
     337      if (pn->prepend)
     338        {
     339          gl_list_add_last (chunks[0], pn->state_items.contents);
     340          gl_list_add_last (chunks[2], pn->derivs.contents);
     341        }
     342      else
     343        {
     344          gl_list_add_first (chunks[1], pn->state_items.contents);
     345          gl_list_add_first (chunks[3], pn->derivs.contents);
     346        }
     347    derivation_list popped_derivs = derivation_list_new ();
     348    gl_list_t ret_chunks[4] = { ret->state_items.contents, NULL,
     349      ret->derivs.contents, popped_derivs
     350    };
     351    list_flatten_and_split (chunks, ret_chunks, si_index, 2,
     352                            list_add_last);
     353    list_flatten_and_split (chunks + 2, ret_chunks + 2, deriv_index, 2,
     354                            (chunk_append_fn)derivation_list_append);
     355    size_t s_size = gl_list_size (ret->state_items.contents);
     356    ret->state_items.total_size = s_size;
     357    if (s_size > 0)
     358      {
     359        ret->state_items.tail_elt = gl_list_get_at (ret->state_items.contents,
     360                                                    s_size - 1);
     361        ret->state_items.head_elt =
     362          gl_list_get_at (ret->state_items.contents, 0);
     363      }
     364    else
     365      {
     366        ret->state_items.tail_elt = NULL;
     367        ret->state_items.head_elt = NULL;
     368      }
     369    size_t d_size = gl_list_size (ret->derivs.contents);
     370    ret->derivs.total_size = d_size;
     371    if (d_size > 0)
     372      {
     373        ret->derivs.tail_elt = gl_list_get_at (ret->derivs.contents,
     374                                               d_size - 1);
     375        ret->derivs.head_elt = gl_list_get_at (ret->derivs.contents, 0);
     376      }
     377    else
     378      {
     379        ret->derivs.tail_elt = NULL;
     380        ret->derivs.head_elt = NULL;
     381      }
     382    for (int i = 0; i < 4; ++i)
     383      gl_list_free (chunks[i]);
     384    return popped_derivs;
     385  }
     386  
     387  void
     388  parse_state_lists (parse_state *ps, state_item_list *sitems,
     389                     derivation_list *derivs)
     390  {
     391    parse_state *temp = empty_parse_state ();
     392    size_t si_size = ps->state_items.total_size;
     393    size_t deriv_size = ps->derivs.total_size;
     394    derivation_list dl = parser_pop (ps, si_size, deriv_size, temp);
     395    *sitems = temp->state_items.contents;
     396    *derivs = temp->derivs.contents;
     397    // prevent the return lists from being freed
     398    temp->state_items.contents = NULL;
     399    temp->derivs.contents = NULL;
     400    free_parse_state (temp);
     401    derivation_list_free (dl);
     402  }
     403  
     404  /**
     405   * Compute the parse states that result from taking a transition on
     406   * nullable symbols whenever possible from the given state_item.
     407   */
     408  static void
     409  nullable_closure (parse_state *ps, state_item *si, parse_state_list state_list)
     410  {
     411    parse_state *current_ps = ps;
     412    state_item_number prev_sin = si - state_items;
     413    for (state_item_number sin = si->trans; sin != -1;
     414         prev_sin = sin, sin = state_items[sin].trans)
     415      {
     416        state_item *psi = &state_items[prev_sin];
     417        symbol_number sp = item_number_as_symbol_number (*psi->item);
     418        if (ISTOKEN (sp) || !nullable[sp - ntokens])
     419          break;
     420  
     421        state_item *nsi = &state_items[sin];
     422        current_ps = copy_parse_state (false, current_ps);
     423        ps_si_append (current_ps, nsi);
     424        ps_derivs_append (current_ps,
     425                          derivation_new (sp, derivation_list_new (),
     426                                          state_item_rule (nsi)));
     427        parse_state_list_append (state_list, current_ps);
     428      }
     429  }
     430  
     431  parse_state_list
     432  simulate_transition (parse_state *ps)
     433  {
     434    const state_item *si = ps->state_items.tail_elt;
     435    symbol_number sym = item_number_as_symbol_number (*si->item);
     436    // Transition on the same next symbol, taking nullable
     437    // symbols into account.
     438    parse_state_list result = parse_state_list_new ();
     439    state_item_number si_next = si->trans;
     440    // Check for disabled transition, shouldn't happen as any
     441    // state_items that lead to these should be disabled.
     442    if (si_next < 0)
     443      return result;
     444    parse_state *next_ps = copy_parse_state (false, ps);
     445    ps_si_append (next_ps, &state_items[si_next]);
     446    ps_derivs_append (next_ps, derivation_new_leaf (sym));
     447    parse_state_list_append (result, next_ps);
     448  
     449    nullable_closure (next_ps, &state_items[si_next], result);
     450    return result;
     451  }
     452  
     453  /**
     454   * Determine if the given symbols are equal or their first sets
     455   * intersect.
     456   */
     457  static bool
     458  compatible (symbol_number sym1, symbol_number sym2)
     459  {
     460    if (sym1 == sym2)
     461      return true;
     462    if (ISTOKEN (sym1) && ISVAR (sym2))
     463      return bitset_test (FIRSTS (sym2), sym1);
     464    else if (ISVAR (sym1) && ISTOKEN (sym2))
     465      return bitset_test (FIRSTS (sym1), sym2);
     466    else if (ISVAR (sym1) && ISVAR (sym2))
     467      return !bitset_disjoint_p (FIRSTS (sym1), FIRSTS (sym2));
     468    else
     469      return false;
     470  }
     471  
     472  parse_state_list
     473  simulate_production (parse_state *ps, symbol_number compat_sym)
     474  {
     475    parse_state_list result = parse_state_list_new ();
     476    const state_item *si = parse_state_tail (ps);
     477    if (si->prods)
     478      {
     479        bitset_iterator biter;
     480        state_item_number sin;
     481        BITSET_FOR_EACH (biter, si->prods, sin, 0)
     482          {
     483            // Take production step only if lhs is not nullable and
     484            // if first rhs symbol is compatible with compat_sym
     485            state_item *next = &state_items[sin];
     486            item_number *itm1 = next->item;
     487            if (!compatible (*itm1, compat_sym) || !production_allowed (si, next))
     488              continue;
     489            parse_state *next_ps = copy_parse_state (false, ps);
     490            ps_si_append (next_ps, next);
     491            parse_state_list_append (result, next_ps);
     492            if (next_ps->depth >= 0)
     493              ++next_ps->depth;
     494            nullable_closure (next_ps, next, result);
     495          }
     496      }
     497    return result;
     498  }
     499  
     500  // simulates a reduction on the given parse state, conflict_item is the
     501  // item associated with ps's conflict. symbol_set is a lookahead set this
     502  // reduction must be compatible with
     503  parse_state_list
     504  simulate_reduction (parse_state *ps, int rule_len, bitset symbol_set)
     505  {
     506    parse_state_list result = parse_state_list_new ();
     507  
     508    int s_size = ps->state_items.total_size;
     509    int d_size = ps->derivs.total_size;
     510    if (ps->depth >= 0)
     511      d_size--;                   // account for dot
     512    parse_state *new_root = empty_parse_state ();
     513    derivation_list popped_derivs =
     514      parser_pop (ps, d_size - rule_len,
     515                  s_size - rule_len - 1, new_root);
     516  
     517    // update derivation
     518    state_item *si = (state_item *) ps->state_items.tail_elt;
     519    const rule *r = item_rule (si->item);
     520    symbol_number lhs = r->lhs->number;
     521    derivation *deriv = derivation_new (lhs, popped_derivs, state_item_rule (si));
     522    --new_root->depth;
     523    ps_derivs_append (new_root, deriv);
     524  
     525    if (s_size != rule_len + 1)
     526      {
     527        state_item *tail = (state_item *) new_root->state_items.tail_elt;
     528        ps_si_append (new_root, &state_items[tail->trans]);
     529        parse_state_list_append (result, new_root);
     530      }
     531    else
     532      {
     533        // The head state_item is a production item, so we need to prepend
     534        // with possible source state-items.
     535        const state_item *head = ps->state_items.head_elt;
     536        state_item_list prev = lssi_reverse_production (head, symbol_set);
     537        // TODO: better understand what causes this case.
     538        if (gl_list_size (prev) == 0)
     539          {
     540            // new_root needs to have an RC of 1 to be freed correctly here.
     541            parse_state_retain (new_root);
     542            free_parse_state (new_root);
     543          }
     544        else
     545          {
     546            state_item *psis = NULL;
     547            for (gl_list_iterator_t it = gl_list_iterator (prev);
     548                 state_item_list_next (&it, &psis);
     549                 )
     550              {
     551                // Prepend the result from the reverse production.
     552                parse_state *copy = copy_parse_state (true, new_root);
     553                ps_si_prepend (copy, psis);
     554  
     555                // Append the left hand side to the end of the parser state
     556                copy = copy_parse_state (false, copy);
     557                struct si_chunk *sis = &copy->state_items;
     558                const state_item *tail = sis->tail_elt;
     559                ps_si_append (copy, &state_items[tail->trans]);
     560                parse_state_list_append (result, copy);
     561                nullable_closure (copy, (state_item *) sis->tail_elt, result);
     562              }
     563          }
     564        gl_list_free (prev);
     565      }
     566    return result;
     567  }
     568  
     569  parse_state_list
     570  parser_prepend (parse_state *ps)
     571  {
     572    parse_state_list res = parse_state_list_new ();
     573    const state_item *head = ps->state_items.head_elt;
     574    symbol_number prepend_sym =
     575      item_number_as_symbol_number (*(head->item - 1));
     576    bitset_iterator biter;
     577    state_item_number sin;
     578    BITSET_FOR_EACH (biter, head->revs, sin, 0)
     579    {
     580      parse_state *copy = copy_parse_state (true, ps);
     581      ps_si_prepend (copy, &state_items[sin]);
     582      if (SI_TRANSITION (head))
     583        ps_derivs_prepend (copy, derivation_new_leaf (prepend_sym));
     584      parse_state_list_append (res, copy);
     585    }
     586    return res;
     587  }
     588  
     589  void
     590  print_parse_state (parse_state *ps)
     591  {
     592    FILE *out = stderr;
     593    fprintf (out, "(size %zu depth %d rc %d)\n",
     594            ps->state_items.total_size, ps->depth, ps->reference_count);
     595    state_item_print (ps->state_items.head_elt, out, "");
     596    state_item_print (ps->state_items.tail_elt, out, "");
     597    if (ps->derivs.total_size > 0)
     598      derivation_print (ps->derivs.head_elt, out, "");
     599    putc ('\n', out);
     600  }