1 /* Generate the LR(0) parser states for Bison.
2
3 Copyright (C) 1984, 1986, 1989, 2000-2002, 2004-2015, 2018-2021 Free
4 Software Foundation, Inc.
5
6 This file is part of Bison, the GNU Compiler Compiler.
7
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <https://www.gnu.org/licenses/>. */
20
21
22 /* See comments in state.h for the data structures that represent it.
23 The entry point is generate_states. */
24
25 #include <config.h>
26 #include "system.h"
27
28 #include <bitset.h>
29
30 #include "closure.h"
31 #include "complain.h"
32 #include "getargs.h"
33 #include "gram.h"
34 #include "lalr.h"
35 #include "lr0.h"
36 #include "reader.h"
37 #include "reduce.h"
38 #include "state.h"
39 #include "symtab.h"
40
41 typedef struct state_list
42 {
43 struct state_list *next;
44 state *state;
45 } state_list;
46
47 static state_list *first_state = NULL;
48 static state_list *last_state = NULL;
49
50 /* Print CORE for debugging. */
51 static void
52 core_print (size_t core_size, item_index *core, FILE *out)
53 {
54 for (int i = 0; i < core_size; ++i)
55 {
56 item_print (ritem + core[i], NULL, out);
57 fputc ('\n', out);
58 }
59 }
60
61 /*-----------------------------------------------------------------.
62 | A state was just discovered by transitioning on SYM from another |
63 | state. Queue this state for later examination, in order to find |
64 | its outgoing transitions. Return it. |
65 `-----------------------------------------------------------------*/
66
67 static state *
68 state_list_append (symbol_number sym, size_t core_size, item_index *core)
69 {
70 state_list *node = xmalloc (sizeof *node);
71 state *res = state_new (sym, core_size, core);
72
73 if (trace_flag & trace_automaton)
74 fprintf (stderr, "state_list_append (state = %d, symbol = %d (%s))\n",
75 nstates, sym, symbols[sym]->tag);
76
77 node->next = NULL;
78 node->state = res;
79
80 if (!first_state)
81 first_state = node;
82 if (last_state)
83 last_state->next = node;
84 last_state = node;
85
86 return res;
87 }
88
89 /* Symbols that can be "shifted" (including nonterminals) from the
90 current state. */
91 bitset shift_symbol;
92
93 static rule **redset;
94 /* For the current state, the list of pointers to states that can be
95 reached via a shift/goto. Could be indexed by the reaching symbol,
96 but labels of incoming transitions can be recovered by the state
97 itself. */
98 static state **shiftset;
99
100
101 /* KERNEL_BASE[symbol-number] -> list of item indices (offsets inside
102 RITEM) of length KERNEL_SIZE[symbol-number]. */
103 static item_index **kernel_base;
104 static int *kernel_size;
105
106 /* A single dimension array that serves as storage for
107 KERNEL_BASE. */
108 static item_index *kernel_items;
109
110
111 static void
112 allocate_itemsets (void)
113 {
114 /* Count the number of occurrences of all the symbols in RITEMS.
115 Note that useless productions (hence useless nonterminals) are
116 browsed too, hence we need to allocate room for _all_ the
117 symbols. */
118 size_t count = 0;
119 size_t *symbol_count = xcalloc (nsyms + nuseless_nonterminals,
120 sizeof *symbol_count);
121
122 for (rule_number r = 0; r < nrules; ++r)
123 for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
124 {
125 symbol_number sym = item_number_as_symbol_number (*rhsp);
126 count += 1;
127 symbol_count[sym] += 1;
128 }
129
130 /* See comments before new_itemsets. All the vectors of items
131 live inside KERNEL_ITEMS. The number of active items after
132 some symbol S cannot be more than the number of times that S
133 appears as an item, which is SYMBOL_COUNT[S].
134 We allocate that much space for each symbol. */
135
136 kernel_base = xnmalloc (nsyms, sizeof *kernel_base);
137 kernel_items = xnmalloc (count, sizeof *kernel_items);
138
139 count = 0;
140 for (symbol_number i = 0; i < nsyms; i++)
141 {
142 kernel_base[i] = kernel_items + count;
143 count += symbol_count[i];
144 }
145
146 free (symbol_count);
147 kernel_size = xnmalloc (nsyms, sizeof *kernel_size);
148 }
149
150 /* Print the current kernel (in KERNEL_BASE). */
151 static void
152 kernel_print (FILE *out)
153 {
154 for (symbol_number i = 0; i < nsyms; ++i)
155 if (kernel_size[i])
156 {
157 fprintf (out, "kernel[%s] =\n", symbols[i]->tag);
158 core_print (kernel_size[i], kernel_base[i], out);
159 }
160 }
161
162 /* Make sure the kernel is in sane state. */
163 static void
164 kernel_check (void)
165 {
166 for (symbol_number i = 0; i < nsyms - 1; ++i)
167 assert (kernel_base[i] + kernel_size[i] <= kernel_base[i + 1]);
168 }
169
170 static void
171 allocate_storage (void)
172 {
173 allocate_itemsets ();
174
175 shiftset = xnmalloc (nsyms, sizeof *shiftset);
176 redset = xnmalloc (nrules, sizeof *redset);
177 state_hash_new ();
178 shift_symbol = bitset_create (nsyms, BITSET_FIXED);
179 }
180
181
182 static void
183 free_storage (void)
184 {
185 bitset_free (shift_symbol);
186 free (redset);
187 free (shiftset);
188 free (kernel_base);
189 free (kernel_size);
190 free (kernel_items);
191 state_hash_free ();
192 }
193
194
195
196
197 /*------------------------------------------------------------------.
198 | Find which term/nterm symbols can be "shifted" in S, and for each |
199 | one record which items would be active after that transition. |
200 | Uses the contents of itemset. |
201 | |
202 | shift_symbol is a bitset of the term/nterm symbols that can be |
203 | shifted. For each symbol in the grammar, kernel_base[symbol] |
204 | points to a vector of item numbers activated if that symbol is |
205 | shifted, and kernel_size[symbol] is their numbers. |
206 | |
207 | itemset is sorted on item index in ritem, which is sorted on rule |
208 | number. Compute each kernel_base[symbol] with the same sort. |
209 `------------------------------------------------------------------*/
210
211 static void
212 new_itemsets (state *s)
213 {
214 if (trace_flag & trace_automaton)
215 fprintf (stderr, "new_itemsets: begin: state = %d\n", s->number);
216
217 memset (kernel_size, 0, nsyms * sizeof *kernel_size);
218
219 bitset_zero (shift_symbol);
220
221 if (trace_flag & trace_automaton)
222 {
223 fprintf (stderr, "initial kernel:\n");
224 kernel_print (stderr);
225 }
226
227 for (size_t i = 0; i < nitemset; ++i)
228 if (item_number_is_symbol_number (ritem[itemset[i]]))
229 {
230 if (trace_flag & trace_automaton)
231 {
232 fputs ("working on: ", stderr);
233 item_print (ritem + itemset[i], NULL, stderr);
234 fputc ('\n', stderr);
235 }
236 symbol_number sym = item_number_as_symbol_number (ritem[itemset[i]]);
237 bitset_set (shift_symbol, sym);
238 kernel_base[sym][kernel_size[sym]] = itemset[i] + 1;
239 kernel_size[sym]++;
240 }
241
242 if (trace_flag & trace_automaton)
243 {
244 fprintf (stderr, "final kernel:\n");
245 kernel_print (stderr);
246 fprintf (stderr, "new_itemsets: end: state = %d\n\n", s->number);
247 }
248 kernel_check ();
249 }
250
251
252
253 /*--------------------------------------------------------------.
254 | Find the state we would get to (from the current state) by |
255 | shifting SYM. Create a new state if no equivalent one exists |
256 | already. Used by append_states. |
257 `--------------------------------------------------------------*/
258
259 static state *
260 get_state (symbol_number sym, size_t core_size, item_index *core)
261 {
262 if (trace_flag & trace_automaton)
263 {
264 fprintf (stderr, "Entering get_state, symbol = %d (%s), core:\n",
265 sym, symbols[sym]->tag);
266 core_print (core_size, core, stderr);
267 fputc ('\n', stderr);
268 }
269
270 state *s = state_hash_lookup (core_size, core);
271 if (!s)
272 s = state_list_append (sym, core_size, core);
273
274 if (trace_flag & trace_automaton)
275 fprintf (stderr, "Exiting get_state => %d\n", s->number);
276
277 return s;
278 }
279
280 /*---------------------------------------------------------------.
281 | Use the information computed by new_itemsets to find the state |
282 | numbers reached by each shift transition from S. |
283 | |
284 | SHIFTSET is set up as a vector of those states. |
285 `---------------------------------------------------------------*/
286
287 static void
288 append_states (state *s)
289 {
290 if (trace_flag & trace_automaton)
291 fprintf (stderr, "append_states: begin: state = %d\n", s->number);
292
293 bitset_iterator iter;
294 symbol_number sym;
295 int i = 0;
296 BITSET_FOR_EACH (iter, shift_symbol, sym, 0)
297 {
298 shiftset[i] = get_state (sym, kernel_size[sym], kernel_base[sym]);
299 ++i;
300 }
301
302 if (trace_flag & trace_automaton)
303 fprintf (stderr, "append_states: end: state = %d\n", s->number);
304 }
305
306
307 /*----------------------------------------------------------------.
308 | Find which rules can be used for reduction transitions from the |
309 | current state and make a reductions structure for the state to |
310 | record their rule numbers. |
311 `----------------------------------------------------------------*/
312
313 static void
314 save_reductions (state *s)
315 {
316 int count = 0;
317
318 /* Find and count the active items that represent ends of rules. */
319 for (size_t i = 0; i < nitemset; ++i)
320 {
321 item_number item = ritem[itemset[i]];
322 if (item_number_is_rule_number (item))
323 {
324 rule_number r = item_number_as_rule_number (item);
325 redset[count++] = &rules[r];
326 if (r == 0)
327 {
328 /* This is "reduce 0", i.e., accept. */
329 aver (!final_state);
330 final_state = s;
331 }
332 }
333 }
334
335 if (trace_flag & trace_automaton)
336 {
337 fprintf (stderr, "reduction[%d] = {\n", s->number);
338 for (int i = 0; i < count; ++i)
339 {
340 rule_print (redset[i], NULL, stderr);
341 fputc ('\n', stderr);
342 }
343 fputs ("}\n", stderr);
344 }
345
346 /* Make a reductions structure and copy the data into it. */
347 state_reductions_set (s, count, redset);
348 }
349
350
351 /*---------------.
352 | Build STATES. |
353 `---------------*/
354
355 static void
356 set_states (void)
357 {
358 states = xcalloc (nstates, sizeof *states);
359
360 while (first_state)
361 {
362 state_list *this = first_state;
363
364 /* Pessimization, but simplification of the code: make sure all
365 the states have valid transitions and reductions members,
366 even if reduced to 0. It is too soon for errs, which are
367 computed later, but set_conflicts. */
368 state *s = this->state;
369 if (!s->transitions)
370 state_transitions_set (s, 0, 0);
371 if (!s->reductions)
372 state_reductions_set (s, 0, 0);
373
374 states[s->number] = s;
375
376 first_state = this->next;
377 free (this);
378 }
379 first_state = NULL;
380 last_state = NULL;
381 }
382
383
384 /*-------------------------------------------------------------------.
385 | Compute the LR(0) parser states (see state.h for details) from the |
386 | grammar. |
387 `-------------------------------------------------------------------*/
388
389 void
390 generate_states (void)
391 {
392 allocate_storage ();
393 closure_new (nritems);
394
395 /* Create the initial state, whose accessing symbol (by convention)
396 is 0, aka $end. */
397 {
398 /* The items of its core: beginning of all the rules of $accept. */
399 kernel_size[0] = 0;
400 for (rule_number r = 0; r < nrules && rules[r].lhs->symbol == acceptsymbol; ++r)
401 kernel_base[0][kernel_size[0]++] = rules[r].rhs - ritem;
402 state_list_append (0, kernel_size[0], kernel_base[0]);
403 }
404
405 /* States are queued when they are created; process them all. */
406 for (state_list *list = first_state; list; list = list->next)
407 {
408 state *s = list->state;
409 if (trace_flag & trace_automaton)
410 fprintf (stderr, "Processing state %d (reached by %s)\n",
411 s->number,
412 symbols[s->accessing_symbol]->tag);
413 /* Set up itemset for the transitions out of this state. itemset gets a
414 vector of all the items that could be accepted next. */
415 closure (s->items, s->nitems);
416 /* Record the reductions allowed out of this state. */
417 save_reductions (s);
418 /* Find the itemsets of the states that shifts/gotos can reach. */
419 new_itemsets (s);
420 /* Find or create the core structures for those states. */
421 append_states (s);
422
423 /* Create the shifts structures for the shifts to those states,
424 now that the state numbers transitioning to are known. */
425 state_transitions_set (s, bitset_count (shift_symbol), shiftset);
426 }
427
428 /* discard various storage */
429 free_storage ();
430
431 /* Set up STATES. */
432 set_states ();
433 }