1 /* Symbol table manager for Bison.
2
3 Copyright (C) 1984, 1989, 2000-2002, 2004-2015, 2018-2021 Free
4 Software Foundation, Inc.
5
6 This file is part of Bison, the GNU Compiler Compiler.
7
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <https://www.gnu.org/licenses/>. */
20
21 #include <config.h>
22 #include "symtab.h"
23
24 #include "system.h"
25
26 #include <assure.h>
27 #include <fstrcmp.h>
28 #include <hash.h>
29 #include <quote.h>
30
31 #include "complain.h"
32 #include "getargs.h"
33 #include "gram.h"
34 #include "intprops.h"
35
36 /** Undefined token code. */
37 #define CODE_UNDEFINED (-1)
38
39 /* Undefined symbol number. */
40 #define NUMBER_UNDEFINED (-1)
41
42
43 static struct hash_table *symbol_table = NULL;
44 static struct hash_table *semantic_type_table = NULL;
45
46 /*----------------------------------------------------------------.
47 | Symbols sorted by tag. Allocated by table_sort, after which no |
48 | more symbols should be created. |
49 `----------------------------------------------------------------*/
50
51 static symbol **symbols_sorted = NULL;
52 static semantic_type **semantic_types_sorted = NULL;
53
54
55 /*------------------------.
56 | Distinguished symbols. |
57 `------------------------*/
58
59 symbol *errtoken = NULL;
60 symbol *undeftoken = NULL;
61 symbol *eoftoken = NULL;
62 symbol *acceptsymbol = NULL;
63
64 /* Precedence relation graph. */
65 static symgraph **prec_nodes;
66
67 /* Store which associativity is used. */
68 static bool *used_assoc = NULL;
69
70 bool tag_seen = false;
71
72
73 /* Whether SYM was defined by the user. */
74
75 static bool
76 symbol_is_user_defined (symbol *sym)
77 {
78 const bool eof_is_user_defined
79 = !eoftoken->alias || STRNEQ (eoftoken->alias->tag, "$end");
80 return sym->tag[0] != '$'
81 && (eof_is_user_defined || (sym != eoftoken && sym->alias != errtoken))
82 && sym != errtoken && sym->alias != errtoken
83 && sym != undeftoken && sym->alias != undeftoken;
84 }
85
86
87 /*--------------------------.
88 | Create a new sym_content. |
89 `--------------------------*/
90
91 static sym_content *
92 sym_content_new (symbol *s)
93 {
94 sym_content *res = xmalloc (sizeof *res);
95
96 res->symbol = s;
97
98 res->type_name = NULL;
99 res->type_loc = empty_loc;
100 for (int i = 0; i < CODE_PROPS_SIZE; ++i)
101 code_props_none_init (&res->props[i]);
102
103 res->number = NUMBER_UNDEFINED;
104 res->prec_loc = empty_loc;
105 res->prec = 0;
106 res->assoc = undef_assoc;
107 res->code = CODE_UNDEFINED;
108
109 res->class = unknown_sym;
110 res->status = undeclared;
111
112 return res;
113 }
114
115 /*---------------------------------.
116 | Create a new symbol, named TAG. |
117 `---------------------------------*/
118
119 static symbol *
120 symbol_new (uniqstr tag, location loc)
121 {
122 symbol *res = xmalloc (sizeof *res);
123 uniqstr_assert (tag);
124
125 /* If the tag is not a string (starts with a double quote), check
126 that it is valid for Yacc. */
127 if (tag[0] != '\"' && tag[0] != '\'' && strchr (tag, '-'))
128 complain (&loc, Wyacc,
129 _("POSIX Yacc forbids dashes in symbol names: %s"), tag);
130
131 res->tag = tag;
132 res->location = loc;
133 res->translatable = false;
134 res->location_of_lhs = false;
135 res->alias = NULL;
136 res->content = sym_content_new (res);
137 res->is_alias = false;
138 return res;
139 }
140
141 /*--------------------.
142 | Free a sym_content. |
143 `--------------------*/
144
145 static void
146 sym_content_free (sym_content *sym)
147 {
148 free (sym);
149 }
150
151
152 /*---------------------------------------------------------.
153 | Free a symbol and its associated content if appropriate. |
154 `---------------------------------------------------------*/
155
156 static void
157 symbol_free (void *ptr)
158 {
159 symbol *sym = (symbol *)ptr;
160 if (!sym->is_alias)
161 sym_content_free (sym->content);
162 free (sym);
163
164 }
165
166 /* If needed, swap first and second so that first has the earliest
167 location (according to location_cmp).
168
169 Many symbol features (e.g., token codes) are not assigned during
170 parsing, but in a second step, via a traversal of the symbol table
171 sorted on tag.
172
173 However, error messages make more sense if we keep the first
174 declaration first.
175 */
176
177 static void
178 symbols_sort (const symbol **first, const symbol **second)
179 {
180 if (0 < location_cmp ((*first)->location, (*second)->location))
181 {
182 const symbol* tmp = *first;
183 *first = *second;
184 *second = tmp;
185 }
186 }
187
188 /* Likewise, for locations. */
189
190 static void
191 locations_sort (location *first, location *second)
192 {
193 if (0 < location_cmp (*first, *second))
194 {
195 location tmp = *first;
196 *first = *second;
197 *second = tmp;
198 }
199 }
200
201 char const *
202 code_props_type_string (code_props_type kind)
203 {
204 switch (kind)
205 {
206 case destructor:
207 return "%destructor";
208 case printer:
209 return "%printer";
210 }
211 abort ();
212 }
213
214
215 /*----------------------------------------.
216 | Create a new semantic type, named TAG. |
217 `----------------------------------------*/
218
219 static semantic_type *
220 semantic_type_new (uniqstr tag, const location *loc)
221 {
222 semantic_type *res = xmalloc (sizeof *res);
223
224 uniqstr_assert (tag);
225 res->tag = tag;
226 res->location = loc ? *loc : empty_loc;
227 res->status = undeclared;
228 for (int i = 0; i < CODE_PROPS_SIZE; ++i)
229 code_props_none_init (&res->props[i]);
230
231 return res;
232 }
233
234
235 /*-----------------.
236 | Print a symbol. |
237 `-----------------*/
238
239 #define SYMBOL_INT_ATTR_PRINT(Attr) \
240 if (s->content) \
241 fprintf (f, " %s = %d", #Attr, s->content->Attr)
242
243 #define SYMBOL_STR_ATTR_PRINT(Attr) \
244 if (s->content && s->content->Attr) \
245 fprintf (f, " %s { %s }", #Attr, s->content->Attr)
246
247 #define SYMBOL_CODE_PRINT(Attr) \
248 if (s->content && s->content->props[Attr].code) \
249 fprintf (f, " %s { %s }", #Attr, s->content->props[Attr].code)
250
251 void
252 symbol_print (symbol const *s, FILE *f)
253 {
254 if (s)
255 {
256 symbol_class c = s->content->class;
257 fprintf (f, "%s: %s",
258 c == unknown_sym ? "unknown"
259 : c == pct_type_sym ? "%type"
260 : c == token_sym ? "token"
261 : c == nterm_sym ? "nterm"
262 : NULL, /* abort. */
263 s->tag);
264 putc (' ', f);
265 location_print (s->location, f);
266 SYMBOL_INT_ATTR_PRINT (code);
267 SYMBOL_INT_ATTR_PRINT (number);
268 SYMBOL_STR_ATTR_PRINT (type_name);
269 SYMBOL_CODE_PRINT (destructor);
270 SYMBOL_CODE_PRINT (printer);
271 }
272 else
273 fputs ("<NULL>", f);
274 }
275
276 #undef SYMBOL_ATTR_PRINT
277 #undef SYMBOL_CODE_PRINT
278
279
280 /*----------------------------------.
281 | Whether S is a valid identifier. |
282 `----------------------------------*/
283
284 static bool
285 is_identifier (uniqstr s)
286 {
287 static char const alphanum[26 + 26 + 1 + 10] =
288 "abcdefghijklmnopqrstuvwxyz"
289 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
290 "_"
291 "0123456789";
292 if (!s || ! memchr (alphanum, *s, sizeof alphanum - 10))
293 return false;
294 for (++s; *s; ++s)
295 if (! memchr (alphanum, *s, sizeof alphanum))
296 return false;
297 return true;
298 }
299
300
301 /*-----------------------------------------------.
302 | Get the identifier associated to this symbol. |
303 `-----------------------------------------------*/
304
305 uniqstr
306 symbol_id_get (symbol const *sym)
307 {
308 // There's one weird case: YYerror is the alias, and error is the
309 // base symbol. Return YYerror in that case.
310 if (sym->alias && is_identifier (sym->alias->tag))
311 return sym->alias->tag;
312 else if (is_identifier (sym->tag))
313 return sym->tag;
314 else
315 return NULL;
316 }
317
318
319 /*------------------------------------------------------------------.
320 | Complain that S's WHAT is redeclared at SECOND, and was first set |
321 | at FIRST. |
322 `------------------------------------------------------------------*/
323
324 static void
325 complain_symbol_redeclared (symbol *s, const char *what, location first,
326 location second)
327 {
328 locations_sort (&first, &second);
329 complain (&second, complaint, _("%s redeclaration for %s"), what, s->tag);
330 subcomplain (&first, complaint, _("previous declaration"));
331 }
332
333 static void
334 complain_semantic_type_redeclared (semantic_type *s, const char *what, location first,
335 location second)
336 {
337 locations_sort (&first, &second);
338 complain (&second, complaint, _("%s redeclaration for <%s>"), what, s->tag);
339 subcomplain (&first, complaint, _("previous declaration"));
340 }
341
342 static void
343 complain_class_redeclared (symbol *sym, symbol_class class, location second)
344 {
345 complain (&second, complaint,
346 class == token_sym
347 ? _("symbol %s redeclared as a token")
348 : _("symbol %s redeclared as a nonterminal"), sym->tag);
349 if (!location_empty (sym->location))
350 subcomplain (&sym->location, complaint, _("previous definition"));
351 }
352
353 static const symbol *
354 symbol_from_uniqstr_fuzzy (const uniqstr key)
355 {
356 aver (symbols_sorted);
357 #define FSTRCMP_THRESHOLD 0.6
358 double best_similarity = FSTRCMP_THRESHOLD;
359 const symbol *res = NULL;
360 size_t count = hash_get_n_entries (symbol_table);
361 for (int i = 0; i < count; ++i)
362 {
363 symbol *sym = symbols_sorted[i];
364 if (STRNEQ (key, sym->tag)
365 && (sym->content->status == declared
366 || sym->content->status == undeclared))
367 {
368 double similarity = fstrcmp_bounded (key, sym->tag, best_similarity);
369 if (best_similarity < similarity)
370 {
371 res = sym;
372 best_similarity = similarity;
373 }
374 }
375 }
376 return res;
377 }
378
379 static void
380 complain_symbol_undeclared (const symbol *sym)
381 {
382 assert (sym->content->status != declared);
383 const symbol *best = symbol_from_uniqstr_fuzzy (sym->tag);
384 if (best)
385 {
386 complain (&sym->location,
387 sym->content->status == needed ? complaint : Wother,
388 _("symbol %s is used, but is not defined as a token"
389 " and has no rules; did you mean %s?"),
390 quote_n (0, sym->tag),
391 quote_n (1, best->tag));
392 if (feature_flag & feature_caret)
393 location_caret_suggestion (sym->location, best->tag, stderr);
394 }
395 else
396 complain (&sym->location,
397 sym->content->status == needed ? complaint : Wother,
398 _("symbol %s is used, but is not defined as a token"
399 " and has no rules"),
400 quote (sym->tag));
401 }
402
403 void
404 symbol_location_as_lhs_set (symbol *sym, location loc)
405 {
406 if (!sym->location_of_lhs)
407 {
408 sym->location = loc;
409 sym->location_of_lhs = true;
410 }
411 }
412
413
414 /*-----------------------------------------------------------------.
415 | Set the TYPE_NAME associated with SYM. Does nothing if passed 0 |
416 | as TYPE_NAME. |
417 `-----------------------------------------------------------------*/
418
419 void
420 symbol_type_set (symbol *sym, uniqstr type_name, location loc)
421 {
422 if (type_name)
423 {
424 tag_seen = true;
425 if (sym->content->type_name)
426 complain_symbol_redeclared (sym, "%type",
427 sym->content->type_loc, loc);
428 else
429 {
430 uniqstr_assert (type_name);
431 sym->content->type_name = type_name;
432 sym->content->type_loc = loc;
433 }
434 }
435 }
436
437 /*--------------------------------------------------------.
438 | Set the DESTRUCTOR or PRINTER associated with the SYM. |
439 `--------------------------------------------------------*/
440
441 void
442 symbol_code_props_set (symbol *sym, code_props_type kind,
443 code_props const *code)
444 {
445 if (sym->content->props[kind].code)
446 complain_symbol_redeclared (sym, code_props_type_string (kind),
447 sym->content->props[kind].location,
448 code->location);
449 else
450 sym->content->props[kind] = *code;
451 }
452
453
454 /*-----------------------------------------------------.
455 | Set the DESTRUCTOR or PRINTER associated with TYPE. |
456 `-----------------------------------------------------*/
457
458 void
459 semantic_type_code_props_set (semantic_type *type,
460 code_props_type kind,
461 code_props const *code)
462 {
463 if (type->props[kind].code)
464 complain_semantic_type_redeclared (type, code_props_type_string (kind),
465 type->props[kind].location,
466 code->location);
467 else
468 type->props[kind] = *code;
469 }
470
471 /*---------------------------------------------------.
472 | Get the computed %destructor or %printer for SYM. |
473 `---------------------------------------------------*/
474
475 code_props *
476 symbol_code_props_get (symbol *sym, code_props_type kind)
477 {
478 /* Per-symbol code props. */
479 if (sym->content->props[kind].code)
480 return &sym->content->props[kind];
481
482 /* Per-type code props. */
483 if (sym->content->type_name)
484 {
485 code_props *code =
486 &semantic_type_get (sym->content->type_name, NULL)->props[kind];
487 if (code->code)
488 return code;
489 }
490
491 /* Apply default code props's only to user-defined symbols. */
492 if (symbol_is_user_defined (sym))
493 {
494 code_props *code = &semantic_type_get (sym->content->type_name ? "*" : "",
495 NULL)->props[kind];
496 if (code->code)
497 return code;
498 }
499 return &code_props_none;
500 }
501
502 /*-----------------------------------------------------------------.
503 | Set the PRECEDENCE associated with SYM. Does nothing if invoked |
504 | with UNDEF_ASSOC as ASSOC. |
505 `-----------------------------------------------------------------*/
506
507 void
508 symbol_precedence_set (symbol *sym, int prec, assoc a, location loc)
509 {
510 if (a != undef_assoc)
511 {
512 sym_content *s = sym->content;
513 if (s->prec)
514 complain_symbol_redeclared (sym, assoc_to_string (a),
515 s->prec_loc, loc);
516 else
517 {
518 s->prec = prec;
519 s->assoc = a;
520 s->prec_loc = loc;
521 }
522 }
523
524 /* Only terminals have a precedence. */
525 symbol_class_set (sym, token_sym, loc, false);
526 }
527
528
529 /*------------------------------------.
530 | Set the CLASS associated with SYM. |
531 `------------------------------------*/
532
533 static void
534 complain_pct_type_on_token (location *loc)
535 {
536 complain (loc, Wyacc,
537 _("POSIX yacc reserves %%type to nonterminals"));
538 }
539
540 void
541 symbol_class_set (symbol *sym, symbol_class class, location loc, bool declaring)
542 {
543 aver (class != unknown_sym);
544 sym_content *s = sym->content;
545 if (class == pct_type_sym)
546 {
547 if (s->class == token_sym)
548 complain_pct_type_on_token (&loc);
549 else if (s->class == unknown_sym)
550 s->class = class;
551 }
552 else if (s->class != unknown_sym && s->class != pct_type_sym
553 && s->class != class)
554 complain_class_redeclared (sym, class, loc);
555 else
556 {
557 if (class == token_sym && s->class == pct_type_sym)
558 complain_pct_type_on_token (&sym->location);
559
560 s->class = class;
561
562 if (declaring)
563 {
564 if (s->status == declared)
565 {
566 complain (&loc, Wother,
567 _("symbol %s redeclared"), sym->tag);
568 subcomplain (&sym->location, Wother,
569 _("previous declaration"));
570 }
571 else
572 {
573 sym->location = loc;
574 s->status = declared;
575 }
576 }
577 }
578 }
579
580
581 /*----------------------------.
582 | Set the token code of SYM. |
583 `----------------------------*/
584
585 void
586 symbol_code_set (symbol *sym, int code, location loc)
587 {
588 int *codep = &sym->content->code;
589 if (sym->content->class != token_sym)
590 complain (&loc, complaint,
591 _("nonterminals cannot be given a token code"));
592 else if (*codep != CODE_UNDEFINED
593 && *codep != code)
594 complain (&loc, complaint, _("redefining code of token %s"),
595 sym->tag);
596 else if (code == INT_MAX)
597 complain (&loc, complaint, _("code of token %s too large"),
598 sym->tag);
599 else
600 {
601 *codep = code;
602 /* User defined $end token? */
603 if (code == 0 && !eoftoken)
604 {
605 eoftoken = sym->content->symbol;
606 eoftoken->content->number = 0;
607 }
608 }
609 }
610
611
612 /*----------------------------------------------------------.
613 | If SYM is not defined, report an error, and consider it a |
614 | nonterminal. |
615 `----------------------------------------------------------*/
616
617 static void
618 symbol_check_defined (symbol *sym)
619 {
620 sym_content *s = sym->content;
621 if (s->class == unknown_sym || s->class == pct_type_sym)
622 {
623 complain_symbol_undeclared (sym);
624 s->class = nterm_sym;
625 }
626
627 if (s->number == NUMBER_UNDEFINED)
628 s->number = s->class == token_sym ? ntokens++ : nnterms++;
629
630 if (s->class == token_sym
631 && sym->tag[0] == '"'
632 && !sym->is_alias)
633 complain (&sym->location, Wdangling_alias,
634 _("string literal %s not attached to a symbol"),
635 sym->tag);
636
637 for (int i = 0; i < 2; ++i)
638 symbol_code_props_get (sym, i)->is_used = true;
639
640 /* Set the semantic type status associated to the current symbol to
641 'declared' so that we could check semantic types unnecessary uses. */
642 if (s->type_name)
643 {
644 semantic_type *sem_type = semantic_type_get (s->type_name, NULL);
645 if (sem_type)
646 sem_type->status = declared;
647 }
648 }
649
650 static void
651 semantic_type_check_defined (semantic_type *sem_type)
652 {
653 /* <*> and <> do not have to be "declared". */
654 if (sem_type->status == declared
655 || !*sem_type->tag
656 || STREQ (sem_type->tag, "*"))
657 {
658 for (int i = 0; i < 2; ++i)
659 if (sem_type->props[i].kind != CODE_PROPS_NONE
660 && ! sem_type->props[i].is_used)
661 complain (&sem_type->location, Wother,
662 _("useless %s for type <%s>"),
663 code_props_type_string (i), sem_type->tag);
664 }
665 else
666 complain (&sem_type->location, Wother,
667 _("type <%s> is used, but is not associated to any symbol"),
668 sem_type->tag);
669 }
670
671 /*-------------------------------------------------------------------.
672 | Merge the properties (precedence, associativity, etc.) of SYM, and |
673 | its string-named alias STR; check consistency. |
674 `-------------------------------------------------------------------*/
675
676 static void
677 symbol_merge_properties (symbol *sym, symbol *str)
678 {
679 if (str->content->type_name != sym->content->type_name)
680 {
681 if (str->content->type_name)
682 symbol_type_set (sym,
683 str->content->type_name, str->content->type_loc);
684 else
685 symbol_type_set (str,
686 sym->content->type_name, sym->content->type_loc);
687 }
688
689
690 for (int i = 0; i < CODE_PROPS_SIZE; ++i)
691 if (str->content->props[i].code)
692 symbol_code_props_set (sym, i, &str->content->props[i]);
693 else if (sym->content->props[i].code)
694 symbol_code_props_set (str, i, &sym->content->props[i]);
695
696 if (sym->content->prec || str->content->prec)
697 {
698 if (str->content->prec)
699 symbol_precedence_set (sym, str->content->prec, str->content->assoc,
700 str->content->prec_loc);
701 else
702 symbol_precedence_set (str, sym->content->prec, sym->content->assoc,
703 sym->content->prec_loc);
704 }
705 }
706
707 void
708 symbol_make_alias (symbol *sym, symbol *str, location loc)
709 {
710 if (sym->content->class != token_sym)
711 complain (&loc, complaint,
712 _("nonterminals cannot be given a string alias"));
713 else if (str->alias)
714 complain (&loc, Wother,
715 _("symbol %s used more than once as a literal string"), str->tag);
716 else if (sym->alias)
717 complain (&loc, Wother,
718 _("symbol %s given more than one literal string"), sym->tag);
719 else
720 {
721 symbol_merge_properties (sym, str);
722 sym_content_free (str->content);
723 str->content = sym->content;
724 str->content->symbol = str;
725 str->is_alias = true;
726 str->alias = sym;
727 sym->alias = str;
728 }
729 }
730
731
732 /*-------------------------------------------------------------------.
733 | Assign a symbol number, and write the definition of the token name |
734 | into FDEFINES. Put in SYMBOLS. |
735 `-------------------------------------------------------------------*/
736
737 static void
738 symbol_pack (symbol *sym)
739 {
740 aver (sym->content->number != NUMBER_UNDEFINED);
741 if (sym->content->class == nterm_sym)
742 sym->content->number += ntokens;
743
744 symbols[sym->content->number] = sym->content->symbol;
745 }
746
747 static void
748 complain_code_redeclared (int num, const symbol *first, const symbol *second)
749 {
750 symbols_sort (&first, &second);
751 complain (&second->location, complaint,
752 _("code %d reassigned to token %s"),
753 num, second->tag);
754 subcomplain (&first->location, complaint,
755 _("previous declaration for %s"),
756 first->tag);
757 }
758
759 /*-------------------------------------------------.
760 | Put SYM in TOKEN_TRANSLATIONS if it is a token. |
761 `-------------------------------------------------*/
762
763 static void
764 symbol_translation (const symbol *sym)
765 {
766 if (sym->content->class == token_sym && !sym->is_alias)
767 {
768 /* A token whose translation has already been set? */
769 if (token_translations[sym->content->code]
770 != undeftoken->content->number)
771 complain_code_redeclared
772 (sym->content->code,
773 symbols[token_translations[sym->content->code]], sym);
774 else
775 token_translations[sym->content->code]
776 = sym->content->number;
777 }
778 }
779
780
781 /*---------------------------------------.
782 | Symbol and semantic type hash tables. |
783 `---------------------------------------*/
784
785 /* Initial capacity of symbol and semantic type hash table. */
786 #define HT_INITIAL_CAPACITY 257
787
788 static inline bool
789 hash_compare_symbol (const symbol *m1, const symbol *m2)
790 {
791 /* Since tags are unique, we can compare the pointers themselves. */
792 return UNIQSTR_EQ (m1->tag, m2->tag);
793 }
794
795 static inline bool
796 hash_compare_semantic_type (const semantic_type *m1, const semantic_type *m2)
797 {
798 /* Since names are unique, we can compare the pointers themselves. */
799 return UNIQSTR_EQ (m1->tag, m2->tag);
800 }
801
802 static bool
803 hash_symbol_comparator (void const *m1, void const *m2)
804 {
805 return hash_compare_symbol (m1, m2);
806 }
807
808 static bool
809 hash_semantic_type_comparator (void const *m1, void const *m2)
810 {
811 return hash_compare_semantic_type (m1, m2);
812 }
813
814 static inline size_t
815 hash_symbol (const symbol *m, size_t tablesize)
816 {
817 /* Since tags are unique, we can hash the pointer itself. */
818 return ((uintptr_t) m->tag) % tablesize;
819 }
820
821 static inline size_t
822 hash_semantic_type (const semantic_type *m, size_t tablesize)
823 {
824 /* Since names are unique, we can hash the pointer itself. */
825 return ((uintptr_t) m->tag) % tablesize;
826 }
827
828 static size_t
829 hash_symbol_hasher (void const *m, size_t tablesize)
830 {
831 return hash_symbol (m, tablesize);
832 }
833
834 static size_t
835 hash_semantic_type_hasher (void const *m, size_t tablesize)
836 {
837 return hash_semantic_type (m, tablesize);
838 }
839
840 /*-------------------------------.
841 | Create the symbol hash table. |
842 `-------------------------------*/
843
844 void
845 symbols_new (void)
846 {
847 symbol_table = hash_xinitialize (HT_INITIAL_CAPACITY,
848 NULL,
849 hash_symbol_hasher,
850 hash_symbol_comparator,
851 symbol_free);
852
853 /* Construct the acceptsymbol symbol. */
854 acceptsymbol = symbol_get ("$accept", empty_loc);
855 acceptsymbol->content->class = nterm_sym;
856 acceptsymbol->content->number = nnterms++;
857
858 /* Construct the YYerror/"error" token */
859 errtoken = symbol_get ("YYerror", empty_loc);
860 errtoken->content->class = token_sym;
861 errtoken->content->number = ntokens++;
862 {
863 symbol *alias = symbol_get ("error", empty_loc);
864 symbol_class_set (alias, token_sym, empty_loc, false);
865 symbol_make_alias (errtoken, alias, empty_loc);
866 }
867
868 /* Construct the YYUNDEF/"$undefined" token that represents all
869 undefined literal tokens. It is always symbol number 2. */
870 undeftoken = symbol_get ("YYUNDEF", empty_loc);
871 undeftoken->content->class = token_sym;
872 undeftoken->content->number = ntokens++;
873 {
874 symbol *alias = symbol_get ("$undefined", empty_loc);
875 symbol_class_set (alias, token_sym, empty_loc, false);
876 symbol_make_alias (undeftoken, alias, empty_loc);
877 }
878
879 semantic_type_table = hash_xinitialize (HT_INITIAL_CAPACITY,
880 NULL,
881 hash_semantic_type_hasher,
882 hash_semantic_type_comparator,
883 free);
884 }
885
886
887 /*----------------------------------------------------------------.
888 | Find the symbol named KEY, and return it. If it does not exist |
889 | yet, create it. |
890 `----------------------------------------------------------------*/
891
892 symbol *
893 symbol_from_uniqstr (const uniqstr key, location loc)
894 {
895 symbol probe;
896
897 probe.tag = key;
898 symbol *res = hash_lookup (symbol_table, &probe);
899
900 if (!res)
901 {
902 /* First insertion in the hash. */
903 aver (!symbols_sorted);
904 res = symbol_new (key, loc);
905 hash_xinsert (symbol_table, res);
906 }
907 return res;
908 }
909
910
911 /*-----------------------------------------------------------------------.
912 | Find the semantic type named KEY, and return it. If it does not exist |
913 | yet, create it. |
914 `-----------------------------------------------------------------------*/
915
916 semantic_type *
917 semantic_type_from_uniqstr (const uniqstr key, const location *loc)
918 {
919 semantic_type probe;
920
921 probe.tag = key;
922 semantic_type *res = hash_lookup (semantic_type_table, &probe);
923
924 if (!res)
925 {
926 /* First insertion in the hash. */
927 res = semantic_type_new (key, loc);
928 hash_xinsert (semantic_type_table, res);
929 }
930 return res;
931 }
932
933
934 /*----------------------------------------------------------------.
935 | Find the symbol named KEY, and return it. If it does not exist |
936 | yet, create it. |
937 `----------------------------------------------------------------*/
938
939 symbol *
940 symbol_get (const char *key, location loc)
941 {
942 return symbol_from_uniqstr (uniqstr_new (key), loc);
943 }
944
945
946 /*-----------------------------------------------------------------------.
947 | Find the semantic type named KEY, and return it. If it does not exist |
948 | yet, create it. |
949 `-----------------------------------------------------------------------*/
950
951 semantic_type *
952 semantic_type_get (const char *key, const location *loc)
953 {
954 return semantic_type_from_uniqstr (uniqstr_new (key), loc);
955 }
956
957
958 /*------------------------------------------------------------------.
959 | Generate a dummy nonterminal, whose name cannot conflict with the |
960 | user's names. |
961 `------------------------------------------------------------------*/
962
963 symbol *
964 dummy_symbol_get (location loc)
965 {
966 /* Incremented for each generated symbol. */
967 static int dummy_count = 0;
968 char buf[32];
969 int len = snprintf (buf, sizeof buf, "$@%d", ++dummy_count);
970 assure (len < sizeof buf);
971 symbol *sym = symbol_get (buf, loc);
972 sym->content->class = nterm_sym;
973 return sym;
974 }
975
976 bool
977 symbol_is_dummy (symbol const *sym)
978 {
979 return sym->tag[0] == '@' || (sym->tag[0] == '$' && sym->tag[1] == '@');
980 }
981
982 /*-------------------.
983 | Free the symbols. |
984 `-------------------*/
985
986 void
987 symbols_free (void)
988 {
989 hash_free (symbol_table);
990 hash_free (semantic_type_table);
991 free (symbols);
992 free (symbols_sorted);
993 free (semantic_types_sorted);
994 }
995
996
997 static int
998 symbol_cmp (void const *a, void const *b)
999 {
1000 return location_cmp ((*(symbol * const *)a)->location,
1001 (*(symbol * const *)b)->location);
1002 }
1003
1004 /* Store in *SORTED an array of pointers to the symbols contained in
1005 TABLE, sorted by order of appearance (i.e., by location). */
1006
1007 static void
1008 table_sort (struct hash_table *table, symbol ***sorted)
1009 {
1010 aver (!*sorted);
1011 size_t count = hash_get_n_entries (table);
1012 *sorted = xnmalloc (count + 1, sizeof **sorted);
1013 hash_get_entries (table, (void**)*sorted, count);
1014 qsort (*sorted, count, sizeof **sorted, symbol_cmp);
1015 (*sorted)[count] = NULL;
1016 }
1017
1018 /*--------------------------------------------------------------.
1019 | Check that all the symbols are defined. Report any undefined |
1020 | symbols and consider them nonterminals. |
1021 `--------------------------------------------------------------*/
1022
1023 void
1024 symbols_check_defined (void)
1025 {
1026 table_sort (symbol_table, &symbols_sorted);
1027 /* semantic_type, like symbol, starts with a 'tag' field and then a
1028 'location' field. And here we only deal with arrays/hashes of
1029 pointers, sizeof is not an issue.
1030
1031 So instead of implementing table_sort (and symbol_cmp) once for
1032 each type, let's lie a bit to the typing system, and treat
1033 'semantic_type' as if it were 'symbol'. */
1034 table_sort (semantic_type_table, (symbol ***) &semantic_types_sorted);
1035
1036 for (int i = 0; symbols_sorted[i]; ++i)
1037 symbol_check_defined (symbols_sorted[i]);
1038 for (int i = 0; semantic_types_sorted[i]; ++i)
1039 semantic_type_check_defined (semantic_types_sorted[i]);
1040 }
1041
1042 /*------------------------------------------------------------------.
1043 | Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1044 | number. |
1045 `------------------------------------------------------------------*/
1046
1047 static void
1048 symbols_token_translations_init (void)
1049 {
1050 bool code_256_available_p = true;
1051
1052 /* Find the highest token code, and whether 256, the POSIX preferred
1053 token code for the error token, is used. */
1054 max_code = 0;
1055 for (int i = 0; i < ntokens; ++i)
1056 {
1057 sym_content *sym = symbols[i]->content;
1058 if (sym->code != CODE_UNDEFINED)
1059 {
1060 if (sym->code > max_code)
1061 max_code = sym->code;
1062 if (sym->code == 256)
1063 code_256_available_p = false;
1064 }
1065 }
1066
1067 /* If 256 is not used, assign it to error, to follow POSIX. */
1068 if (code_256_available_p
1069 && errtoken->content->code == CODE_UNDEFINED)
1070 errtoken->content->code = 256;
1071
1072 /* Set the missing codes. */
1073 if (max_code < 256)
1074 max_code = 256;
1075
1076 for (int i = 0; i < ntokens; ++i)
1077 {
1078 sym_content *sym = symbols[i]->content;
1079 if (sym->code == CODE_UNDEFINED)
1080 {
1081 IGNORE_TYPE_LIMITS_BEGIN
1082 if (INT_ADD_WRAPV (max_code, 1, &max_code))
1083 complain (NULL, fatal, _("token number too large"));
1084 IGNORE_TYPE_LIMITS_END
1085 sym->code = max_code;
1086 }
1087 if (sym->code > max_code)
1088 max_code = sym->code;
1089 }
1090
1091 token_translations = xnmalloc (max_code + 1,
1092 sizeof *token_translations);
1093
1094 /* Initialize all entries for literal tokens to the internal token
1095 number for $undefined, which represents all invalid inputs. */
1096 for (int i = 0; i < max_code + 1; ++i)
1097 token_translations[i] = undeftoken->content->number;
1098 for (int i = 0; symbols_sorted[i]; ++i)
1099 symbol_translation (symbols_sorted[i]);
1100 }
1101
1102
1103 /* Whether some symbol requires internationalization. */
1104 static bool
1105 has_translations (void)
1106 {
1107 for (const void *entry = hash_get_first (symbol_table);
1108 entry;
1109 entry = hash_get_next (symbol_table, entry))
1110 {
1111 const symbol *sym = (const symbol *) entry;
1112 if (sym->translatable)
1113 return true;
1114 }
1115 return false;
1116 }
1117
1118 /*----------------------------------------------------------------.
1119 | Assign symbol numbers, and write definition of token names into |
1120 | FDEFINES. Set up vectors SYMBOL_TABLE, TAGS of symbols. |
1121 `----------------------------------------------------------------*/
1122
1123 void
1124 symbols_pack (void)
1125 {
1126 symbols = xcalloc (nsyms, sizeof *symbols);
1127 for (int i = 0; symbols_sorted[i]; ++i)
1128 symbol_pack (symbols_sorted[i]);
1129
1130 /* Aliases leave empty slots in symbols, so remove them. */
1131 {
1132 int nsyms_old = nsyms;
1133 for (int writei = 0, readi = 0; readi < nsyms_old; readi += 1)
1134 {
1135 if (symbols[readi] == NULL)
1136 {
1137 nsyms -= 1;
1138 ntokens -= 1;
1139 }
1140 else
1141 {
1142 symbols[writei] = symbols[readi];
1143 symbols[writei]->content->number = writei;
1144 writei += 1;
1145 }
1146 }
1147 }
1148 symbols = xnrealloc (symbols, nsyms, sizeof *symbols);
1149
1150 symbols_token_translations_init ();
1151
1152 // If some user tokens are internationalized, the internal ones
1153 // should be too.
1154 if (has_translations ())
1155 {
1156 const bool eof_is_user_defined
1157 = !eoftoken->alias || STRNEQ (eoftoken->alias->tag, "$end");
1158 if (!eof_is_user_defined)
1159 eoftoken->alias->translatable = true;
1160 undeftoken->alias->translatable = true;
1161 errtoken->alias->translatable = true;
1162 }
1163 }
1164
1165 /*---------------------------------.
1166 | Initialize relation graph nodes. |
1167 `---------------------------------*/
1168
1169 static void
1170 init_prec_nodes (void)
1171 {
1172 prec_nodes = xcalloc (nsyms, sizeof *prec_nodes);
1173 for (int i = 0; i < nsyms; ++i)
1174 {
1175 prec_nodes[i] = xmalloc (sizeof *prec_nodes[i]);
1176 symgraph *s = prec_nodes[i];
1177 s->id = i;
1178 s->succ = 0;
1179 s->pred = 0;
1180 }
1181 }
1182
1183 /*----------------.
1184 | Create a link. |
1185 `----------------*/
1186
1187 static symgraphlink *
1188 symgraphlink_new (graphid id, symgraphlink *next)
1189 {
1190 symgraphlink *res = xmalloc (sizeof *res);
1191 res->id = id;
1192 res->next = next;
1193 return res;
1194 }
1195
1196
1197 /*------------------------------------------------------------------.
1198 | Register the second symbol of the precedence relation, and return |
1199 | whether this relation is new. Use only in register_precedence. |
1200 `------------------------------------------------------------------*/
1201
1202 static bool
1203 register_precedence_second_symbol (symgraphlink **first, graphid sym)
1204 {
1205 if (!*first || sym < (*first)->id)
1206 *first = symgraphlink_new (sym, *first);
1207 else
1208 {
1209 symgraphlink *slist = *first;
1210
1211 while (slist->next && slist->next->id <= sym)
1212 slist = slist->next;
1213
1214 if (slist->id == sym)
1215 /* Relation already present. */
1216 return false;
1217
1218 slist->next = symgraphlink_new (sym, slist->next);
1219 }
1220 return true;
1221 }
1222
1223 /*------------------------------------------------------------------.
1224 | Register a new relation between symbols as used. The first symbol |
1225 | has a greater precedence than the second one. |
1226 `------------------------------------------------------------------*/
1227
1228 void
1229 register_precedence (graphid first, graphid snd)
1230 {
1231 if (!prec_nodes)
1232 init_prec_nodes ();
1233 register_precedence_second_symbol (&(prec_nodes[first]->succ), snd);
1234 register_precedence_second_symbol (&(prec_nodes[snd]->pred), first);
1235 }
1236
1237
1238 /*---------------------------------------.
1239 | Deep clear a linked / adjacency list). |
1240 `---------------------------------------*/
1241
1242 static void
1243 linkedlist_free (symgraphlink *node)
1244 {
1245 if (node)
1246 {
1247 while (node->next)
1248 {
1249 symgraphlink *tmp = node->next;
1250 free (node);
1251 node = tmp;
1252 }
1253 free (node);
1254 }
1255 }
1256
1257 /*----------------------------------------------.
1258 | Clear and destroy association tracking table. |
1259 `----------------------------------------------*/
1260
1261 static void
1262 assoc_free (void)
1263 {
1264 for (int i = 0; i < nsyms; ++i)
1265 {
1266 linkedlist_free (prec_nodes[i]->pred);
1267 linkedlist_free (prec_nodes[i]->succ);
1268 free (prec_nodes[i]);
1269 }
1270 free (prec_nodes);
1271 }
1272
1273 /*---------------------------------------.
1274 | Initialize association tracking table. |
1275 `---------------------------------------*/
1276
1277 static void
1278 init_assoc (void)
1279 {
1280 used_assoc = xcalloc (nsyms, sizeof *used_assoc);
1281 for (graphid i = 0; i < nsyms; ++i)
1282 used_assoc[i] = false;
1283 }
1284
1285 /*------------------------------------------------------------------.
1286 | Test if the associativity for the symbols is defined and useless. |
1287 `------------------------------------------------------------------*/
1288
1289 static inline bool
1290 is_assoc_useless (symbol *s)
1291 {
1292 return s
1293 && s->content->assoc != undef_assoc
1294 && s->content->assoc != precedence_assoc
1295 && !used_assoc[s->content->number];
1296 }
1297
1298 /*-------------------------------.
1299 | Register a used associativity. |
1300 `-------------------------------*/
1301
1302 void
1303 register_assoc (graphid i, graphid j)
1304 {
1305 if (!used_assoc)
1306 init_assoc ();
1307 used_assoc[i] = true;
1308 used_assoc[j] = true;
1309 }
1310
1311 /*--------------------------------------------------.
1312 | Print a warning for unused precedence relations. |
1313 `--------------------------------------------------*/
1314
1315 void
1316 print_precedence_warnings (void)
1317 {
1318 if (!prec_nodes)
1319 init_prec_nodes ();
1320 if (!used_assoc)
1321 init_assoc ();
1322 for (int i = 0; i < nsyms; ++i)
1323 {
1324 symbol *s = symbols[i];
1325 if (s
1326 && s->content->prec != 0
1327 && !prec_nodes[i]->pred
1328 && !prec_nodes[i]->succ)
1329 {
1330 if (is_assoc_useless (s))
1331 complain (&s->content->prec_loc, Wprecedence,
1332 _("useless precedence and associativity for %s"), s->tag);
1333 else if (s->content->assoc == precedence_assoc)
1334 complain (&s->content->prec_loc, Wprecedence,
1335 _("useless precedence for %s"), s->tag);
1336 }
1337 else if (is_assoc_useless (s))
1338 complain (&s->content->prec_loc, Wprecedence,
1339 _("useless associativity for %s, use %%precedence"), s->tag);
1340 }
1341 free (used_assoc);
1342 assoc_free ();
1343 }