2 * Copyright (c) 1989 The Regents of the University of California.
5 * This code is derived from software contributed to Berkeley by
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 static char sccsid[] = "@(#)reader.c 5.7 (Berkeley) 1/20/91";
43 /* The line size must be a positive integer. One hundred was chosen */
44 /* because few lines in Yacc input grammars exceed 100 characters. */
45 /* Note that if a line exceeds LINESIZE characters, the line buffer */
46 /* will be expanded to accommodate it. */
74 char *line_format = "\t\t\t\t\t// line %d \"%s\"\n";
75 char *default_line_format = "\t\t\t\t\t// line %d\n";
82 if (cinc >= cache_size)
85 cache = REALLOC(cache, cache_size);
86 if (cache == 0) no_space();
95 register FILE *f = input_file;
99 if (saw_eof || (c = getc(f)) == EOF)
101 if (line) { FREE(line); line = 0; }
107 if (line == 0 || linesize != (LINESIZE + 1))
109 if (line) FREE(line);
110 linesize = LINESIZE + 1;
111 line = MALLOC(linesize);
112 if (line == 0) no_space();
120 if (c == '\n') { cptr = line; return; }
123 linesize += LINESIZE;
124 line = REALLOC(line, linesize);
125 if (line == 0) no_space();
142 register char *p, *s, *t;
144 if (line == 0) return (0);
146 while (*s != '\n') ++s;
147 p = MALLOC(s - line + 1);
148 if (p == 0) no_space();
152 while ((*t++ = *s++) != '\n') continue;
161 int st_lineno = lineno;
162 char *st_line = dup_line();
163 char *st_cptr = st_line + (cptr - line);
168 if (*s == '*' && s[1] == '/')
178 unterminated_comment(st_lineno, st_line, st_cptr);
206 if (line == 0) return (EOF);
232 else if (s[1] == '/')
235 if (line == 0) return (EOF);
263 if (isupper(c)) c = tolower(c);
266 else if (isdigit(c) || c == '_' || c == '.' || c == '$')
274 if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0)
276 if (strcmp(cache, "type") == 0)
278 if (strcmp(cache, "left") == 0)
280 if (strcmp(cache, "right") == 0)
282 if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0)
284 if (strcmp(cache, "start") == 0)
292 if (c == '%' || c == '\\')
303 syntax_error(lineno, line, t_cptr);
313 int need_newline = 0;
314 int t_lineno = lineno;
315 char *t_line = dup_line();
316 char *t_cptr = t_line + (cptr - line - 2);
322 unterminated_text(t_lineno, t_line, t_cptr);
324 fprintf(f, line_format, lineno, input_file_name);
336 unterminated_text(t_lineno, t_line, t_cptr);
341 int s_lineno = lineno;
342 char *s_line = dup_line();
343 char *s_cptr = s_line + (cptr - line - 1);
358 unterminated_string(s_lineno, s_line, s_cptr);
367 unterminated_string(s_lineno, s_line, s_cptr);
379 do putc(c, f); while ((c = *++cptr) != '\n');
384 int c_lineno = lineno;
385 char *c_line = dup_line();
386 char *c_cptr = c_line + (cptr - line - 1);
394 if (c == '*' && *cptr == '/')
405 unterminated_comment(c_lineno, c_line, c_cptr);
416 if (need_newline) putc('\n', f);
434 if (c >= '0' && c <= '9')
436 if (c >= 'A' && c <= 'F')
437 return (c - 'A' + 10);
438 if (c >= 'a' && c <= 'f')
439 return (c - 'a' + 10);
447 register int c, quote;
452 int s_lineno = lineno;
453 char *s_line = dup_line();
454 char *s_cptr = s_line + (cptr - line);
461 if (c == quote) break;
462 if (c == '\n') unterminated_string(s_lineno, s_line, s_cptr);
465 char *c_cptr = cptr - 1;
472 if (line == 0) unterminated_string(s_lineno, s_line, s_cptr);
475 case '0': case '1': case '2': case '3':
476 case '4': case '5': case '6': case '7':
481 n = (n << 3) + (c - '0');
485 n = (n << 3) + (c - '0');
489 if (n > MAXCHAR) illegal_character(c_cptr);
496 if (n < 0 || n >= 16)
497 illegal_character(c_cptr);
502 if (i < 0 || i >= 16) break;
505 if (n > MAXCHAR) illegal_character(c_cptr);
510 case 'a': c = 7; break;
511 case 'b': c = '\b'; break;
512 case 'f': c = '\f'; break;
513 case 'n': c = '\n'; break;
514 case 'r': c = '\r'; break;
515 case 't': c = '\t'; break;
516 case 'v': c = '\v'; break;
525 if (s == 0) no_space();
527 for (i = 0; i < n; ++i)
536 for (i = 0; i < n; ++i)
538 c = ((unsigned char *)s)[i];
539 if (c == '\\' || c == cache[0])
551 case 7: cachec('a'); break;
552 case '\b': cachec('b'); break;
553 case '\f': cachec('f'); break;
554 case '\n': cachec('n'); break;
555 case '\r': cachec('r'); break;
556 case '\t': cachec('t'); break;
557 case '\v': cachec('v'); break;
559 cachec(((c >> 6) & 7) + '0');
560 cachec(((c >> 3) & 7) + '0');
561 cachec((c & 7) + '0');
575 if (n == 1 && bp->value == UNDEFINED)
576 bp->value = *(unsigned char *)s;
589 if (strcmp(name, ".") == 0 ||
590 strcmp(name, "$accept") == 0 ||
591 strcmp(name, "$end") == 0)
594 if (name[0] == '$' && name[1] == '$' && isdigit(name[2]))
597 while (isdigit(*s)) ++s;
598 if (*s == NUL) return (1);
611 for (c = *cptr; IS_IDENT(c); c = *++cptr)
615 if (is_reserved(cache)) used_reserved(cache);
617 return (lookup(cache));
628 for (c = *cptr; isdigit(c); c = *++cptr)
629 n = 10*n + (c - '0');
641 int t_lineno = lineno;
642 char *t_line = dup_line();
643 char *t_cptr = t_line + (cptr - line);
647 if (c == EOF) unexpected_EOF();
648 if (emptyOk && c == '>') {
649 ++cptr; return 0; // 0 indicates empty tag if emptyOk
651 if (!isalpha(c) && c != '_' && c != '$')
652 illegal_tag(t_lineno, t_line, t_cptr);
655 do { cachec(c); c = *++cptr; } while (IS_IDENT(c));
659 if (c == EOF) unexpected_EOF();
661 illegal_tag(t_lineno, t_line, t_cptr);
664 for (i = 0; i < ntags; ++i)
666 if (strcmp(cache, tag_table[i]) == 0)
667 return (tag_table[i]);
673 tag_table = (char **)
674 (tag_table ? REALLOC(tag_table, tagmax*sizeof(char *))
675 : MALLOC(tagmax*sizeof(char *)));
676 if (tag_table == 0) no_space();
680 if (s == 0) no_space();
682 tag_table[ntags] = s;
689 declare_tokens(assoc)
697 if (assoc != TOKEN) ++prec;
700 if (c == EOF) unexpected_EOF();
705 if (c == EOF) unexpected_EOF();
710 if (isalpha(c) || c == '_' || c == '.' || c == '$')
712 else if (c == '\'' || c == '"')
717 if (bp == goal) tokenized_start(bp->name);
722 if (bp->tag && tag != bp->tag)
723 retyped_warning(bp->name);
729 if (bp->prec && prec != bp->prec)
730 reprec_warning(bp->name);
736 if (c == EOF) unexpected_EOF();
740 value = get_number();
741 if (bp->value != UNDEFINED && value != bp->value)
742 revalued_warning(bp->name);
745 if (c == EOF) unexpected_EOF();
758 if (c == EOF) unexpected_EOF();
759 if (c != '<') syntax_error(lineno, line, cptr);
765 if (isalpha(c) || c == '_' || c == '.' || c == '$')
767 else if (c == '\'' || c == '"')
772 if (bp->tag && tag != bp->tag)
773 retyped_warning(bp->name);
785 if (c == EOF) unexpected_EOF();
786 if (!isalpha(c) && c != '_' && c != '.' && c != '$')
787 syntax_error(lineno, line, cptr);
789 if (bp->class == TERM)
790 terminal_start(bp->name);
791 if (goal && goal != bp)
802 cache = MALLOC(cache_size);
803 if (cache == 0) no_space();
808 if (c == EOF) unexpected_EOF();
809 if (c != '%') syntax_error(lineno, line, cptr);
810 switch (k = keyword())
816 copy_text(prolog_file);
842 pitem = (bucket **) MALLOC(maxitems*sizeof(bucket *));
843 if (pitem == 0) no_space();
851 plhs = (bucket **) MALLOC(maxrules*sizeof(bucket *));
852 if (plhs == 0) no_space();
856 rprec = (short *) MALLOC(maxrules*sizeof(short));
857 if (rprec == 0) no_space();
861 rassoc = (char *) MALLOC(maxrules*sizeof(char));
862 if (rassoc == 0) no_space();
872 pitem = (bucket **) REALLOC(pitem, maxitems*sizeof(bucket *));
873 if (pitem == 0) no_space();
880 plhs = (bucket **) REALLOC(plhs, maxrules*sizeof(bucket *));
881 if (plhs == 0) no_space();
882 rprec = (short *) REALLOC(rprec, maxrules*sizeof(short));
883 if (rprec == 0) no_space();
884 rassoc = (char *) REALLOC(rassoc, maxrules*sizeof(char));
885 if (rassoc == 0) no_space();
907 copy_text(local_file);
915 syntax_error(lineno, line, s_cptr);
920 if (!isalpha(c) && c != '_' && c != '.' && c != '_')
921 syntax_error(lineno, line, cptr);
925 if (bp->class == TERM)
926 terminal_start(bp->name);
932 if (c == EOF) unexpected_EOF();
933 if (c != ':') syntax_error(lineno, line, cptr);
934 start_rule(bp, s_lineno);
939 start_rule(bp, s_lineno)
943 if (bp->class == TERM)
944 terminal_lhs(s_lineno);
946 if (nrules >= maxrules)
949 rprec[nrules] = UNDEFINED;
950 rassoc[nrules] = TOKEN;
958 if (!last_was_action && plhs[nrules]->tag)
960 for (i = nitems - 1; pitem[i]; --i) continue;
961 if (pitem[i+1] == 0 || pitem[i+1]->tag != plhs[nrules]->tag)
962 default_action_warning(); /** if classes don't match exactly **/
963 } /** bug: could be superclass... **/
966 if (nitems >= maxitems) expand_items();
975 register bucket *bp, **bpp;
978 sprintf(cache, "$$%d", ++gensym);
979 bp = make_bucket(cache);
980 last_symbol->next = bp;
982 bp->tag = plhs[nrules]->tag;
985 if ((nitems += 2) > maxitems)
987 bpp = pitem + nitems - 1;
989 while (bpp[0] = bpp[-1]) --bpp;
991 if (++nrules >= maxrules)
993 plhs[nrules] = plhs[nrules-1];
995 rprec[nrules] = rprec[nrules-1];
997 rassoc[nrules] = rassoc[nrules-1];
998 rassoc[nrules-1] = TOKEN;
1005 register bucket *bp;
1006 int s_lineno = lineno;
1009 if (c == '\'' || c == '"')
1018 start_rule(bp, s_lineno);
1023 if (last_was_action)
1024 insert_empty_rule();
1025 last_was_action = 0;
1027 if (++nitems > maxitems)
1029 pitem[nitems-1] = bp;
1040 register FILE *f = action_file;
1041 int a_lineno = lineno;
1042 char *a_line = dup_line();
1043 char *a_cptr = a_line + (cptr - line);
1045 if (last_was_action)
1046 insert_empty_rule();
1047 last_was_action = 1;
1049 fprintf(f, "case %d:\n", nrules - 2);
1050 fprintf(f, line_format, lineno, input_file_name);
1051 putc(' ', f); putc(' ', f);
1052 if (*cptr == '=') ++cptr;
1055 for (i = nitems - 1; pitem[i]; --i) ++n;
1064 int d_lineno = lineno;
1065 char *d_line = dup_line();
1066 char *d_cptr = d_line + (cptr - line);
1072 { if (tag && strcmp(tag, "Object"))
1073 fprintf(f, "((%s)yyVal)", tag);
1074 else fprintf(f, "yyVal");
1079 else if (isdigit(c))
1082 if (i > n) dollar_warning(d_lineno, i);
1083 if (tag && strcmp(tag, "Object"))
1084 fprintf(f, "((%s)yyVals[%d+yyTop])", tag, i - n);
1085 else fprintf(f, "yyVals[%d+yyTop]", i - n);
1089 else if (c == '-' && isdigit(cptr[1]))
1092 i = -get_number() - n;
1093 if (tag && strcmp(tag, "Object"))
1094 fprintf(f, "((%s)yyVals[%d+yyTop])", tag, i);
1095 else fprintf(f, "yyVals[%d+yyTop]", i);
1100 dollar_error(d_lineno, d_line, d_cptr);
1102 else if (cptr[1] == '$')
1104 if (ntags && plhs[nrules]->tag == 0)
1106 fprintf(f, "yyVal");
1110 else if (isdigit(cptr[1]))
1116 if (i <= 0 || i > n)
1118 tag = pitem[nitems + i - n - 1]->tag;
1120 untyped_rhs(i, pitem[nitems + i - n - 1]->name),
1121 fprintf(f, "yyVals[%d+yyTop]", i - n);
1122 else if (strcmp(tag, "Object"))
1123 fprintf(f, "((%s)yyVals[%d+yyTop])", tag, i - n);
1125 fprintf(f, "yyVals[%d+yyTop]", i - n);
1130 dollar_warning(lineno, i);
1131 fprintf(f, "yyVals[%d+yyTop]", i - n);
1135 else if (cptr[1] == '-')
1141 fprintf(f, "yyVals[%d+yyTop]", -i - n);
1145 if (isalpha(c) || c == '_' || c == '$')
1151 } while (isalnum(c) || c == '_' || c == '$');
1161 if (line) goto loop;
1162 unterminated_action(a_lineno, a_line, a_cptr);
1165 if (depth > 0) goto loop;
1166 fprintf(f, "\nbreak;\n");
1174 if (--depth > 0) goto loop;
1175 fprintf(f, "\n break;\n");
1181 int s_lineno = lineno;
1182 char *s_line = dup_line();
1183 char *s_cptr = s_line + (cptr - line - 1);
1196 unterminated_string(s_lineno, s_line, s_cptr);
1205 unterminated_string(s_lineno, s_line, s_cptr);
1216 while ((c = *++cptr) != '\n')
1218 if (c == '*' && cptr[1] == '/')
1228 int c_lineno = lineno;
1229 char *c_line = dup_line();
1230 char *c_cptr = c_line + (cptr - line - 1);
1238 if (c == '*' && *cptr == '/')
1249 unterminated_comment(c_lineno, c_line, c_cptr);
1265 register bucket *bp;
1268 if (c == '%' || c == '\\')
1276 else if ((c == 'p' || c == 'P') &&
1277 ((c = cptr[2]) == 'r' || c == 'R') &&
1278 ((c = cptr[3]) == 'e' || c == 'E') &&
1279 ((c = cptr[4]) == 'c' || c == 'C') &&
1280 ((c = cptr[5], !IS_IDENT(c))))
1283 syntax_error(lineno, line, cptr);
1286 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1288 else if (c == '\'' || c == '"')
1292 syntax_error(lineno, line, cptr);
1296 if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
1299 rprec[nrules] = bp->prec;
1300 rassoc[nrules] = bp->assoc;
1309 initialize_grammar();
1315 if (c == EOF) break;
1316 if (isalpha(c) || c == '_' || c == '.' || c == '$' || c == '\'' ||
1319 else if (c == '{' || c == '=')
1324 start_rule(plhs[nrules-1], 0);
1329 if (mark_symbol()) break;
1332 syntax_error(lineno, line, cptr);
1342 if (tag_table == 0) return;
1344 for (i = 0; i < ntags; ++i)
1346 assert(tag_table[i]);
1355 register bucket *bp;
1356 register char *p, *s, *t;
1358 name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */
1359 for (bp = first_symbol; bp; bp = bp->next)
1360 name_pool_size += strlen(bp->name) + 1;
1361 name_pool = MALLOC(name_pool_size);
1362 if (name_pool == 0) no_space();
1364 strcpy(name_pool, "$accept");
1365 strcpy(name_pool+8, "$end");
1367 for (bp = first_symbol; bp; bp = bp->next)
1371 while (*t++ = *s++) continue;
1380 register bucket *bp;
1382 if (goal->class == UNKNOWN)
1383 undefined_goal(goal->name);
1385 for (bp = first_symbol; bp; bp = bp->next)
1387 if (bp->class == UNKNOWN)
1389 undefined_symbol_warning(bp->name);
1398 register bucket *bp;
1399 register bucket **v;
1400 register int i, j, k, n;
1404 for (bp = first_symbol; bp; bp = bp->next)
1407 if (bp->class == TERM) ++ntokens;
1409 start_symbol = ntokens;
1410 nvars = nsyms - ntokens;
1412 symbol_name = (char **) MALLOC(nsyms*sizeof(char *));
1413 if (symbol_name == 0) no_space();
1414 symbol_value = (short *) MALLOC(nsyms*sizeof(short));
1415 if (symbol_value == 0) no_space();
1416 symbol_prec = (short *) MALLOC(nsyms*sizeof(short));
1417 if (symbol_prec == 0) no_space();
1418 symbol_assoc = MALLOC(nsyms);
1419 if (symbol_assoc == 0) no_space();
1421 v = (bucket **) MALLOC(nsyms*sizeof(bucket *));
1422 if (v == 0) no_space();
1425 v[start_symbol] = 0;
1428 j = start_symbol + 1;
1429 for (bp = first_symbol; bp; bp = bp->next)
1431 if (bp->class == TERM)
1436 assert(i == ntokens && j == nsyms);
1438 for (i = 1; i < ntokens; ++i)
1441 goal->index = start_symbol + 1;
1442 k = start_symbol + 2;
1452 for (i = start_symbol + 1; i < nsyms; ++i)
1462 for (i = 1; i < ntokens; ++i)
1467 for (j = k++; j > 0 && symbol_value[j-1] > n; --j)
1468 symbol_value[j] = symbol_value[j-1];
1469 symbol_value[j] = n;
1473 if (v[1]->value == UNDEFINED)
1478 for (i = 2; i < ntokens; ++i)
1480 if (v[i]->value == UNDEFINED)
1482 while (j < k && n == symbol_value[j])
1484 while (++j < k && n == symbol_value[j]) continue;
1492 symbol_name[0] = name_pool + 8;
1493 symbol_value[0] = 0;
1495 symbol_assoc[0] = TOKEN;
1496 for (i = 1; i < ntokens; ++i)
1498 symbol_name[i] = v[i]->name;
1499 symbol_value[i] = v[i]->value;
1500 symbol_prec[i] = v[i]->prec;
1501 symbol_assoc[i] = v[i]->assoc;
1503 symbol_name[start_symbol] = name_pool;
1504 symbol_value[start_symbol] = -1;
1505 symbol_prec[start_symbol] = 0;
1506 symbol_assoc[start_symbol] = TOKEN;
1507 for (++i; i < nsyms; ++i)
1510 symbol_name[k] = v[i]->name;
1511 symbol_value[k] = v[i]->value;
1512 symbol_prec[k] = v[i]->prec;
1513 symbol_assoc[k] = v[i]->assoc;
1525 ritem = (short *) MALLOC(nitems*sizeof(short));
1526 if (ritem == 0) no_space();
1527 rlhs = (short *) MALLOC(nrules*sizeof(short));
1528 if (rlhs == 0) no_space();
1529 rrhs = (short *) MALLOC((nrules+1)*sizeof(short));
1530 if (rrhs == 0) no_space();
1531 rprec = (short *) REALLOC(rprec, nrules*sizeof(short));
1532 if (rprec == 0) no_space();
1533 rassoc = REALLOC(rassoc, nrules);
1534 if (rassoc == 0) no_space();
1537 ritem[1] = goal->index;
1542 rlhs[2] = start_symbol;
1548 for (i = 3; i < nrules; ++i)
1550 rlhs[i] = plhs[i]->index;
1556 ritem[j] = pitem[j]->index;
1557 if (pitem[j]->class == TERM)
1559 prec = pitem[j]->prec;
1560 assoc = pitem[j]->assoc;
1566 if (rprec[i] == UNDEFINED)
1581 register int i, j, k;
1583 register FILE *f = verbose_file;
1588 for (i = 2; i < nrules; ++i)
1590 if (rlhs[i] != rlhs[i-1])
1592 if (i != 2) fprintf(f, "\n");
1593 fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]);
1594 spacing = strlen(symbol_name[rlhs[i]]) + 1;
1598 fprintf(f, "%4d ", i - 2);
1600 while (--j >= 0) putc(' ', f);
1604 while (ritem[k] >= 0)
1606 fprintf(f, " %s", symbol_name[ritem[k]]);
1617 create_symbol_table();
1618 read_declarations();
1620 free_symbol_table();