2 * Copyright (c) 1989 The Regents of the University of California.
5 * This code is derived from software contributed to Berkeley by
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 static char sccsid[] = "@(#)reader.c 5.7 (Berkeley) 1/20/91";
43 /* The line size must be a positive integer. One hundred was chosen */
44 /* because few lines in Yacc input grammars exceed 100 characters. */
45 /* Note that if a line exceeds LINESIZE characters, the line buffer */
46 /* will be expanded to accommodate it. */
74 char *line_format = "\t\t\t\t\t// line %d \"%s\"\n";
81 if (cinc >= cache_size)
84 cache = REALLOC(cache, cache_size);
85 if (cache == 0) no_space();
94 register FILE *f = input_file;
98 if (saw_eof || (c = getc(f)) == EOF)
100 if (line) { FREE(line); line = 0; }
106 if (line == 0 || linesize != (LINESIZE + 1))
108 if (line) FREE(line);
109 linesize = LINESIZE + 1;
110 line = MALLOC(linesize);
111 if (line == 0) no_space();
119 if (c == '\n') { cptr = line; return; }
122 linesize += LINESIZE;
123 line = REALLOC(line, linesize);
124 if (line == 0) no_space();
141 register char *p, *s, *t;
143 if (line == 0) return (0);
145 while (*s != '\n') ++s;
146 p = MALLOC(s - line + 1);
147 if (p == 0) no_space();
151 while ((*t++ = *s++) != '\n') continue;
160 int st_lineno = lineno;
161 char *st_line = dup_line();
162 char *st_cptr = st_line + (cptr - line);
167 if (*s == '*' && s[1] == '/')
177 unterminated_comment(st_lineno, st_line, st_cptr);
205 if (line == 0) return (EOF);
231 else if (s[1] == '/')
234 if (line == 0) return (EOF);
262 if (isupper(c)) c = tolower(c);
265 else if (isdigit(c) || c == '_' || c == '.' || c == '$')
273 if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0)
275 if (strcmp(cache, "type") == 0)
277 if (strcmp(cache, "left") == 0)
279 if (strcmp(cache, "right") == 0)
281 if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0)
283 if (strcmp(cache, "start") == 0)
291 if (c == '%' || c == '\\')
302 syntax_error(lineno, line, t_cptr);
312 int need_newline = 0;
313 int t_lineno = lineno;
314 char *t_line = dup_line();
315 char *t_cptr = t_line + (cptr - line - 2);
321 unterminated_text(t_lineno, t_line, t_cptr);
323 fprintf(f, line_format, lineno, input_file_name);
335 unterminated_text(t_lineno, t_line, t_cptr);
340 int s_lineno = lineno;
341 char *s_line = dup_line();
342 char *s_cptr = s_line + (cptr - line - 1);
357 unterminated_string(s_lineno, s_line, s_cptr);
366 unterminated_string(s_lineno, s_line, s_cptr);
378 do putc(c, f); while ((c = *++cptr) != '\n');
383 int c_lineno = lineno;
384 char *c_line = dup_line();
385 char *c_cptr = c_line + (cptr - line - 1);
393 if (c == '*' && *cptr == '/')
404 unterminated_comment(c_lineno, c_line, c_cptr);
415 if (need_newline) putc('\n', f);
433 if (c >= '0' && c <= '9')
435 if (c >= 'A' && c <= 'F')
436 return (c - 'A' + 10);
437 if (c >= 'a' && c <= 'f')
438 return (c - 'a' + 10);
446 register int c, quote;
451 int s_lineno = lineno;
452 char *s_line = dup_line();
453 char *s_cptr = s_line + (cptr - line);
460 if (c == quote) break;
461 if (c == '\n') unterminated_string(s_lineno, s_line, s_cptr);
464 char *c_cptr = cptr - 1;
471 if (line == 0) unterminated_string(s_lineno, s_line, s_cptr);
474 case '0': case '1': case '2': case '3':
475 case '4': case '5': case '6': case '7':
480 n = (n << 3) + (c - '0');
484 n = (n << 3) + (c - '0');
488 if (n > MAXCHAR) illegal_character(c_cptr);
495 if (n < 0 || n >= 16)
496 illegal_character(c_cptr);
501 if (i < 0 || i >= 16) break;
504 if (n > MAXCHAR) illegal_character(c_cptr);
509 case 'a': c = 7; break;
510 case 'b': c = '\b'; break;
511 case 'f': c = '\f'; break;
512 case 'n': c = '\n'; break;
513 case 'r': c = '\r'; break;
514 case 't': c = '\t'; break;
515 case 'v': c = '\v'; break;
524 if (s == 0) no_space();
526 for (i = 0; i < n; ++i)
535 for (i = 0; i < n; ++i)
537 c = ((unsigned char *)s)[i];
538 if (c == '\\' || c == cache[0])
550 case 7: cachec('a'); break;
551 case '\b': cachec('b'); break;
552 case '\f': cachec('f'); break;
553 case '\n': cachec('n'); break;
554 case '\r': cachec('r'); break;
555 case '\t': cachec('t'); break;
556 case '\v': cachec('v'); break;
558 cachec(((c >> 6) & 7) + '0');
559 cachec(((c >> 3) & 7) + '0');
560 cachec((c & 7) + '0');
574 if (n == 1 && bp->value == UNDEFINED)
575 bp->value = *(unsigned char *)s;
588 if (strcmp(name, ".") == 0 ||
589 strcmp(name, "$accept") == 0 ||
590 strcmp(name, "$end") == 0)
593 if (name[0] == '$' && name[1] == '$' && isdigit(name[2]))
596 while (isdigit(*s)) ++s;
597 if (*s == NUL) return (1);
610 for (c = *cptr; IS_IDENT(c); c = *++cptr)
614 if (is_reserved(cache)) used_reserved(cache);
616 return (lookup(cache));
627 for (c = *cptr; isdigit(c); c = *++cptr)
628 n = 10*n + (c - '0');
640 int t_lineno = lineno;
641 char *t_line = dup_line();
642 char *t_cptr = t_line + (cptr - line);
646 if (c == EOF) unexpected_EOF();
647 if (emptyOk && c == '>') {
648 ++cptr; return 0; // 0 indicates empty tag if emptyOk
650 if (!isalpha(c) && c != '_' && c != '$')
651 illegal_tag(t_lineno, t_line, t_cptr);
654 do { cachec(c); c = *++cptr; } while (IS_IDENT(c));
658 if (c == EOF) unexpected_EOF();
660 illegal_tag(t_lineno, t_line, t_cptr);
663 for (i = 0; i < ntags; ++i)
665 if (strcmp(cache, tag_table[i]) == 0)
666 return (tag_table[i]);
672 tag_table = (char **)
673 (tag_table ? REALLOC(tag_table, tagmax*sizeof(char *))
674 : MALLOC(tagmax*sizeof(char *)));
675 if (tag_table == 0) no_space();
679 if (s == 0) no_space();
681 tag_table[ntags] = s;
688 declare_tokens(assoc)
696 if (assoc != TOKEN) ++prec;
699 if (c == EOF) unexpected_EOF();
704 if (c == EOF) unexpected_EOF();
709 if (isalpha(c) || c == '_' || c == '.' || c == '$')
711 else if (c == '\'' || c == '"')
716 if (bp == goal) tokenized_start(bp->name);
721 if (bp->tag && tag != bp->tag)
722 retyped_warning(bp->name);
728 if (bp->prec && prec != bp->prec)
729 reprec_warning(bp->name);
735 if (c == EOF) unexpected_EOF();
739 value = get_number();
740 if (bp->value != UNDEFINED && value != bp->value)
741 revalued_warning(bp->name);
744 if (c == EOF) unexpected_EOF();
757 if (c == EOF) unexpected_EOF();
758 if (c != '<') syntax_error(lineno, line, cptr);
764 if (isalpha(c) || c == '_' || c == '.' || c == '$')
766 else if (c == '\'' || c == '"')
771 if (bp->tag && tag != bp->tag)
772 retyped_warning(bp->name);
784 if (c == EOF) unexpected_EOF();
785 if (!isalpha(c) && c != '_' && c != '.' && c != '$')
786 syntax_error(lineno, line, cptr);
788 if (bp->class == TERM)
789 terminal_start(bp->name);
790 if (goal && goal != bp)
801 cache = MALLOC(cache_size);
802 if (cache == 0) no_space();
807 if (c == EOF) unexpected_EOF();
808 if (c != '%') syntax_error(lineno, line, cptr);
809 switch (k = keyword())
815 copy_text(prolog_file);
841 pitem = (bucket **) MALLOC(maxitems*sizeof(bucket *));
842 if (pitem == 0) no_space();
850 plhs = (bucket **) MALLOC(maxrules*sizeof(bucket *));
851 if (plhs == 0) no_space();
855 rprec = (short *) MALLOC(maxrules*sizeof(short));
856 if (rprec == 0) no_space();
860 rassoc = (char *) MALLOC(maxrules*sizeof(char));
861 if (rassoc == 0) no_space();
871 pitem = (bucket **) REALLOC(pitem, maxitems*sizeof(bucket *));
872 if (pitem == 0) no_space();
879 plhs = (bucket **) REALLOC(plhs, maxrules*sizeof(bucket *));
880 if (plhs == 0) no_space();
881 rprec = (short *) REALLOC(rprec, maxrules*sizeof(short));
882 if (rprec == 0) no_space();
883 rassoc = (char *) REALLOC(rassoc, maxrules*sizeof(char));
884 if (rassoc == 0) no_space();
906 copy_text(local_file);
914 syntax_error(lineno, line, s_cptr);
919 if (!isalpha(c) && c != '_' && c != '.' && c != '$') /* '$' may start a symbol name; the original tested '_' twice */
920 syntax_error(lineno, line, cptr);
924 if (bp->class == TERM)
925 terminal_start(bp->name);
931 if (c == EOF) unexpected_EOF();
932 if (c != ':') syntax_error(lineno, line, cptr);
933 start_rule(bp, s_lineno);
938 start_rule(bp, s_lineno)
942 if (bp->class == TERM)
943 terminal_lhs(s_lineno);
945 if (nrules >= maxrules)
948 rprec[nrules] = UNDEFINED;
949 rassoc[nrules] = TOKEN;
957 if (!last_was_action && plhs[nrules]->tag)
959 for (i = nitems - 1; pitem[i]; --i) continue;
960 if (pitem[i+1] == 0 || pitem[i+1]->tag != plhs[nrules]->tag)
961 default_action_warning(); /** if classes don't match exactly **/
962 } /** bug: could be superclass... **/
965 if (nitems >= maxitems) expand_items();
974 register bucket *bp, **bpp;
977 sprintf(cache, "$$%d", ++gensym);
978 bp = make_bucket(cache);
979 last_symbol->next = bp;
981 bp->tag = plhs[nrules]->tag;
984 if ((nitems += 2) > maxitems)
986 bpp = pitem + nitems - 1;
988 while (bpp[0] = bpp[-1]) --bpp;
990 if (++nrules >= maxrules)
992 plhs[nrules] = plhs[nrules-1];
994 rprec[nrules] = rprec[nrules-1];
996 rassoc[nrules] = rassoc[nrules-1];
997 rassoc[nrules-1] = TOKEN;
1004 register bucket *bp;
1005 int s_lineno = lineno;
1008 if (c == '\'' || c == '"')
1017 start_rule(bp, s_lineno);
1022 if (last_was_action)
1023 insert_empty_rule();
1024 last_was_action = 0;
1026 if (++nitems > maxitems)
1028 pitem[nitems-1] = bp;
1039 register FILE *f = action_file;
1040 int a_lineno = lineno;
1041 char *a_line = dup_line();
1042 char *a_cptr = a_line + (cptr - line);
1044 if (last_was_action)
1045 insert_empty_rule();
1046 last_was_action = 1;
1048 fprintf(f, "case %d:\n", nrules - 2);
1049 fprintf(f, line_format, lineno, input_file_name);
1050 putc(' ', f); putc(' ', f);
1051 if (*cptr == '=') ++cptr;
1054 for (i = nitems - 1; pitem[i]; --i) ++n;
1063 int d_lineno = lineno;
1064 char *d_line = dup_line();
1065 char *d_cptr = d_line + (cptr - line);
1071 { if (tag && strcmp(tag, "Object"))
1072 fprintf(f, "((%s)yyVal)", tag);
1073 else fprintf(f, "yyVal");
1078 else if (isdigit(c))
1081 if (i > n) dollar_warning(d_lineno, i);
1082 if (tag && strcmp(tag, "Object"))
1083 fprintf(f, "((%s)yyVals[%d+yyTop])", tag, i - n);
1084 else fprintf(f, "yyVals[%d+yyTop]", i - n);
1088 else if (c == '-' && isdigit(cptr[1]))
1091 i = -get_number() - n;
1092 if (tag && strcmp(tag, "Object")) /* cast only when the tag is set and is not "Object" */
1093 fprintf(f, "((%s)yyVals[%d+yyTop])", tag, i);
1094 else fprintf(f, "yyVals[%d+yyTop]", i); /* format has a single %d: pass i alone, not tag */
1099 dollar_error(d_lineno, d_line, d_cptr);
1101 else if (cptr[1] == '$')
1103 if (ntags && plhs[nrules]->tag == 0)
1105 fprintf(f, "yyVal");
1109 else if (isdigit(cptr[1]))
1115 if (i <= 0 || i > n)
1117 tag = pitem[nitems + i - n - 1]->tag;
1119 untyped_rhs(i, pitem[nitems + i - n - 1]->name),
1120 fprintf(f, "yyVals[%d+yyTop]", i - n);
1121 else if (strcmp(tag, "Object"))
1122 fprintf(f, "((%s)yyVals[%d+yyTop])", tag, i - n);
1124 fprintf(f, "yyVals[%d+yyTop]", i - n);
1129 dollar_warning(lineno, i);
1130 fprintf(f, "yyVals[%d+yyTop]", i - n);
1134 else if (cptr[1] == '-')
1140 fprintf(f, "yyVals[%d+yyTop]", -i - n);
1144 if (isalpha(c) || c == '_' || c == '$')
1150 } while (isalnum(c) || c == '_' || c == '$');
1160 if (line) goto loop;
1161 unterminated_action(a_lineno, a_line, a_cptr);
1164 if (depth > 0) goto loop;
1165 fprintf(f, "\nbreak;\n");
1173 if (--depth > 0) goto loop;
1174 fprintf(f, "\n break;\n");
1180 int s_lineno = lineno;
1181 char *s_line = dup_line();
1182 char *s_cptr = s_line + (cptr - line - 1);
1195 unterminated_string(s_lineno, s_line, s_cptr);
1204 unterminated_string(s_lineno, s_line, s_cptr);
1215 while ((c = *++cptr) != '\n')
1217 if (c == '*' && cptr[1] == '/')
1227 int c_lineno = lineno;
1228 char *c_line = dup_line();
1229 char *c_cptr = c_line + (cptr - line - 1);
1237 if (c == '*' && *cptr == '/')
1248 unterminated_comment(c_lineno, c_line, c_cptr);
1264 register bucket *bp;
1267 if (c == '%' || c == '\\')
1275 else if ((c == 'p' || c == 'P') &&
1276 ((c = cptr[2]) == 'r' || c == 'R') &&
1277 ((c = cptr[3]) == 'e' || c == 'E') &&
1278 ((c = cptr[4]) == 'c' || c == 'C') &&
1279 ((c = cptr[5], !IS_IDENT(c))))
1282 syntax_error(lineno, line, cptr);
1285 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1287 else if (c == '\'' || c == '"')
1291 syntax_error(lineno, line, cptr);
1295 if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
1298 rprec[nrules] = bp->prec;
1299 rassoc[nrules] = bp->assoc;
1308 initialize_grammar();
1314 if (c == EOF) break;
1315 if (isalpha(c) || c == '_' || c == '.' || c == '$' || c == '\'' ||
1318 else if (c == '{' || c == '=')
1323 start_rule(plhs[nrules-1], 0);
1328 if (mark_symbol()) break;
1331 syntax_error(lineno, line, cptr);
1341 if (tag_table == 0) return;
1343 for (i = 0; i < ntags; ++i)
1345 assert(tag_table[i]);
1354 register bucket *bp;
1355 register char *p, *s, *t;
1357 name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */
1358 for (bp = first_symbol; bp; bp = bp->next)
1359 name_pool_size += strlen(bp->name) + 1;
1360 name_pool = MALLOC(name_pool_size);
1361 if (name_pool == 0) no_space();
1363 strcpy(name_pool, "$accept");
1364 strcpy(name_pool+8, "$end");
1366 for (bp = first_symbol; bp; bp = bp->next)
1370 while (*t++ = *s++) continue;
1379 register bucket *bp;
1381 if (goal->class == UNKNOWN)
1382 undefined_goal(goal->name);
1384 for (bp = first_symbol; bp; bp = bp->next)
1386 if (bp->class == UNKNOWN)
1388 undefined_symbol_warning(bp->name);
1397 register bucket *bp;
1398 register bucket **v;
1399 register int i, j, k, n;
1403 for (bp = first_symbol; bp; bp = bp->next)
1406 if (bp->class == TERM) ++ntokens;
1408 start_symbol = ntokens;
1409 nvars = nsyms - ntokens;
1411 symbol_name = (char **) MALLOC(nsyms*sizeof(char *));
1412 if (symbol_name == 0) no_space();
1413 symbol_value = (short *) MALLOC(nsyms*sizeof(short));
1414 if (symbol_value == 0) no_space();
1415 symbol_prec = (short *) MALLOC(nsyms*sizeof(short));
1416 if (symbol_prec == 0) no_space();
1417 symbol_assoc = MALLOC(nsyms);
1418 if (symbol_assoc == 0) no_space();
1420 v = (bucket **) MALLOC(nsyms*sizeof(bucket *));
1421 if (v == 0) no_space();
1424 v[start_symbol] = 0;
1427 j = start_symbol + 1;
1428 for (bp = first_symbol; bp; bp = bp->next)
1430 if (bp->class == TERM)
1435 assert(i == ntokens && j == nsyms);
1437 for (i = 1; i < ntokens; ++i)
1440 goal->index = start_symbol + 1;
1441 k = start_symbol + 2;
1451 for (i = start_symbol + 1; i < nsyms; ++i)
1461 for (i = 1; i < ntokens; ++i)
1466 for (j = k++; j > 0 && symbol_value[j-1] > n; --j)
1467 symbol_value[j] = symbol_value[j-1];
1468 symbol_value[j] = n;
1472 if (v[1]->value == UNDEFINED)
1477 for (i = 2; i < ntokens; ++i)
1479 if (v[i]->value == UNDEFINED)
1481 while (j < k && n == symbol_value[j])
1483 while (++j < k && n == symbol_value[j]) continue;
1491 symbol_name[0] = name_pool + 8;
1492 symbol_value[0] = 0;
1494 symbol_assoc[0] = TOKEN;
1495 for (i = 1; i < ntokens; ++i)
1497 symbol_name[i] = v[i]->name;
1498 symbol_value[i] = v[i]->value;
1499 symbol_prec[i] = v[i]->prec;
1500 symbol_assoc[i] = v[i]->assoc;
1502 symbol_name[start_symbol] = name_pool;
1503 symbol_value[start_symbol] = -1;
1504 symbol_prec[start_symbol] = 0;
1505 symbol_assoc[start_symbol] = TOKEN;
1506 for (++i; i < nsyms; ++i)
1509 symbol_name[k] = v[i]->name;
1510 symbol_value[k] = v[i]->value;
1511 symbol_prec[k] = v[i]->prec;
1512 symbol_assoc[k] = v[i]->assoc;
1524 ritem = (short *) MALLOC(nitems*sizeof(short));
1525 if (ritem == 0) no_space();
1526 rlhs = (short *) MALLOC(nrules*sizeof(short));
1527 if (rlhs == 0) no_space();
1528 rrhs = (short *) MALLOC((nrules+1)*sizeof(short));
1529 if (rrhs == 0) no_space();
1530 rprec = (short *) REALLOC(rprec, nrules*sizeof(short));
1531 if (rprec == 0) no_space();
1532 rassoc = REALLOC(rassoc, nrules);
1533 if (rassoc == 0) no_space();
1536 ritem[1] = goal->index;
1541 rlhs[2] = start_symbol;
1547 for (i = 3; i < nrules; ++i)
1549 rlhs[i] = plhs[i]->index;
1555 ritem[j] = pitem[j]->index;
1556 if (pitem[j]->class == TERM)
1558 prec = pitem[j]->prec;
1559 assoc = pitem[j]->assoc;
1565 if (rprec[i] == UNDEFINED)
1580 register int i, j, k;
1582 register FILE *f = verbose_file;
1587 for (i = 2; i < nrules; ++i)
1589 if (rlhs[i] != rlhs[i-1])
1591 if (i != 2) fprintf(f, "\n");
1592 fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]);
1593 spacing = strlen(symbol_name[rlhs[i]]) + 1;
1597 fprintf(f, "%4d ", i - 2);
1599 while (--j >= 0) putc(' ', f);
1603 while (ritem[k] >= 0)
1605 fprintf(f, " %s", symbol_name[ritem[k]]);
1616 create_symbol_table();
1617 read_declarations();
1619 free_symbol_table();