2 * monoburg.c: an iburg like code generator generator
5 * Dietmar Maurer (dietmar@ximian.com)
7 * (C) 2001 Ximian, Inc.
14 extern void yyparse (void);
16 static GHashTable *term_hash;
17 static GList *term_list;
18 static GHashTable *nonterm_hash;
19 static GList *nonterm_list;
20 static GList *rule_list;
26 static void output (char *fmt, ...)
31 vfprintf (outputfd, fmt, ap);
36 create_rule (char *id, Tree *tree, char *code, char *cost, char *cfunc)
38 Rule *rule = g_new0 (Rule, 1);
43 rule->lhs = nonterm (id);
45 rule_list = g_list_append (rule_list, rule);
46 rule->cost = g_strdup (cost);
47 rule->cfunc = g_strdup (cfunc);
48 rule->code = g_strdup (code);
52 yyerror ("duplicated costs (constant costs and cost function)");
54 rule->cost = g_strdup_printf ("mono_burg_cost_%d (tree)",
55 g_list_length (rule_list));
58 rule->lhs->rules = g_list_append (rule->lhs->rules, rule);
61 tree->op->rules = g_list_append (tree->op->rules, rule);
63 tree->nonterm->chain = g_list_append (tree->nonterm->chain, rule);
67 create_tree (char *id, Tree *left, Tree *right)
69 int arity = (left != NULL) + (right != NULL);
70 Term *term = g_hash_table_lookup (term_hash, id);
71 Tree *tree = g_new0 (Tree, 1);
74 if (term->arity == -1)
77 if (term->arity != arity)
78 yyerror ("changed arity of terminal %s from %d to %d",
79 id, term->arity, arity);
85 tree->nonterm = nonterm (id);
92 check_term_num (char *key, Term *value, int num)
94 if (num != -1 && value->number == num)
95 yyerror ("duplicate terminal id \"%s\"", key);
99 create_term (char *id, int num)
104 yyerror ("terminal definition after nonterminal definition");
107 yyerror ("invalid terminal number %d", num);
110 term_hash = g_hash_table_new (g_str_hash , g_str_equal);
112 g_hash_table_foreach (term_hash, (GHFunc) check_term_num, (gpointer) num);
114 term = g_new0 (Term, 1);
116 term->name = g_strdup (id);
120 term_list = g_list_append (term_list, term);
122 g_hash_table_insert (term_hash, term->name, term);
131 nonterm_hash = g_hash_table_new (g_str_hash , g_str_equal);
133 if ((nterm = g_hash_table_lookup (nonterm_hash, id)))
136 nterm = g_new0 (NonTerm, 1);
138 nterm->name = g_strdup (id);
139 nonterm_list = g_list_append (nonterm_list, nterm);
140 nterm->number = g_list_length (nonterm_list);
142 g_hash_table_insert (nonterm_hash, nterm->name, nterm);
148 start_nonterm (char *id)
150 static gboolean start_def;
153 yyerror ("start symbol redeclared");
160 emit_tree_string (Tree *tree)
163 output ("%s", tree->op->name);
164 if (tree->op->arity) {
166 emit_tree_string (tree->left);
169 emit_tree_string (tree->right);
174 output ("%s", tree->nonterm->name);
178 emit_rule_string (Rule *rule, char *fill)
180 output ("%s/* ", fill);
182 output ("%s: ", rule->lhs->name);
184 emit_tree_string (rule->tree);
192 GList *l = term_list;
196 Term *t = (Term *)l->data;
197 if (t->number == i) {
207 term_compare_func (Term *t1, Term *t2)
209 return t1->number - t2->number;
217 output ("#include <glib.h>\n");
220 output ("#ifndef MBTREE_TYPE\n#error MBTREE_TYPE undefined\n#endif\n");
221 output ("#ifndef MBTREE_OP\n#define MBTREE_OP(t) ((t)->op)\n#endif\n");
222 output ("#ifndef MBTREE_LEFT\n#define MBTREE_LEFT(t) ((t)->left)\n#endif\n");
223 output ("#ifndef MBTREE_RIGHT\n#define MBTREE_RIGHT(t) ((t)->right)\n#endif\n");
224 output ("#ifndef MBTREE_STATE\n#define MBTREE_STATE(t) ((t)->state)\n#endif\n");
225 output ("#ifndef MBCGEN_TYPE\n#define MBCGEN_TYPE int\n#endif\n");
228 output ("#define MBMAXCOST 32768\n");
231 output ("#define MBCOND(x) if (!(x)) return MBMAXCOST;\n");
235 for (l = term_list; l; l = l->next) {
236 Term *t = (Term *)l->data;
238 t->number = next_term_num ();
240 term_list = g_list_sort (term_list, (GCompareFunc)term_compare_func);
242 for (l = term_list; l; l = l->next) {
243 Term *t = (Term *)l->data;
245 t->number = next_term_num ();
247 output ("#define MB_TERM_%s\t %d\n", t->name, t->number);
259 for (l = nonterm_list; l; l = l->next) {
260 NonTerm *n = (NonTerm *)l->data;
261 output ("#define MB_NTERM_%s\t%d\n", n->name, n->number);
263 output ("#define MB_MAX_NTERMS\t%d\n", g_list_length (nonterm_list));
273 output ("typedef struct _MBState MBState;\n");
274 output ("struct _MBState {\n");
275 output ("\tint\t\t op;\n");
276 output ("\tMBState\t\t*left, *right;\n");
277 output ("\tguint16\t\tcost[%d];\n", g_list_length (nonterm_list) + 1);
279 for (l = nonterm_list; l; l = l->next) {
280 NonTerm *n = (NonTerm *)l->data;
281 g_assert (g_list_length (n->rules) < 256);
282 i = g_list_length (n->rules);
286 output ("\tunsigned int\t rule_%s:%d;\n", n->name, j);
297 for (l = nonterm_list; l; l = l->next) {
298 NonTerm *n = (NonTerm *)l->data;
299 output ("int mono_burg_decode_%s[] = {\n", n->name);
301 for (rl = n->rules; rl; rl = rl->next) {
302 Rule *rule = (Rule *)rl->data;
303 output ("\t%d,\n", g_list_index (rule_list, rule) + 1);
311 emit_tree_match (char *st, Tree *t)
315 output ("\t\t\t%sop == %d /* %s */", st, t->op->number, t->op->name);
317 if (t->left && t->left->op) {
318 tn = g_strconcat (st, "left->", NULL);
320 emit_tree_match (tn, t->left);
324 if (t->right && t->right->op) {
325 tn = g_strconcat (st, "right->", NULL);
327 emit_tree_match (tn, t->right);
333 emit_rule_match (Rule *rule)
335 Tree *t = rule->tree;
337 if ((t->left && t->left->op) ||
338 (t->right && t->right->op)) {
339 output ("\t\tif (\n");
340 emit_tree_match ("p->", t);
341 output ("\n\t\t)\n");
346 emit_costs (char *st, Tree *t)
353 tn = g_strconcat (st, "left->", NULL);
354 emit_costs (tn, t->left);
359 tn = g_strconcat (st, "right->", NULL);
360 emit_costs (tn, t->right);
363 output ("%scost[MB_NTERM_%s] + ", st, t->nonterm->name);
367 emit_cond_assign (Rule *rule, char *cost, char *fill)
372 rc = g_strconcat ("c + ", cost, NULL);
377 output ("%sif (%s < p->cost[MB_NTERM_%s]) {\n", fill, rc, rule->lhs->name);
379 output ("%s\tp->cost[MB_NTERM_%s] = %s;\n", fill, rule->lhs->name, rc);
381 output ("%s\tp->rule_%s = %d;\n", fill, rule->lhs->name,
382 g_list_index (rule->lhs->rules, rule) + 1);
384 if (rule->lhs->chain)
385 output ("%s\tclosure_%s (p, %s);\n", fill, rule->lhs->name, rc);
387 output ("%s}\n", fill);
399 output ("static MBState *\n");
400 output ("mono_burg_label_priv (MBTREE_TYPE *tree) {\n");
402 output ("\tint arity;\n");
403 output ("\tint c;\n");
404 output ("\tMBState *p, *left, *right;\n\n");
406 output ("\tswitch (mono_burg_arity [MBTREE_OP(tree)]) {\n");
407 output ("\tcase 0:\n");
408 output ("\t\tleft = NULL;\n");
409 output ("\t\tright = NULL;\n");
410 output ("\t\tbreak;\n");
411 output ("\tcase 1:\n");
412 output ("\t\tleft = mono_burg_label_priv (MBTREE_LEFT(tree));\n");
413 output ("\t\tright = NULL;\n");
414 output ("\t\tbreak;\n");
415 output ("\tcase 2:\n");
416 output ("\t\tleft = mono_burg_label_priv (MBTREE_LEFT(tree));\n");
417 output ("\t\tright = mono_burg_label_priv (MBTREE_RIGHT(tree));\n");
420 output ("\tarity = (left != NULL) + (right != NULL);\n");
421 output ("\tg_assert (arity == mono_burg_arity [MBTREE_OP(tree)]);\n\n");
423 output ("\tp = g_new0 (MBState, 1);\n");
424 output ("\tp->op = MBTREE_OP(tree);\n");
425 output ("\tp->left = left;\n");
426 output ("\tp->right = right;\n");
428 for (l = nonterm_list, i = 0; l; l = l->next) {
429 output ("\tp->cost [%d] = 32767;\n", ++i);
433 output ("\tswitch (MBTREE_OP(tree)) {\n");
434 for (l = term_list; l; l = l->next) {
435 Term *t = (Term *)l->data;
437 output ("\tcase %d: /* %s */\n", t->number, t->name);
439 for (rl = t->rules; rl; rl = rl->next) {
440 Rule *rule = (Rule *)rl->data;
441 Tree *t = rule->tree;
443 emit_rule_string (rule, "\t\t");
445 emit_rule_match (rule);
449 output ("\t\t\tc = ");
453 output ("%s;\n", rule->cost);
455 emit_cond_assign (rule, NULL, "\t\t\t");
460 output ("\t\tbreak;\n");
463 output ("\tdefault:\n");
464 output ("\t\tg_error (\"unknown operator\");\n");
467 output ("\tMBTREE_STATE(tree) = p;\n");
469 output ("\treturn p;\n");
473 output ("MBState *\n");
474 output ("mono_burg_label (MBTREE_TYPE *tree)\n{\n");
475 output ("\tMBState *p = mono_burg_label_priv (tree);\n");
476 output ("\treturn p->rule_%s ? p : NULL;\n", ((NonTerm *)nonterm_list->data)->name);
481 compute_kids (char *ts, Tree *tree, int *n)
486 return g_strdup_printf ("\t\tkids[%d] = %s;\n", (*n)++, ts);
487 } else if (tree->op && tree->op->arity) {
490 res = compute_kids (g_strdup_printf ("MBTREE_LEFT(%s)", ts),
492 if (tree->op->arity == 2)
493 res2 = compute_kids (g_strdup_printf ("MBTREE_RIGHT(%s)", ts),
495 return g_strconcat (res, res2, NULL);
508 output ("mono_burg_rule (MBState *state, int goal)\n{\n");
510 output ("\tg_return_val_if_fail (state != NULL, 0);\n");
511 output ("\tg_return_val_if_fail (goal > 0, 0);\n\n");
513 output ("\tswitch (goal) {\n");
515 for (nl = nonterm_list; nl; nl = nl->next) {
516 NonTerm *n = (NonTerm *)nl->data;
517 output ("\tcase MB_NTERM_%s:\n", n->name);
518 output ("\t\treturn mono_burg_decode_%s [state->rule_%s];\n",
522 output ("\tdefault: g_assert_not_reached ();\n");
524 output ("\treturn 0;\n");
528 output ("MBTREE_TYPE **\n");
529 output ("mono_burg_kids (MBTREE_TYPE *tree, int rulenr, MBTREE_TYPE *kids [])\n{\n");
530 output ("\tg_return_val_if_fail (tree != NULL, NULL);\n");
531 output ("\tg_return_val_if_fail (MBTREE_STATE(tree) != NULL, NULL);\n");
532 output ("\tg_return_val_if_fail (kids != NULL, NULL);\n\n");
534 output ("\tswitch (rulenr) {\n");
536 n = g_list_length (rule_list);
537 sa = g_new0 (char *, n);
538 si = g_new0 (int, n);
540 /* compress the case statement */
541 for (l = rule_list, i = 0, c = 0; l; l = l->next) {
542 Rule *rule = (Rule *)l->data;
546 k = compute_kids ("tree", rule->tree, &kn);
547 for (j = 0; j < c; j++)
548 if (!strcmp (sa [j], k))
556 for (i = 0; i < c; i++) {
557 for (l = rule_list, j = 0; l; l = l->next, j++)
559 output ("\tcase %d:\n", j + 1);
560 output ("%s", sa [i]);
561 output ("\t\tbreak;\n");
564 output ("\tdefault:\n\t\tg_assert_not_reached ();\n");
566 output ("\treturn kids;\n");
577 for (l = rule_list, i = 0; l; l = l->next) {
578 Rule *rule = (Rule *)l->data;
581 output ("static void ");
583 emit_rule_string (rule, "");
585 output ("mono_burg_emit_%d (MBTREE_TYPE *tree, MBCGEN_TYPE *s)\n", i);
587 output ("%s\n", rule->code);
593 output ("MBEmitFunc mono_burg_func [] = {\n");
594 output ("\tNULL,\n");
595 for (l = rule_list, i = 0; l; l = l->next) {
596 Rule *rule = (Rule *)l->data;
598 output ("\tmono_burg_emit_%d,\n", i);
600 output ("\tNULL,\n");
612 for (l = rule_list, i = 0; l; l = l->next) {
613 Rule *rule = (Rule *)l->data;
616 output ("static guint16\n");
618 emit_rule_string (rule, "");
620 output ("mono_burg_cost_%d (MBTREE_TYPE *tree)\n", i + 1);
622 output ("%s\n", rule->cfunc);
634 for (l = nonterm_list; l; l = l->next) {
635 NonTerm *n = (NonTerm *)l->data;
638 output ("static void closure_%s (MBState *p, int c);\n", n->name);
643 for (l = nonterm_list; l; l = l->next) {
644 NonTerm *n = (NonTerm *)l->data;
647 output ("static void\n");
648 output ("closure_%s (MBState *p, int c)\n{\n", n->name);
649 for (rl = n->chain; rl; rl = rl->next) {
650 Rule *rule = (Rule *)rl->data;
652 emit_rule_string (rule, "\t");
653 emit_cond_assign (rule, rule->cost, "\t");
661 compute_nonterms (Tree *tree)
667 return g_strdup_printf ("MB_NTERM_%s, ", tree->nonterm->name);
669 return g_strconcat (compute_nonterms (tree->left),
670 compute_nonterms (tree->right), NULL);
681 output ("guint8 mono_burg_arity [] = {\n");
682 for (l = term_list, i = 0; l; l = l->next) {
683 Term *t = (Term *)l->data;
685 while (i < t->number) {
690 output ("\t%d, /* %s */\n", t->arity, t->name);
696 output ("char *mono_burg_term_string [] = {\n");
697 output ("\tNULL,\n");
698 for (l = term_list, i = 0; l; l = l->next) {
699 Term *t = (Term *)l->data;
700 output ("\t\"%s\",\n", t->name);
704 output ("char *mono_burg_rule_string [] = {\n");
705 output ("\tNULL,\n");
706 for (l = rule_list, i = 0; l; l = l->next) {
707 Rule *rule = (Rule *)l->data;
708 output ("\t\"%s: ", rule->lhs->name);
709 emit_tree_string (rule->tree);
714 n = g_list_length (rule_list);
715 sa = g_new0 (char *, n);
716 si = g_new0 (int, n);
718 /* compress the _nts array */
719 for (l = rule_list, i = 0, c = 0; l; l = l->next) {
720 Rule *rule = (Rule *)l->data;
721 char *s = compute_nonterms (rule->tree);
723 for (j = 0; j < c; j++)
724 if (!strcmp (sa [j], s))
729 output ("static guint16 mono_burg_nts_%d [] = { %s0 };\n", c, s);
735 output ("guint16 *mono_burg_nts [] = {\n");
737 for (l = rule_list, i = 0; l; l = l->next) {
738 Rule *rule = (Rule *)l->data;
739 output ("\tmono_burg_nts_%d, ", si [i++]);
740 emit_rule_string (rule, "");
748 output ("typedef void (*MBEmitFunc) (MBTREE_TYPE *tree, MBCGEN_TYPE *s);\n\n");
750 output ("extern char *mono_burg_term_string [];\n");
751 output ("extern char *mono_burg_rule_string [];\n");
752 output ("extern guint16 *mono_burg_nts [];\n");
753 output ("extern MBEmitFunc mono_burg_func [];\n");
755 output ("MBState *mono_burg_label (MBTREE_TYPE *tree);\n");
756 output ("int mono_burg_rule (MBState *state, int goal);\n");
757 output ("MBTREE_TYPE **mono_burg_kids (MBTREE_TYPE *tree, int rulenr, MBTREE_TYPE *kids []);\n");
762 static void check_reach (NonTerm *n);
765 mark_reached (Tree *tree)
767 if (tree->nonterm && !tree->nonterm->reached)
768 check_reach (tree->nonterm);
770 mark_reached (tree->left);
772 mark_reached (tree->right);
776 check_reach (NonTerm *n)
781 for (l = n->rules; l; l = l->next) {
782 Rule *rule = (Rule *)l->data;
783 mark_reached (rule->tree);
792 for (l = term_list; l; l = l->next) {
793 Term *term = (Term *)l->data;
794 if (term->arity == -1)
795 g_warning ("unused terminal \"%s\"",term->name);
798 check_reach (((NonTerm *)nonterm_list->data));
800 for (l = nonterm_list; l; l = l->next) {
801 NonTerm *n = (NonTerm *)l->data;
803 g_error ("unreachable nonterm \"%s\"", n->name);
811 "Usage is: monoburg [-d file] [file] \n");
816 main (int argc, char *argv [])
818 char *deffile = NULL;
822 for (i = 1; i < argc; i++){
823 if (argv [i][0] == '-'){
824 if (argv [i][1] == 'h') {
826 } else if (argv [i][1] == 'd') {
827 deffile = argv [++i];
840 if (!(deffd = fopen (deffile, "w"))) {
841 perror ("cant open header output file");
845 output ("#ifndef _MONO_BURG_DEFS_\n");
846 output ("#define _MONO_BURG_DEFS_\n\n");
852 if (!(inputfd = fopen (infile, "r"))) {
853 perror ("cant open input file");
865 g_error ("no start symbol found");
873 output ("#endif /* _MONO_BURG_DEFS_ */\n");
877 output ("#include \"%s\"\n\n", deffile);
882 emit_emitter_func ();