2 * monoburg.c: an iburg like code generator generator
5 * Dietmar Maurer (dietmar@ximian.com)
7 * (C) 2001 Ximian, Inc.
14 extern void yyparse (void);
16 static GHashTable *term_hash;
17 static GList *term_list;
18 static GHashTable *nonterm_hash;
19 static GList *nonterm_list;
20 static GList *rule_list;
26 static void output (char *fmt, ...)
31 vfprintf (outputfd, fmt, ap);
36 create_rule (char *id, Tree *tree, char *code, char *cost, char *cfunc)
38 Rule *rule = g_new0 (Rule, 1);
43 rule->lhs = nonterm (id);
45 rule_list = g_list_append (rule_list, rule);
52 yyerror ("duplicated costs (constant costs and cost function)");
54 rule->cost = g_strdup_printf ("mono_burg_cost_%d (tree)",
55 g_list_length (rule_list));
58 rule->lhs->rules = g_list_append (rule->lhs->rules, rule);
61 tree->op->rules = g_list_append (tree->op->rules, rule);
63 tree->nonterm->chain = g_list_append (tree->nonterm->chain, rule);
67 create_tree (char *id, Tree *left, Tree *right)
69 int arity = (left != NULL) + (right != NULL);
70 Term *term = g_hash_table_lookup (term_hash, id);
71 Tree *tree = g_new0 (Tree, 1);
74 if (term->arity == -1)
77 if (term->arity != arity)
78 yyerror ("changed arity of terminal %s from %d to %d",
79 id, term->arity, arity);
85 tree->nonterm = nonterm (id);
92 check_term_num (char *key, Term *value, int num)
94 if (num != -1 && value->number == num)
95 yyerror ("duplicate terminal id \"%s\"", key);
99 create_term (char *id, int num)
104 yyerror ("terminal definition after nonterminal definition");
107 yyerror ("invalid terminal number %d", num);
110 term_hash = g_hash_table_new (g_str_hash , g_str_equal);
112 g_hash_table_foreach (term_hash, (GHFunc) check_term_num, (gpointer) num);
114 term = g_new0 (Term, 1);
120 term_list = g_list_append (term_list, term);
122 g_hash_table_insert (term_hash, term->name, term);
131 nonterm_hash = g_hash_table_new (g_str_hash , g_str_equal);
133 if ((nterm = g_hash_table_lookup (nonterm_hash, id)))
136 nterm = g_new0 (NonTerm, 1);
139 nonterm_list = g_list_append (nonterm_list, nterm);
140 nterm->number = g_list_length (nonterm_list);
142 g_hash_table_insert (nonterm_hash, nterm->name, nterm);
148 start_nonterm (char *id)
150 static gboolean start_def;
153 yyerror ("start symbol redeclared");
160 emit_tree_string (Tree *tree)
163 output ("%s", tree->op->name);
164 if (tree->op->arity) {
166 emit_tree_string (tree->left);
169 emit_tree_string (tree->right);
174 output ("%s", tree->nonterm->name);
178 emit_rule_string (Rule *rule, char *fill)
180 output ("%s/* ", fill);
182 output ("%s: ", rule->lhs->name);
184 emit_tree_string (rule->tree);
192 GList *l = term_list;
196 Term *t = (Term *)l->data;
197 if (t->number == i) {
207 term_compare_func (Term *t1, Term *t2)
209 return t1->number - t2->number;
217 output ("#include <glib.h>\n");
220 output ("#ifndef MBTREE_TYPE\n#error MBTREE_TYPE undefined\n#endif\n");
221 output ("#ifndef MBTREE_OP\n#define MBTREE_OP(t) ((t)->op)\n#endif\n");
222 output ("#ifndef MBTREE_LEFT\n#define MBTREE_LEFT(t) ((t)->left)\n#endif\n");
223 output ("#ifndef MBTREE_RIGHT\n#define MBTREE_RIGHT(t) ((t)->right)\n#endif\n");
224 output ("#ifndef MBTREE_STATE\n#define MBTREE_STATE(t) ((t)->state)\n#endif\n");
227 output ("#define MBMAXCOST 32768\n");
230 output ("#define MBCOND(x) if (!(x)) return MBMAXCOST;\n");
234 for (l = term_list; l; l = l->next) {
235 Term *t = (Term *)l->data;
237 t->number = next_term_num ();
239 term_list = g_list_sort (term_list, (GCompareFunc)term_compare_func);
241 for (l = term_list; l; l = l->next) {
242 Term *t = (Term *)l->data;
244 t->number = next_term_num ();
246 output ("#define MB_TERM_%s\t %d\n", t->name, t->number);
258 for (l = nonterm_list; l; l = l->next) {
259 NonTerm *n = (NonTerm *)l->data;
260 output ("#define MB_NTERM_%s\t%d\n", n->name, n->number);
262 output ("#define MB_MAX_NTERMS\t%d\n", g_list_length (nonterm_list));
272 output ("typedef struct _MBState MBState;\n");
273 output ("struct _MBState {\n");
274 output ("\tint\t\t op;\n");
275 output ("\tMBState\t\t*left, *right;\n");
276 output ("\tguint16\t\tcost[%d];\n", g_list_length (nonterm_list) + 1);
278 for (l = nonterm_list; l; l = l->next) {
279 NonTerm *n = (NonTerm *)l->data;
280 g_assert (g_list_length (n->rules) < 256);
281 i = g_list_length (n->rules);
285 output ("\tunsigned int\t rule_%s:%d;\n", n->name, j);
296 for (l = nonterm_list; l; l = l->next) {
297 NonTerm *n = (NonTerm *)l->data;
298 output ("int mono_burg_decode_%s[] = {\n", n->name);
300 for (rl = n->rules; rl; rl = rl->next) {
301 Rule *rule = (Rule *)rl->data;
302 output ("\t%d,\n", g_list_index (rule_list, rule) + 1);
310 emit_tree_match (char *st, Tree *t)
314 output ("\t\t\t%sop == %d /* %s */", st, t->op->number, t->op->name);
316 if (t->left && t->left->op) {
317 tn = g_strconcat (st, "left->", NULL);
319 emit_tree_match (tn, t->left);
323 if (t->right && t->right->op) {
324 tn = g_strconcat (st, "right->", NULL);
326 emit_tree_match (tn, t->right);
332 emit_rule_match (Rule *rule)
334 Tree *t = rule->tree;
336 if ((t->left && t->left->op) ||
337 (t->right && t->right->op)) {
338 output ("\t\tif (\n");
339 emit_tree_match ("p->", t);
340 output ("\n\t\t)\n");
345 emit_costs (char *st, Tree *t)
352 tn = g_strconcat (st, "left->", NULL);
353 emit_costs (tn, t->left);
358 tn = g_strconcat (st, "right->", NULL);
359 emit_costs (tn, t->right);
362 output ("%scost[MB_NTERM_%s] + ", st, t->nonterm->name);
366 emit_cond_assign (Rule *rule, char *cost, char *fill)
371 rc = g_strconcat ("c + ", cost, NULL);
376 output ("%sif (%s < p->cost[MB_NTERM_%s]) {\n", fill, rc, rule->lhs->name);
378 output ("%s\tp->cost[MB_NTERM_%s] = %s;\n", fill, rule->lhs->name, rc);
380 output ("%s\tp->rule_%s = %d;\n", fill, rule->lhs->name,
381 g_list_index (rule->lhs->rules, rule) + 1);
383 if (rule->lhs->chain)
384 output ("%s\tclosure_%s (p, %s);\n", fill, rule->lhs->name, rc);
386 output ("%s}\n", fill);
398 output ("static MBState *\n");
399 output ("mono_burg_label_priv (MBTREE_TYPE *tree) {\n");
401 output ("\tint arity;\n");
402 output ("\tint c;\n");
403 output ("\tMBState *p, *left, *right;\n\n");
405 output ("\tswitch (mono_burg_arity [MBTREE_OP(tree)]) {\n");
406 output ("\tcase 0:\n");
407 output ("\t\tleft = NULL;\n");
408 output ("\t\tright = NULL;\n");
409 output ("\t\tbreak;\n");
410 output ("\tcase 1:\n");
411 output ("\t\tleft = mono_burg_label_priv (MBTREE_LEFT(tree));\n");
412 output ("\t\tright = NULL;\n");
413 output ("\t\tbreak;\n");
414 output ("\tcase 2:\n");
415 output ("\t\tleft = mono_burg_label_priv (MBTREE_LEFT(tree));\n");
416 output ("\t\tright = mono_burg_label_priv (MBTREE_RIGHT(tree));\n");
419 output ("\tarity = (left != NULL) + (right != NULL);\n");
420 output ("\tg_assert (arity == mono_burg_arity [MBTREE_OP(tree)]);\n\n");
422 output ("\tp = g_new0 (MBState, 1);\n");
423 output ("\tp->op = MBTREE_OP(tree);\n");
424 output ("\tp->left = left;\n");
425 output ("\tp->right = right;\n");
427 for (l = nonterm_list, i = 0; l; l = l->next) {
428 output ("\tp->cost [%d] = 32767;\n", ++i);
432 output ("\tswitch (MBTREE_OP(tree)) {\n");
433 for (l = term_list; l; l = l->next) {
434 Term *t = (Term *)l->data;
436 output ("\tcase %d: /* %s */\n", t->number, t->name);
438 for (rl = t->rules; rl; rl = rl->next) {
439 Rule *rule = (Rule *)rl->data;
440 Tree *t = rule->tree;
442 emit_rule_string (rule, "\t\t");
444 emit_rule_match (rule);
448 output ("\t\t\tc = ");
452 output ("%s;\n", rule->cost);
454 emit_cond_assign (rule, NULL, "\t\t\t");
459 output ("\t\tbreak;\n");
462 output ("\tdefault:\n");
463 output ("\t\tg_error (\"unknown operator\");\n");
466 output ("\tMBTREE_STATE(tree) = p;\n");
468 output ("\treturn p;\n");
472 output ("MBState *\n");
473 output ("mono_burg_label (MBTREE_TYPE *tree)\n{\n");
474 output ("\tMBState *p = mono_burg_label_priv (tree);\n");
475 output ("\treturn p->rule_%s ? p : NULL;\n", ((NonTerm *)nonterm_list->data)->name);
480 compute_kids (char *ts, Tree *tree, int *n)
485 return g_strdup_printf ("\t\tkids[%d] = %s;\n", (*n)++, ts);
486 } else if (tree->op && tree->op->arity) {
489 res = compute_kids (g_strdup_printf ("MBTREE_LEFT(%s)", ts),
491 if (tree->op->arity == 2)
492 res2 = compute_kids (g_strdup_printf ("MBTREE_RIGHT(%s)", ts),
494 return g_strconcat (res, res2, NULL);
507 output ("mono_burg_rule (MBState *state, int goal)\n{\n");
509 output ("\tg_return_val_if_fail (state != NULL, 0);\n");
510 output ("\tg_return_val_if_fail (goal > 0, 0);\n\n");
512 output ("\tswitch (goal) {\n");
514 for (nl = nonterm_list; nl; nl = nl->next) {
515 NonTerm *n = (NonTerm *)nl->data;
516 output ("\tcase MB_NTERM_%s:\n", n->name);
517 output ("\t\treturn mono_burg_decode_%s [state->rule_%s];\n",
521 output ("\tdefault: g_assert_not_reached ();\n");
523 output ("\treturn 0;\n");
527 output ("MBTREE_TYPE **\n");
528 output ("mono_burg_kids (MBTREE_TYPE *tree, int rulenr, MBTREE_TYPE *kids [])\n{\n");
529 output ("\tg_return_val_if_fail (tree != NULL, NULL);\n");
530 output ("\tg_return_val_if_fail (MBTREE_STATE(tree) != NULL, NULL);\n");
531 output ("\tg_return_val_if_fail (kids != NULL, NULL);\n\n");
533 output ("\tswitch (rulenr) {\n");
535 n = g_list_length (rule_list);
536 sa = g_new0 (char *, n);
537 si = g_new0 (int, n);
539 /* compress the case statement */
540 for (l = rule_list, i = 0, c = 0; l; l = l->next) {
541 Rule *rule = (Rule *)l->data;
545 k = compute_kids ("tree", rule->tree, &kn);
546 for (j = 0; j < c; j++)
547 if (!strcmp (sa [j], k))
555 for (i = 0; i < c; i++) {
556 for (l = rule_list, j = 0; l; l = l->next, j++)
558 output ("\tcase %d:\n", j + 1);
559 output ("%s", sa [i]);
560 output ("\t\tbreak;\n");
563 output ("\tdefault:\n\t\tg_assert_not_reached ();\n");
565 output ("\treturn kids;\n");
576 for (l = rule_list, i = 0; l; l = l->next) {
577 Rule *rule = (Rule *)l->data;
580 output ("static void ");
582 emit_rule_string (rule, "");
584 output ("mono_burg_emit_%d (MBTREE_TYPE *tree, guint8 **code)\n", i);
586 output ("%s\n", rule->code);
592 output ("MBEmitFunc mono_burg_func [] = {\n");
593 output ("\tNULL,\n");
594 for (l = rule_list, i = 0; l; l = l->next) {
595 Rule *rule = (Rule *)l->data;
597 output ("\tmono_burg_emit_%d,\n", i);
599 output ("\tNULL,\n");
611 for (l = rule_list, i = 0; l; l = l->next) {
612 Rule *rule = (Rule *)l->data;
615 output ("static guint16\n");
617 emit_rule_string (rule, "");
619 output ("mono_burg_cost_%d (MBTREE_TYPE *tree)\n", i + 1);
621 output ("%s\n", rule->cfunc);
633 for (l = nonterm_list; l; l = l->next) {
634 NonTerm *n = (NonTerm *)l->data;
637 output ("static void closure_%s (MBState *p, int c);\n", n->name);
642 for (l = nonterm_list; l; l = l->next) {
643 NonTerm *n = (NonTerm *)l->data;
646 output ("static void\n");
647 output ("closure_%s (MBState *p, int c)\n{\n", n->name);
648 for (rl = n->chain; rl; rl = rl->next) {
649 Rule *rule = (Rule *)rl->data;
651 emit_rule_string (rule, "\t");
652 emit_cond_assign (rule, rule->cost, "\t");
660 compute_nonterms (Tree *tree)
666 return g_strdup_printf ("MB_NTERM_%s, ", tree->nonterm->name);
668 return g_strconcat (compute_nonterms (tree->left),
669 compute_nonterms (tree->right), NULL);
680 output ("guint8 mono_burg_arity [] = {\n");
681 for (l = term_list, i = 0; l; l = l->next) {
682 Term *t = (Term *)l->data;
684 while (i < t->number) {
689 output ("\t%d, /* %s */\n", t->arity, t->name);
695 output ("char *mono_burg_term_string [] = {\n");
696 output ("\tNULL,\n");
697 for (l = term_list, i = 0; l; l = l->next) {
698 Term *t = (Term *)l->data;
699 output ("\t\"%s\",\n", t->name);
703 output ("char *mono_burg_rule_string [] = {\n");
704 output ("\tNULL,\n");
705 for (l = rule_list, i = 0; l; l = l->next) {
706 Rule *rule = (Rule *)l->data;
707 output ("\t\"%s: ", rule->lhs->name);
708 emit_tree_string (rule->tree);
713 n = g_list_length (rule_list);
714 sa = g_new0 (char *, n);
715 si = g_new0 (int, n);
717 /* compress the _nts array */
718 for (l = rule_list, i = 0, c = 0; l; l = l->next) {
719 Rule *rule = (Rule *)l->data;
720 char *s = compute_nonterms (rule->tree);
722 for (j = 0; j < c; j++)
723 if (!strcmp (sa [j], s))
728 output ("static guint16 mono_burg_nts_%d [] = { %s0 };\n", c, s);
734 output ("guint16 *mono_burg_nts [] = {\n");
736 for (l = rule_list, i = 0; l; l = l->next) {
737 Rule *rule = (Rule *)l->data;
738 output ("\tmono_burg_nts_%d, ", si [i++]);
739 emit_rule_string (rule, "");
747 output ("typedef void (*MBEmitFunc) (MBTREE_TYPE *tree, guint8 **code);\n\n");
749 output ("extern char *mono_burg_term_string [];\n");
750 output ("extern char *mono_burg_rule_string [];\n");
751 output ("extern guint16 *mono_burg_nts [];\n");
752 output ("extern MBEmitFunc mono_burg_func [];\n");
754 output ("MBState *mono_burg_label (MBTREE_TYPE *tree);\n");
755 output ("int mono_burg_rule (MBState *state, int goal);\n");
756 output ("MBTREE_TYPE **mono_burg_kids (MBTREE_TYPE *tree, int rulenr, MBTREE_TYPE *kids []);\n");
761 static void check_reach (NonTerm *n);
764 mark_reached (Tree *tree)
766 if (tree->nonterm && !tree->nonterm->reached)
767 check_reach (tree->nonterm);
769 mark_reached (tree->left);
771 mark_reached (tree->right);
775 check_reach (NonTerm *n)
780 for (l = n->rules; l; l = l->next) {
781 Rule *rule = (Rule *)l->data;
782 mark_reached (rule->tree);
791 for (l = term_list; l; l = l->next) {
792 Term *term = (Term *)l->data;
793 if (term->arity == -1)
794 g_warning ("unused terminal \"%s\"",term->name);
797 check_reach (((NonTerm *)nonterm_list->data));
799 for (l = nonterm_list; l; l = l->next) {
800 NonTerm *n = (NonTerm *)l->data;
802 g_error ("unreachable nonterm \"%s\"", n->name);
810 "Usage is: monoburg [-d file] [file] \n");
815 main (int argc, char *argv [])
817 char *deffile = NULL;
821 for (i = 1; i < argc; i++){
822 if (argv [i][0] == '-'){
823 if (argv [i][1] == 'h') {
825 } else if (argv [i][1] == 'd') {
826 deffile = argv [++i];
839 if (!(deffd = fopen (deffile, "w"))) {
840 perror ("cant open header output file");
844 output ("#ifndef _MONO_BURG_DEFS_\n");
845 output ("#define _MONO_BURG_DEFS_\n\n");
851 if (!(inputfd = fopen (infile, "r"))) {
852 perror ("cant open input file");
864 g_error ("no start symbol found");
872 output ("#endif /* _MONO_BURG_DEFS_ */\n");
876 output ("#include \"%s\"\n\n", deffile);
881 emit_emitter_func ();