2 * monoburg.c: an iburg like code generator generator
5 * Dietmar Maurer (dietmar@ximian.com)
7 * (C) 2001 Ximian, Inc.
15 extern void yyparse (void);
17 static GHashTable *term_hash;
18 static GList *term_list;
19 static GHashTable *nonterm_hash;
20 static GList *nonterm_list;
21 static GList *rule_list;
28 static void output (char *fmt, ...)
33 vfprintf (outputfd, fmt, ap);
38 create_rule (char *id, Tree *tree, char *code, char *cost, char *cfunc)
40 Rule *rule = g_new0 (Rule, 1);
45 rule->lhs = nonterm (id);
47 rule_list = g_list_append (rule_list, rule);
48 rule->cost = g_strdup (cost);
49 rule->cfunc = g_strdup (cfunc);
50 rule->code = g_strdup (code);
54 yyerror ("duplicated costs (constant costs and cost function)");
56 rule->cost = g_strdup_printf ("mono_burg_cost_%d (tree, data)",
57 g_list_length (rule_list));
60 rule->lhs->rules = g_list_append (rule->lhs->rules, rule);
63 tree->op->rules = g_list_append (tree->op->rules, rule);
65 tree->nonterm->chain = g_list_append (tree->nonterm->chain, rule);
69 create_tree (char *id, Tree *left, Tree *right)
71 int arity = (left != NULL) + (right != NULL);
72 Term *term = g_hash_table_lookup (term_hash, id);
73 Tree *tree = g_new0 (Tree, 1);
76 if (term->arity == -1)
79 if (term->arity != arity)
80 yyerror ("changed arity of terminal %s from %d to %d",
81 id, term->arity, arity);
87 tree->nonterm = nonterm (id);
94 check_term_num (char *key, Term *value, int num)
96 if (num != -1 && value->number == num)
97 yyerror ("duplicate terminal id \"%s\"", key);
101 create_term (char *id, int num)
106 yyerror ("terminal definition after nonterminal definition");
109 yyerror ("invalid terminal number %d", num);
112 term_hash = g_hash_table_new (g_str_hash , g_str_equal);
114 g_hash_table_foreach (term_hash, (GHFunc) check_term_num, (gpointer) num);
116 term = g_new0 (Term, 1);
118 term->name = g_strdup (id);
122 term_list = g_list_append (term_list, term);
124 g_hash_table_insert (term_hash, term->name, term);
133 nonterm_hash = g_hash_table_new (g_str_hash , g_str_equal);
135 if ((nterm = g_hash_table_lookup (nonterm_hash, id)))
138 nterm = g_new0 (NonTerm, 1);
140 nterm->name = g_strdup (id);
141 nonterm_list = g_list_append (nonterm_list, nterm);
142 nterm->number = g_list_length (nonterm_list);
144 g_hash_table_insert (nonterm_hash, nterm->name, nterm);
150 start_nonterm (char *id)
152 static gboolean start_def;
155 yyerror ("start symbol redeclared");
162 emit_tree_string (Tree *tree)
165 output ("%s", tree->op->name);
166 if (tree->op->arity) {
168 emit_tree_string (tree->left);
171 emit_tree_string (tree->right);
176 output ("%s", tree->nonterm->name);
180 emit_rule_string (Rule *rule, char *fill)
182 output ("%s/* ", fill);
184 output ("%s: ", rule->lhs->name);
186 emit_tree_string (rule->tree);
194 GList *l = term_list;
198 Term *t = (Term *)l->data;
199 if (t->number == i) {
209 term_compare_func (Term *t1, Term *t2)
211 return t1->number - t2->number;
219 output ("#include <glib.h>\n");
222 output ("#ifndef MBTREE_TYPE\n#error MBTREE_TYPE undefined\n#endif\n");
223 output ("#ifndef MBTREE_OP\n#define MBTREE_OP(t) ((t)->op)\n#endif\n");
224 output ("#ifndef MBTREE_LEFT\n#define MBTREE_LEFT(t) ((t)->left)\n#endif\n");
225 output ("#ifndef MBTREE_RIGHT\n#define MBTREE_RIGHT(t) ((t)->right)\n#endif\n");
226 output ("#ifndef MBTREE_STATE\n#define MBTREE_STATE(t) ((t)->state)\n#endif\n");
227 output ("#ifndef MBCGEN_TYPE\n#define MBCGEN_TYPE int\n#endif\n");
228 output ("#ifndef MBALLOC_STATE\n#define MBALLOC_STATE g_new (MBState, 1)\n#endif\n");
229 output ("#ifndef MBCOST_DATA\n#define MBCOST_DATA gpointer\n#endif\n");
231 output ("#define MBMAXCOST 32768\n");
234 output ("#define MBCOND(x) if (!(x)) return MBMAXCOST;\n");
238 for (l = term_list; l; l = l->next) {
239 Term *t = (Term *)l->data;
241 t->number = next_term_num ();
243 term_list = g_list_sort (term_list, (GCompareFunc)term_compare_func);
245 for (l = term_list; l; l = l->next) {
246 Term *t = (Term *)l->data;
248 t->number = next_term_num ();
250 output ("#define MB_TERM_%s\t %d\n", t->name, t->number);
262 for (l = nonterm_list; l; l = l->next) {
263 NonTerm *n = (NonTerm *)l->data;
264 output ("#define MB_NTERM_%s\t%d\n", n->name, n->number);
266 output ("#define MB_MAX_NTERMS\t%d\n", g_list_length (nonterm_list));
276 output ("typedef struct _MBState MBState;\n");
277 output ("struct _MBState {\n");
278 output ("\tint\t\t op;\n");
279 output ("\tMBState\t\t*left, *right;\n");
280 output ("\tguint16\t\tcost[%d];\n", g_list_length (nonterm_list) + 1);
282 for (l = nonterm_list; l; l = l->next) {
283 NonTerm *n = (NonTerm *)l->data;
284 g_assert (g_list_length (n->rules) < 256);
285 i = g_list_length (n->rules);
289 output ("\tunsigned int\t rule_%s:%d;\n", n->name, j);
300 for (l = nonterm_list; l; l = l->next) {
301 NonTerm *n = (NonTerm *)l->data;
302 output ("const int mono_burg_decode_%s[] = {\n", n->name);
304 for (rl = n->rules; rl; rl = rl->next) {
305 Rule *rule = (Rule *)rl->data;
306 output ("\t%d,\n", g_list_index (rule_list, rule) + 1);
314 emit_tree_match (char *st, Tree *t)
318 output ("\t\t\t%sop == %d /* %s */", st, t->op->number, t->op->name);
320 if (t->left && t->left->op) {
321 tn = g_strconcat (st, "left->", NULL);
323 emit_tree_match (tn, t->left);
327 if (t->right && t->right->op) {
328 tn = g_strconcat (st, "right->", NULL);
330 emit_tree_match (tn, t->right);
336 emit_rule_match (Rule *rule)
338 Tree *t = rule->tree;
340 if ((t->left && t->left->op) ||
341 (t->right && t->right->op)) {
342 output ("\t\tif (\n");
343 emit_tree_match ("p->", t);
344 output ("\n\t\t)\n");
349 emit_costs (char *st, Tree *t)
356 tn = g_strconcat (st, "left->", NULL);
357 emit_costs (tn, t->left);
362 tn = g_strconcat (st, "right->", NULL);
363 emit_costs (tn, t->right);
366 output ("%scost[MB_NTERM_%s] + ", st, t->nonterm->name);
370 emit_cond_assign (Rule *rule, char *cost, char *fill)
375 rc = g_strconcat ("c + ", cost, NULL);
380 output ("%sif (%s < p->cost[MB_NTERM_%s]) {\n", fill, rc, rule->lhs->name);
382 output ("%s\tp->cost[MB_NTERM_%s] = %s;\n", fill, rule->lhs->name, rc);
384 output ("%s\tp->rule_%s = %d;\n", fill, rule->lhs->name,
385 g_list_index (rule->lhs->rules, rule) + 1);
387 if (rule->lhs->chain)
388 output ("%s\tclosure_%s (p, %s);\n", fill, rule->lhs->name, rc);
390 output ("%s}\n", fill);
402 output ("static MBState *\n");
403 output ("mono_burg_label_priv (MBTREE_TYPE *tree, MBCOST_DATA *data) {\n");
405 output ("\tint arity;\n");
406 output ("\tint c;\n");
407 output ("\tMBState *p, *left, *right;\n\n");
409 output ("\tswitch (mono_burg_arity [MBTREE_OP(tree)]) {\n");
410 output ("\tcase 0:\n");
411 output ("\t\tleft = NULL;\n");
412 output ("\t\tright = NULL;\n");
413 output ("\t\tbreak;\n");
414 output ("\tcase 1:\n");
415 output ("\t\tleft = mono_burg_label_priv (MBTREE_LEFT(tree), data);\n");
416 output ("\t\tright = NULL;\n");
417 output ("\t\tbreak;\n");
418 output ("\tcase 2:\n");
419 output ("\t\tleft = mono_burg_label_priv (MBTREE_LEFT(tree), data);\n");
420 output ("\t\tright = mono_burg_label_priv (MBTREE_RIGHT(tree), data);\n");
423 output ("\tarity = (left != NULL) + (right != NULL);\n");
424 output ("\tg_assert (arity == mono_burg_arity [MBTREE_OP(tree)]);\n\n");
426 output ("\tp = MBALLOC_STATE;\n");
427 output ("\tmemset (p, 0, sizeof (MBState));\n");
428 output ("\tp->op = MBTREE_OP(tree);\n");
429 output ("\tp->left = left;\n");
430 output ("\tp->right = right;\n");
432 for (l = nonterm_list, i = 0; l; l = l->next) {
433 output ("\tp->cost [%d] = 32767;\n", ++i);
437 output ("\tswitch (MBTREE_OP(tree)) {\n");
438 for (l = term_list; l; l = l->next) {
439 Term *t = (Term *)l->data;
441 output ("\tcase %d: /* %s */\n", t->number, t->name);
443 for (rl = t->rules; rl; rl = rl->next) {
444 Rule *rule = (Rule *)rl->data;
445 Tree *t = rule->tree;
447 emit_rule_string (rule, "\t\t");
449 emit_rule_match (rule);
453 output ("\t\t\tc = ");
457 output ("%s;\n", rule->cost);
459 emit_cond_assign (rule, NULL, "\t\t\t");
464 output ("\t\tbreak;\n");
467 output ("\tdefault:\n");
468 output ("\t\tg_error (\"unknown operator\");\n");
471 output ("\tMBTREE_STATE(tree) = p;\n");
473 output ("\treturn p;\n");
477 output ("MBState *\n");
478 output ("mono_burg_label (MBTREE_TYPE *tree, MBCOST_DATA *data)\n{\n");
479 output ("\tMBState *p = mono_burg_label_priv (tree, data);\n");
480 output ("\treturn p->rule_%s ? p : NULL;\n", ((NonTerm *)nonterm_list->data)->name);
485 compute_kids (char *ts, Tree *tree, int *n)
490 return g_strdup_printf ("\t\tkids[%d] = %s;\n", (*n)++, ts);
491 } else if (tree->op && tree->op->arity) {
494 res = compute_kids (g_strdup_printf ("MBTREE_LEFT(%s)", ts),
496 if (tree->op->arity == 2)
497 res2 = compute_kids (g_strdup_printf ("MBTREE_RIGHT(%s)", ts),
499 return g_strconcat (res, res2, NULL);
512 output ("mono_burg_rule (MBState *state, int goal)\n{\n");
514 output ("\tg_return_val_if_fail (state != NULL, 0);\n");
515 output ("\tg_return_val_if_fail (goal > 0, 0);\n\n");
517 output ("\tswitch (goal) {\n");
519 for (nl = nonterm_list; nl; nl = nl->next) {
520 NonTerm *n = (NonTerm *)nl->data;
521 output ("\tcase MB_NTERM_%s:\n", n->name);
522 output ("\t\treturn mono_burg_decode_%s [state->rule_%s];\n",
526 output ("\tdefault: g_assert_not_reached ();\n");
528 output ("\treturn 0;\n");
532 output ("MBTREE_TYPE **\n");
533 output ("mono_burg_kids (MBTREE_TYPE *tree, int rulenr, MBTREE_TYPE *kids [])\n{\n");
534 output ("\tg_return_val_if_fail (tree != NULL, NULL);\n");
535 output ("\tg_return_val_if_fail (MBTREE_STATE(tree) != NULL, NULL);\n");
536 output ("\tg_return_val_if_fail (kids != NULL, NULL);\n\n");
538 output ("\tswitch (rulenr) {\n");
540 n = g_list_length (rule_list);
541 sa = g_new0 (char *, n);
542 si = g_new0 (int, n);
544 /* compress the case statement */
545 for (l = rule_list, i = 0, c = 0; l; l = l->next) {
546 Rule *rule = (Rule *)l->data;
550 k = compute_kids ("tree", rule->tree, &kn);
551 for (j = 0; j < c; j++)
552 if (!strcmp (sa [j], k))
560 for (i = 0; i < c; i++) {
561 for (l = rule_list, j = 0; l; l = l->next, j++)
563 output ("\tcase %d:\n", j + 1);
564 output ("%s", sa [i]);
565 output ("\t\tbreak;\n");
568 output ("\tdefault:\n\t\tg_assert_not_reached ();\n");
570 output ("\treturn kids;\n");
581 for (l = rule_list, i = 0; l; l = l->next) {
582 Rule *rule = (Rule *)l->data;
585 output ("static void ");
587 emit_rule_string (rule, "");
589 output ("mono_burg_emit_%d (MBTREE_TYPE *tree, MBCGEN_TYPE *s)\n", i);
591 output ("%s\n", rule->code);
597 output ("MBEmitFunc const mono_burg_func [] = {\n");
598 output ("\tNULL,\n");
599 for (l = rule_list, i = 0; l; l = l->next) {
600 Rule *rule = (Rule *)l->data;
602 output ("\tmono_burg_emit_%d,\n", i);
604 output ("\tNULL,\n");
616 for (l = rule_list, i = 0; l; l = l->next) {
617 Rule *rule = (Rule *)l->data;
620 output ("inline static guint16\n");
622 emit_rule_string (rule, "");
624 output ("mono_burg_cost_%d (MBTREE_TYPE *tree, MBCOST_DATA *data)\n", i + 1);
626 output ("%s\n", rule->cfunc);
638 for (l = nonterm_list; l; l = l->next) {
639 NonTerm *n = (NonTerm *)l->data;
642 output ("static void closure_%s (MBState *p, int c);\n", n->name);
647 for (l = nonterm_list; l; l = l->next) {
648 NonTerm *n = (NonTerm *)l->data;
651 output ("static void\n");
652 output ("closure_%s (MBState *p, int c)\n{\n", n->name);
653 for (rl = n->chain; rl; rl = rl->next) {
654 Rule *rule = (Rule *)rl->data;
656 emit_rule_string (rule, "\t");
657 emit_cond_assign (rule, rule->cost, "\t");
665 compute_nonterms (Tree *tree)
671 return g_strdup_printf ("MB_NTERM_%s, ", tree->nonterm->name);
673 return g_strconcat (compute_nonterms (tree->left),
674 compute_nonterms (tree->right), NULL);
685 output ("const guint8 mono_burg_arity [] = {\n");
686 for (l = term_list, i = 0; l; l = l->next) {
687 Term *t = (Term *)l->data;
689 while (i < t->number) {
694 output ("\t%d, /* %s */\n", t->arity, t->name);
700 output ("const char *const mono_burg_term_string [] = {\n");
701 output ("\tNULL,\n");
702 for (l = term_list, i = 0; l; l = l->next) {
703 Term *t = (Term *)l->data;
704 output ("\t\"%s\",\n", t->name);
708 output ("const char * const mono_burg_rule_string [] = {\n");
709 output ("\tNULL,\n");
710 for (l = rule_list, i = 0; l; l = l->next) {
711 Rule *rule = (Rule *)l->data;
712 output ("\t\"%s: ", rule->lhs->name);
713 emit_tree_string (rule->tree);
718 n = g_list_length (rule_list);
719 sa = g_new0 (char *, n);
720 si = g_new0 (int, n);
722 /* compress the _nts array */
723 for (l = rule_list, i = 0, c = 0; l; l = l->next) {
724 Rule *rule = (Rule *)l->data;
725 char *s = compute_nonterms (rule->tree);
727 for (j = 0; j < c; j++)
728 if (!strcmp (sa [j], s))
733 output ("static const guint16 mono_burg_nts_%d [] = { %s0 };\n", c, s);
739 output ("const guint16 *const mono_burg_nts [] = {\n");
741 for (l = rule_list, i = 0; l; l = l->next) {
742 Rule *rule = (Rule *)l->data;
743 output ("\tmono_burg_nts_%d, ", si [i++]);
744 emit_rule_string (rule, "");
752 output ("typedef void (*MBEmitFunc) (MBTREE_TYPE *tree, MBCGEN_TYPE *s);\n\n");
754 output ("extern const char * const mono_burg_term_string [];\n");
755 output ("extern const char * const mono_burg_rule_string [];\n");
756 output ("extern const guint16 *const mono_burg_nts [];\n");
757 output ("extern MBEmitFunc const mono_burg_func [];\n");
759 output ("MBState *mono_burg_label (MBTREE_TYPE *tree, MBCOST_DATA *data);\n");
760 output ("int mono_burg_rule (MBState *state, int goal);\n");
761 output ("MBTREE_TYPE **mono_burg_kids (MBTREE_TYPE *tree, int rulenr, MBTREE_TYPE *kids []);\n");
766 static void check_reach (NonTerm *n);
769 mark_reached (Tree *tree)
771 if (tree->nonterm && !tree->nonterm->reached)
772 check_reach (tree->nonterm);
774 mark_reached (tree->left);
776 mark_reached (tree->right);
780 check_reach (NonTerm *n)
785 for (l = n->rules; l; l = l->next) {
786 Rule *rule = (Rule *)l->data;
787 mark_reached (rule->tree);
796 for (l = term_list; l; l = l->next) {
797 Term *term = (Term *)l->data;
798 if (term->arity == -1)
799 g_warning ("unused terminal \"%s\"",term->name);
802 check_reach (((NonTerm *)nonterm_list->data));
804 for (l = nonterm_list; l; l = l->next) {
805 NonTerm *n = (NonTerm *)l->data;
807 g_error ("unreachable nonterm \"%s\"", n->name);
815 "Usage is: monoburg -d file.h -s file.c [inputfile] \n");
820 warning_handler (const gchar *log_domain,
821 GLogLevelFlags log_level,
822 const gchar *message,
825 (void) fprintf ((FILE *) user_data, "** WARNING **: %s\n", message);
829 main (int argc, char *argv [])
832 char *deffile = NULL;
836 g_log_set_handler (NULL, G_LOG_LEVEL_WARNING, warning_handler, stderr);
838 for (i = 1; i < argc; i++){
839 if (argv [i][0] == '-'){
840 if (argv [i][1] == 'h') {
842 } else if (argv [i][1] == 'd') {
843 deffile = argv [++i];
844 } else if (argv [i][1] == 's') {
858 if (!(deffd = fopen (deffile, "w"))) {
859 perror ("cant open header output file");
863 output ("#ifndef _MONO_BURG_DEFS_\n");
864 output ("#define _MONO_BURG_DEFS_\n\n");
870 if (!(inputfd = fopen (infile, "r"))) {
871 perror ("cant open input file");
883 g_error ("no start symbol found");
891 output ("#endif /* _MONO_BURG_DEFS_ */\n");
897 if (!(cfd = fopen (cfile, "w"))) {
898 perror ("cant open c output file");
899 (void) remove (deffile);
906 output ("#include \"%s\"\n\n", deffile);
911 emit_emitter_func ();