2 * monoburg.c: an iburg-like code-generator generator
5 * Dietmar Maurer (dietmar@ximian.com)
7 * (C) 2001 Ximian, Inc.
/* Parser entry point; its definition is not part of this listing. */
15 extern void yyparse (void);
/* Terminals: keyed by name in term_hash and also kept in term_list (see create_term). */
17 static GHashTable *term_hash;
18 static GList *term_list;
/* Nonterminals: keyed by name in nonterm_hash and also kept in nonterm_list. */
19 static GHashTable *nonterm_hash;
20 static GList *nonterm_list;
/* Every rule registered through rule_add, in the order it was appended. */
21 static GList *rule_list;
/* Strings registered by create_term_prefix; create_tree compares ids against them. */
22 static GList *prefix_list;
/* Table of names given with -D on the command line (created and filled in main). */
26 GHashTable *definedvars;
/* NOTE(review): no visible line reads or sets dag_mode; its users are in omitted lines. */
30 static int dag_mode = 0;
/* Set to 1 by the -p command line option. */
31 static int predefined_terms = 0;
/* Set by the -c command line option; rule_add renders it as a rule's cost string. */
32 static int default_cost = 0;
/* printf-style helper: forwards fmt and its variadic arguments to vfprintf on
 * outputfd (outputfd is declared outside the visible lines). */
34 static void output (char *fmt, ...)
39 vfprintf (outputfd, fmt, ap);
/* Allocates a zero-initialised Rule whose left-hand side is the nonterminal
 * named id.
 * NOTE(review): the lines that use the tree argument are not visible here. */
44 make_rule (char *id, Tree *tree)
46 Rule *rule = g_new0 (Rule, 1);
47 rule->lhs = nonterm (id);
/* Registers rule: appends it to rule_list, stores duplicates of the code, cost
 * and cfunc strings in it, and links it into the rule lists reachable from its
 * left-hand side and from its tree.
 * NOTE(review): the conditions guarding several statements below are in lines
 * that are not visible; comments describe only what each visible line does. */
54 rule_add (Rule *rule, char *code, char *cost, char *cfunc)
/* Replace cost with default_cost rendered as a decimal string (presumably only
 * when no cost was supplied -- guard not visible). */
57 cost = g_strdup_printf ("%d", default_cost);
59 rule_list = g_list_append (rule_list, rule);
/* The rule keeps its own copies of the three strings. */
60 rule->cost = g_strdup (cost);
61 rule->cfunc = g_strdup (cfunc);
62 rule->code = g_strdup (code);
66 yyerror ("duplicated costs (constant costs and cost function)");
/* The cost expression becomes a call to the generated mono_burg_cost_N, where N
 * is the current length of rule_list (this rule was just appended).  The two
 * variants differ only in the first argument, p versus tree. */
69 rule->cost = g_strdup_printf ("mono_burg_cost_%d (p, data)",
70 g_list_length (rule_list));
72 rule->cost = g_strdup_printf ("mono_burg_cost_%d (tree, data)",
73 g_list_length (rule_list));
/* Index the rule under its left-hand-side nonterminal. */
77 rule->lhs->rules = g_list_append (rule->lhs->rules, rule);
/* Index the rule under its tree's operator (rules list) ... */
80 rule->tree->op->rules = g_list_append (rule->tree->op->rules, rule);
/* ... and under its tree's nonterminal (chain list); the branch choosing
 * between these two statements is not visible. */
82 rule->tree->nonterm->chain = g_list_append (rule->tree->nonterm->chain, rule);
/* Convenience wrapper: builds a rule with make_rule (id, tree) and registers it
 * with rule_add (rule, code, cost, cfunc). */
86 create_rule (char *id, Tree *tree, char *code, char *cost, char *cfunc)
88 Rule *rule = make_rule (id, tree);
90 rule_add (rule, code, cost, cfunc);
/* Builds a Tree node for id with the given children.  The visible lines look
 * id up as a terminal, fall back to the registered term prefixes, validate the
 * terminal's arity, and bind the node to the nonterminal named id.
 * NOTE(review): the branch structure connecting these steps is in omitted
 * lines. */
94 create_tree (char *id, Tree *left, Tree *right)
/* arity is the number of non-NULL children: 0, 1 or 2. */
96 int arity = (left != NULL) + (right != NULL);
98 Tree *tree = g_new0 (Tree, 1);
101 term = g_hash_table_lookup (term_hash, id);
103 /* check whether id starts with one of the registered term prefixes */
106 for (pl = prefix_list; pl; pl = pl->next) {
107 char *pfx = (char *)pl->data;
/* strncmp over strlen (pfx) characters: true when id begins with pfx. */
108 if (!strncmp (pfx, id, strlen (pfx))) {
/* Create the terminal on demand, passing -1 as its number. */
109 term = create_term (id, -1);
/* arity == -1 is also the value check_result reports as "unused terminal";
 * the statement guarded here is not visible. */
117 if (term->arity == -1)
/* A terminal must be used with the same number of children everywhere. */
120 if (term->arity != arity)
121 yyerror ("changed arity of terminal %s from %d to %d",
122 id, term->arity, arity);
128 tree->nonterm = nonterm (id);
/* Hash-table iteration callback (passed to g_hash_table_foreach in
 * create_term): reports an error when num is a real number (not -1) and the
 * terminal `value`, stored under `key`, already carries that number. */
135 check_term_num (char *key, Term *value, int num)
137 if (num != -1 && value->number == num)
138 yyerror ("duplicate terminal id \"%s\"", key);
/* Registers id as a terminal prefix.  Reports an error unless predefined_terms
 * is set (the -p option), then prepends a copy of id to prefix_list. */
142 create_term_prefix (char *id)
144 if (!predefined_terms)
145 yyerror ("%termprefix is only available with -p option");
147 prefix_list = g_list_prepend (prefix_list, g_strdup (id));
/* Creates the terminal named id, with number num, and records it in both
 * term_list and term_hash.
 * NOTE(review): several guards and the return statement are not visible. */
151 create_term (char *id, int num)
/* Without -p, terminals may not be defined once any nonterminal exists. */
155 if (!predefined_terms && nonterm_list)
156 yyerror ("terminal definition after nonterminal definition");
159 yyerror ("invalid terminal number %d", num);
/* term_hash is keyed by string (g_str_hash / g_str_equal). */
162 term_hash = g_hash_table_new (g_str_hash , g_str_equal);
/* Check every existing terminal for a clash with num.
 * NOTE(review): num is an int cast straight to gpointer and received as int by
 * check_term_num; GINT_TO_POINTER / GPOINTER_TO_INT is GLib's documented way to
 * carry an int through a gpointer. */
164 g_hash_table_foreach (term_hash, (GHFunc) check_term_num, (gpointer) num);
166 term = g_new0 (Term, 1);
168 term->name = g_strdup (id);
172 term_list = g_list_append (term_list, term);
/* The hash key is the terminal's own copy of the name. */
174 g_hash_table_insert (term_hash, term->name, term);
/* Fragment whose function header is not visible; presumably the body of the
 * nonterm (id) function called by make_rule and create_tree -- confirm.
 * Looks the nonterminal named id up in nonterm_hash and otherwise creates it. */
185 nonterm_hash = g_hash_table_new (g_str_hash , g_str_equal);
/* An existing entry was found; the statement taken in that case is omitted. */
187 if ((nterm = g_hash_table_lookup (nonterm_hash, id)))
190 nterm = g_new0 (NonTerm, 1);
192 nterm->name = g_strdup (id);
193 nonterm_list = g_list_append (nonterm_list, nterm);
/* Numbers are 1-based: the list length right after appending this entry. */
194 nterm->number = g_list_length (nonterm_list);
196 g_hash_table_insert (nonterm_hash, nterm->name, nterm);
/* Handles the declaration of the start symbol id.  start_def is a function-
 * local static flag, and a "start symbol redeclared" error is reported.
 * NOTE(review): the test on start_def and the rest of the body are omitted. */
202 start_nonterm (char *id)
204 static gboolean start_def;
207 yyerror ("start symbol redeclared");
/* Writes a textual form of a pattern tree via output (): the operator name,
 * followed when the operator has a non-zero arity by the recursively printed
 * left and right subtrees; the nonterminal's name is printed in the other
 * visible branch.  Punctuation between the parts is emitted by omitted lines. */
214 emit_tree_string (Tree *tree)
217 output ("%s", tree->op->name);
218 if (tree->op->arity) {
220 emit_tree_string (tree->left);
223 emit_tree_string (tree->right);
228 output ("%s", tree->nonterm->name);
/* Writes rule as the start of a C comment, prefixed by fill: the left-hand-side
 * name, a colon, then the pattern tree.  The text closing the comment is
 * emitted by lines that are not visible. */
232 emit_rule_string (Rule *rule, char *fill)
234 output ("%s/* ", fill);
236 output ("%s: ", rule->lhs->name);
238 emit_tree_string (rule->tree);
/* Fragment whose function header is not visible; presumably the body of the
 * next_term_num () helper called while terminals are numbered -- confirm.
 * Starts at the head of term_list and compares a terminal's number with the
 * candidate i; the surrounding loop and the return are in omitted lines. */
246 GList *l = term_list;
250 Term *t = (Term *)l->data;
251 if (t->number == i) {
/* Comparator passed to g_list_sort for term_list: negative, zero or positive
 * according to whether t1's number is below, equal to or above t2's, i.e.
 * ascending order by number. */
261 term_compare_func (Term *t1, Term *t2)
263 return t1->number - t2->number;
/* Fragment whose function header is not visible.  Emits the preamble of the
 * generated file: the glib include, the user-overridable MB* macros with their
 * defaults, and one MB_TERM_<name> #define per terminal. */
271 output ("#include <glib.h>\n");
/* MBTREE_TYPE has no default: the generated code fails to compile without it. */
274 output ("#ifndef MBTREE_TYPE\n#error MBTREE_TYPE undefined\n#endif\n");
/* Default accessors assume fields named op, left, right and state. */
275 output ("#ifndef MBTREE_OP\n#define MBTREE_OP(t) ((t)->op)\n#endif\n");
276 output ("#ifndef MBTREE_LEFT\n#define MBTREE_LEFT(t) ((t)->left)\n#endif\n");
277 output ("#ifndef MBTREE_RIGHT\n#define MBTREE_RIGHT(t) ((t)->right)\n#endif\n");
278 output ("#ifndef MBTREE_STATE\n#define MBTREE_STATE(t) ((t)->state)\n#endif\n");
279 output ("#ifndef MBCGEN_TYPE\n#define MBCGEN_TYPE int\n#endif\n");
280 output ("#ifndef MBALLOC_STATE\n#define MBALLOC_STATE g_new (MBState, 1)\n#endif\n");
281 output ("#ifndef MBCOST_DATA\n#define MBCOST_DATA gpointer\n#endif\n");
283 output ("#define MBMAXCOST 32768\n");
/* MBCOND makes the enclosing generated function return MBMAXCOST when x fails. */
286 output ("#define MBCOND(x) if (!(x)) return MBMAXCOST;\n");
/* First pass: assign numbers through next_term_num () (the condition selecting
 * which terminals get one is not visible), then sort by number. */
290 for (l = term_list; l; l = l->next) {
291 Term *t = (Term *)l->data;
293 t->number = next_term_num ();
295 term_list = g_list_sort (term_list, (GCompareFunc)term_compare_func);
/* Second pass: emit the MB_TERM_ macros. */
297 for (l = term_list; l; l = l->next) {
298 Term *t = (Term *)l->data;
300 t->number = next_term_num ();
/* With -p the macro expands to the terminal's own name; the other visible
 * form expands to the terminal's number. */
302 if (predefined_terms)
303 output ("#define MB_TERM_%s\t %s\n", t->name, t->name);
305 output ("#define MB_TERM_%s\t %d\n", t->name, t->number);
/* Fragment whose function header is not visible.  Emits one MB_NTERM_<name>
 * #define per nonterminal, mapping it to its number, and MB_MAX_NTERMS as the
 * number of nonterminals. */
317 for (l = nonterm_list; l; l = l->next) {
318 NonTerm *n = (NonTerm *)l->data;
319 output ("#define MB_NTERM_%s\t%d\n", n->name, n->number);
321 output ("#define MB_MAX_NTERMS\t%d\n", g_list_length (nonterm_list));
/* Fragment whose function header is not visible.  Emits the definition of the
 * generated struct _MBState / MBState.
 * NOTE(review): which members are emitted conditionally depends on guards in
 * omitted lines. */
331 output ("typedef struct _MBState MBState;\n");
332 output ("struct _MBState {\n");
333 output ("\tint\t\t op;\n");
336 output ("\tMBTREE_TYPE\t *tree;\n");
337 output ("\tgint32 reg1, reg2;\n");
340 output ("\tMBState\t\t*left, *right;\n");
/* One cost slot per nonterminal plus one extra; nonterminal numbers start at 1. */
341 output ("\tguint16\t\tcost[%d];\n", g_list_length (nonterm_list) + 1);
/* One rule_<name> bit-field per nonterminal. */
343 for (l = nonterm_list; l; l = l->next) {
344 NonTerm *n = (NonTerm *)l->data;
/* Each nonterminal is required to have fewer than 256 rules. */
345 g_assert (g_list_length (n->rules) < 256);
346 i = g_list_length (n->rules);
/* j is the bit-field width; its computation from i is in omitted lines. */
350 output ("\tunsigned int\t rule_%s:%d;\n", n->name, j);
/* Fragment whose function header is not visible.  For every nonterminal emits
 * the array mono_burg_decode_<name>; each visible entry is the 1-based
 * position of one of the nonterminal's rules within rule_list. */
361 for (l = nonterm_list; l; l = l->next) {
362 NonTerm *n = (NonTerm *)l->data;
363 output ("const int mono_burg_decode_%s[] = {\n", n->name);
365 for (rl = n->rules; rl; rl = rl->next) {
366 Rule *rule = (Rule *)rl->data;
/* g_list_index is 0-based, hence the + 1. */
367 output ("\t%d,\n", g_list_index (rule_list, rule) + 1);
/* Emits the operator comparisons that make up a rule's match condition.  st is
 * the C expression prefix reaching the current state (for example "p->" or
 * "p->left->"); t is the pattern subtree matched at that position. */
375 emit_tree_match (char *st, Tree *t)
/* strcmp is non-zero exactly when st is not the top-level prefix "p->". */
378 int not_first = strcmp (st, "p->");
380 /* the operator check can be omitted at the top level */
/* With -p the operator is compared by name, otherwise by number. */
382 if (predefined_terms)
383 output ("\t\t\t%sop == %s /* %s */", st, t->op->name, t->op->name);
385 output ("\t\t\t%sop == %d /* %s */", st, t->op->number, t->op->name);
/* Recurse only into children that are operator nodes, extending the prefix. */
388 if (t->left && t->left->op) {
389 tn = g_strconcat (st, "left->", NULL);
393 emit_tree_match (tn, t->left);
397 if (t->right && t->right->op) {
398 tn = g_strconcat (st, "right->", NULL);
401 emit_tree_match (tn, t->right);
/* Emits the `if (...)` match condition for rule.  The condition is generated
 * only when at least one direct child of the rule's tree is an operator node. */
407 emit_rule_match (Rule *rule)
409 Tree *t = rule->tree;
411 if ((t->left && t->left->op) ||
412 (t->right && t->right->op)) {
413 output ("\t\tif (\n");
414 emit_tree_match ("p->", t);
415 output ("\n\t\t)\n");
/* Emits the summands of a rule's cost expression: recurses into the left and
 * right subtrees with an extended state prefix and writes a
 * `<st>cost[MB_NTERM_<name>] + ` term for a nonterminal.
 * NOTE(review): the conditions selecting each branch are in omitted lines. */
420 emit_costs (char *st, Tree *t)
427 tn = g_strconcat (st, "left->", NULL);
428 emit_costs (tn, t->left);
433 tn = g_strconcat (st, "right->", NULL);
434 emit_costs (tn, t->right);
/* The trailing " + " leaves the expression open for the caller to finish. */
437 output ("%scost[MB_NTERM_%s] + ", st, t->nonterm->name);
/* Emits the generated "take this rule if it is cheaper" block for rule.  fill
 * is the indentation prefix written before every emitted line. */
441 emit_cond_assign (Rule *rule, char *cost, char *fill)
/* rc is the cost expression used below; here it is "c + <cost>".  The
 * alternative value of rc (the caller in the label function passes NULL for
 * cost) is set in omitted lines. */
446 rc = g_strconcat ("c + ", cost, NULL);
/* Only update when rc beats the best cost recorded for the rule's lhs. */
451 output ("%sif (%s < p->cost[MB_NTERM_%s]) {\n", fill, rc, rule->lhs->name);
453 output ("%s\tp->cost[MB_NTERM_%s] = %s;\n", fill, rule->lhs->name, rc);
/* Record the rule by its 1-based position among the lhs's rules. */
455 output ("%s\tp->rule_%s = %d;\n", fill, rule->lhs->name,
456 g_list_index (rule->lhs->rules, rule) + 1);
/* If the lhs appears in chain rules, emit a call to its closure function. */
458 if (rule->lhs->chain)
459 output ("%s\tclosure_%s (p, %s);\n", fill, rule->lhs->name, rc);
461 output ("%s}\n", fill);
/* Fragment whose function header is not visible.  Emits the generated labeller:
 * the recursive mono_burg_label_priv and the public mono_burg_label wrapper.
 * Two variants of the generated code are visible throughout: one where the
 * caller allocates the state and passes it as p, and one where the function
 * allocates and returns it.
 * NOTE(review): the selector between the variants is in omitted lines
 * (presumably dag_mode -- confirm). */
474 output ("static void\n");
475 output ("mono_burg_label_priv (MBTREE_TYPE *tree, MBCOST_DATA *data, MBState *p) {\n");
477 output ("static MBState *\n");
478 output ("mono_burg_label_priv (MBTREE_TYPE *tree, MBCOST_DATA *data) {\n");
/* Locals of the generated function. */
481 output ("\tint arity;\n");
482 output ("\tint c;\n");
484 output ("\tMBState *p;\n");
485 output ("\tMBState *left = NULL, *right = NULL;\n\n");
/* Label the children first, as many as the operator's arity says. */
487 output ("\tswitch (mono_burg_arity [MBTREE_OP(tree)]) {\n");
488 output ("\tcase 0:\n");
489 output ("\t\tbreak;\n");
490 output ("\tcase 1:\n");
492 output ("\t\tleft = MBALLOC_STATE;\n");
493 output ("\t\tmono_burg_label_priv (MBTREE_LEFT(tree), data, left);\n");
495 output ("\t\tleft = mono_burg_label_priv (MBTREE_LEFT(tree), data);\n");
496 output ("\t\tright = NULL;\n");
498 output ("\t\tbreak;\n");
499 output ("\tcase 2:\n");
501 output ("\t\tleft = MBALLOC_STATE;\n");
502 output ("\t\tmono_burg_label_priv (MBTREE_LEFT(tree), data, left);\n");
503 output ("\t\tright = MBALLOC_STATE;\n");
504 output ("\t\tmono_burg_label_priv (MBTREE_RIGHT(tree), data, right);\n");
506 output ("\t\tleft = mono_burg_label_priv (MBTREE_LEFT(tree), data);\n");
507 output ("\t\tright = mono_burg_label_priv (MBTREE_RIGHT(tree), data);\n");
/* Generated sanity check: labelled children must match the declared arity. */
511 output ("\tarity = (left != NULL) + (right != NULL);\n");
512 output ("\tg_assert (arity == mono_burg_arity [MBTREE_OP(tree)]);\n\n");
/* Allocate (in the variant that owns the state) and initialise the state. */
515 output ("\tp = MBALLOC_STATE;\n");
517 output ("\tmemset (p, 0, sizeof (MBState));\n");
518 output ("\tp->op = MBTREE_OP(tree);\n");
519 output ("\tp->left = left;\n");
520 output ("\tp->right = right;\n");
523 output ("\tp->tree = tree;\n");
/* Initialise cost slots 1..N (N = number of nonterminals) to 32767.
 * NOTE(review): MBMAXCOST is emitted elsewhere as 32768 and the cost array is
 * guint16 -- confirm the two constants are meant to differ. */
525 for (l = nonterm_list, i = 0; l; l = l->next) {
526 output ("\tp->cost [%d] = 32767;\n", ++i);
/* One case per terminal; inside it, one candidate block per rule rooted at
 * that terminal. */
530 output ("\tswitch (MBTREE_OP(tree)) {\n");
531 for (l = term_list; l; l = l->next) {
532 Term *t = (Term *)l->data;
535 if (predefined_terms)
536 output ("\tcase %s: /* %s */\n", t->name, t->name);
538 output ("\tcase %d: /* %s */\n", t->number, t->name);
540 for (rl = t->rules; rl; rl = rl->next) {
541 Rule *rule = (Rule *)rl->data;
/* NOTE(review): this Tree *t shadows the Term *t of the enclosing loop. */
542 Tree *t = rule->tree;
544 emit_rule_string (rule, "\t\t");
546 emit_rule_match (rule);
/* c receives the rule's cost expression; the child-cost summands are
 * presumably emitted between these two lines by omitted code. */
550 output ("\t\t\tc = ");
554 output ("%s;\n", rule->cost);
/* NULL cost: the rule's own cost is already part of c. */
556 emit_cond_assign (rule, NULL, "\t\t\t");
561 output ("\t\tbreak;\n");
/* Unknown operators are fatal in the generated code; MBGET_OP_NAME, when
 * defined, supplies a readable operator name for the message. */
564 output ("\tdefault:\n");
565 output ("#ifdef MBGET_OP_NAME\n");
566 output ("\t\tg_error (\"unknown operator: %%s\", MBGET_OP_NAME(MBTREE_OP(tree)));\n");
568 output ("\t\tg_error (\"unknown operator: 0x%%04x\", MBTREE_OP(tree));\n");
/* Attach the state to the tree node and return it. */
573 output ("\tMBTREE_STATE(tree) = p;\n");
574 output ("\treturn p;\n");
/* Public entry point of the generated labeller. */
579 output ("MBState *\n");
580 output ("mono_burg_label (MBTREE_TYPE *tree, MBCOST_DATA *data)\n{\n");
582 output ("\tMBState *p = MBALLOC_STATE;\n");
583 output ("\tmono_burg_label_priv (tree, data, p);\n");
585 output ("\tMBState *p = mono_burg_label_priv (tree, data);\n");
/* The result is p only if some rule matched for the first nonterminal in
 * nonterm_list; otherwise the generated function returns NULL. */
587 output ("\treturn p->rule_%s ? p : NULL;\n", ((NonTerm *)nonterm_list->data)->name);
/* Builds, as a newly allocated string, the `kids[i] = <expr>;` statements for
 * the pattern tree.  ts is the C expression reaching the current node and *n is
 * the running kid index, advanced for every statement produced. */
592 compute_kids (char *ts, Tree *tree, int *n)
/* One assignment for this position (condition in an omitted line). */
597 return g_strdup_printf ("\t\tkids[%d] = %s;\n", (*n)++, ts);
598 } else if (tree->op && tree->op->arity) {
/* Variant that follows ->left / ->right fields directly. */
602 res = compute_kids (g_strdup_printf ("%s->left", ts),
604 if (tree->op->arity == 2)
605 res2 = compute_kids (g_strdup_printf ("%s->right", ts),
/* Variant that goes through the MBTREE_LEFT / MBTREE_RIGHT accessor macros. */
608 res = compute_kids (g_strdup_printf ("MBTREE_LEFT(%s)", ts),
610 if (tree->op->arity == 2)
611 res2 = compute_kids (g_strdup_printf ("MBTREE_RIGHT(%s)", ts),
/* g_strconcat stops at the first NULL argument. */
615 return g_strconcat (res, res2, NULL);
/* Fragment whose function header is not visible.  Emits the generated
 * mono_burg_rule and mono_burg_kids functions. */
/* mono_burg_rule: map (state, goal nonterminal) to a rule number through the
 * per-nonterminal decode arrays. */
628 output ("mono_burg_rule (MBState *state, int goal)\n{\n");
630 output ("\tg_return_val_if_fail (state != NULL, 0);\n");
631 output ("\tg_return_val_if_fail (goal > 0, 0);\n\n");
633 output ("\tswitch (goal) {\n");
635 for (nl = nonterm_list; nl; nl = nl->next) {
636 NonTerm *n = (NonTerm *)nl->data;
637 output ("\tcase MB_NTERM_%s:\n", n->name);
638 output ("\t\treturn mono_burg_decode_%s [state->rule_%s];\n",
642 output ("\tdefault: g_assert_not_reached ();\n");
644 output ("\treturn 0;\n");
/* mono_burg_kids, first variant: operates on MBState nodes. */
649 output ("MBState **\n");
650 output ("mono_burg_kids (MBState *state, int rulenr, MBState *kids [])\n{\n");
651 output ("\tg_return_val_if_fail (state != NULL, NULL);\n");
652 output ("\tg_return_val_if_fail (kids != NULL, NULL);\n\n");
/* mono_burg_kids, second variant: operates on MBTREE_TYPE nodes. */
655 output ("MBTREE_TYPE **\n");
656 output ("mono_burg_kids (MBTREE_TYPE *tree, int rulenr, MBTREE_TYPE *kids [])\n{\n");
657 output ("\tg_return_val_if_fail (tree != NULL, NULL);\n");
658 output ("\tg_return_val_if_fail (kids != NULL, NULL);\n\n");
661 output ("\tswitch (rulenr) {\n");
/* sa and si each have one slot per rule; sa holds kid-assignment strings that
 * are compared below. */
663 n = g_list_length (rule_list);
664 sa = g_new0 (char *, n);
665 si = g_new0 (int, n);
667 /* compress the case statement: rules with identical kid code share one body */
668 for (l = rule_list, i = 0, c = 0; l; l = l->next) {
669 Rule *rule = (Rule *)l->data;
674 k = compute_kids ("state", rule->tree, &kn);
676 k = compute_kids ("tree", rule->tree, &kn);
/* Look for an already collected string equal to k. */
678 for (j = 0; j < c; j++)
679 if (!strcmp (sa [j], k))
/* For each distinct body: the case labels (rule numbers are 1-based), the
 * body itself, then break. */
687 for (i = 0; i < c; i++) {
688 for (l = rule_list, j = 0; l; l = l->next, j++)
690 output ("\tcase %d:\n", j + 1);
691 output ("%s", sa [i]);
692 output ("\t\tbreak;\n");
695 output ("\tdefault:\n\t\tg_assert_not_reached ();\n");
697 output ("\treturn kids;\n");
/* Fragment whose function header is not visible; presumably the body of
 * emit_emitter_func (), which main calls -- confirm.  Emits one
 * mono_burg_emit_<i> function per rule that has code, and the mono_burg_func
 * dispatch table.
 * NOTE(review): the updates of i and the tests on rule->code are omitted. */
/* cache maps a rule's code string to the index of the emitter already
 * generated for that exact text. */
707 GHashTable *cache = g_hash_table_new (g_str_hash, g_str_equal);
709 for (l = rule_list, i = 0; l; l = l->next) {
710 Rule *rule = (Rule *)l->data;
/* Same code seen before: alias this emitter to the earlier one via #define. */
713 if ((rulen = GPOINTER_TO_INT (g_hash_table_lookup (cache, rule->code)))) {
714 emit_rule_string (rule, "");
715 output ("#define mono_burg_emit_%d mono_burg_emit_%d\n\n", i, rulen);
/* New code: emit a static function whose body is the rule's code text. */
719 output ("static void ");
721 emit_rule_string (rule, "");
724 output ("mono_burg_emit_%d (MBState *state, MBTREE_TYPE *tree, MBCGEN_TYPE *s)\n", i);
726 output ("mono_burg_emit_%d (MBTREE_TYPE *tree, MBCGEN_TYPE *s)\n", i);
728 output ("%s\n", rule->code);
730 g_hash_table_insert (cache, rule->code, GINT_TO_POINTER (i));
735 g_hash_table_destroy (cache);
/* Dispatch table: the first entry is NULL; further entries are an emitter
 * name or NULL per rule. */
737 output ("MBEmitFunc const mono_burg_func [] = {\n");
738 output ("\tNULL,\n");
739 for (l = rule_list, i = 0; l; l = l->next) {
740 Rule *rule = (Rule *)l->data;
742 output ("\tmono_burg_emit_%d,\n", i);
744 output ("\tNULL,\n");
/* Fragment whose function header is not visible.  Emits an inline static
 * mono_burg_cost_<i + 1> function whose body is the rule's cfunc text.  Two
 * signature variants are visible, taking an MBState or an MBTREE_TYPE first.
 * NOTE(review): the test selecting rules with a cost function and the update
 * of i are in omitted lines. */
756 for (l = rule_list, i = 0; l; l = l->next) {
757 Rule *rule = (Rule *)l->data;
760 output ("inline static guint16\n");
762 emit_rule_string (rule, "");
765 output ("mono_burg_cost_%d (MBState *state, MBCOST_DATA *data)\n", i + 1);
767 output ("mono_burg_cost_%d (MBTREE_TYPE *tree, MBCOST_DATA *data)\n", i + 1);
769 output ("%s\n", rule->cfunc);
/* Fragment whose function header is not visible.  Emits the closure_<name>
 * functions used for chain rules: forward declarations first, then the
 * definitions.
 * NOTE(review): the filter choosing which nonterminals get a closure is in
 * omitted lines. */
781 for (l = nonterm_list; l; l = l->next) {
782 NonTerm *n = (NonTerm *)l->data;
785 output ("static void closure_%s (MBState *p, int c);\n", n->name);
790 for (l = nonterm_list; l; l = l->next) {
791 NonTerm *n = (NonTerm *)l->data;
794 output ("static void\n");
795 output ("closure_%s (MBState *p, int c)\n{\n", n->name);
/* One conditional assignment per rule in the nonterminal's chain list, using
 * the rule's own cost added to the incoming c. */
796 for (rl = n->chain; rl; rl = rl->next) {
797 Rule *rule = (Rule *)rl->data;
799 emit_rule_string (rule, "\t");
800 emit_cond_assign (rule, rule->cost, "\t");
/* Returns a newly allocated string listing the nonterminals of tree as
 * "MB_NTERM_<name>, " items: one item for a nonterminal node, otherwise the
 * concatenation of the left and right subtree results.
 * NOTE(review): the conditions choosing each return are in omitted lines. */
808 compute_nonterms (Tree *tree)
814 return g_strdup_printf ("MB_NTERM_%s, ", tree->nonterm->name);
816 return g_strconcat (compute_nonterms (tree->left),
817 compute_nonterms (tree->right), NULL);
/* Fragment whose function header is not visible.  Emits the generated data
 * tables: mono_burg_arity, mono_burg_term_string, mono_burg_rule_string and
 * the mono_burg_nts arrays. */
/* With -p: a writable arity table sized MBMAX_OPCODES, filled at run time by
 * the generated mono_burg_init () and indexed by the terminal's name. */
828 if (predefined_terms) {
829 output ("guint8 mono_burg_arity [MBMAX_OPCODES];\n");
831 output ("void\nmono_burg_init (void)\n{\n");
833 for (l = term_list, i = 0; l; l = l->next) {
834 Term *t = (Term *)l->data;
836 output ("\tmono_burg_arity [%s] = %d; /* %s */\n", t->name, t->arity, t->name);
/* Otherwise: a const arity table laid out by terminal number. */
843 output ("const guint8 mono_burg_arity [] = {\n");
844 for (l = term_list, i = 0; l; l = l->next) {
845 Term *t = (Term *)l->data;
/* Loop over indices below the terminal's number; its body is not visible. */
847 while (i < t->number) {
852 output ("\t%d, /* %s */\n", t->arity, t->name);
/* Terminal names; the first entry is NULL. */
858 output ("const char *const mono_burg_term_string [] = {\n");
859 output ("\tNULL,\n");
860 for (l = term_list, i = 0; l; l = l->next) {
861 Term *t = (Term *)l->data;
862 output ("\t\"%s\",\n", t->name);
/* Printable form of each rule; the first entry is NULL. */
867 output ("const char * const mono_burg_rule_string [] = {\n");
868 output ("\tNULL,\n");
869 for (l = rule_list, i = 0; l; l = l->next) {
870 Rule *rule = (Rule *)l->data;
871 output ("\t\"%s: ", rule->lhs->name);
872 emit_tree_string (rule->tree);
/* sa and si each have one slot per rule; sa holds the nonterminal-list strings
 * compared below, and si supplies the array number printed for each rule. */
877 n = g_list_length (rule_list);
878 sa = g_new0 (char *, n);
879 si = g_new0 (int, n);
881 /* compress the _nts array: rules with the same nonterminal list share one array */
882 for (l = rule_list, i = 0, c = 0; l; l = l->next) {
883 Rule *rule = (Rule *)l->data;
884 char *s = compute_nonterms (rule->tree);
886 for (j = 0; j < c; j++)
887 if (!strcmp (sa [j], s))
/* Each emitted list is terminated by a 0 entry. */
892 output ("static const guint16 mono_burg_nts_%d [] = { %s0 };\n", c, s);
/* Per-rule table pointing at the (possibly shared) nts arrays. */
898 output ("const guint16 *const mono_burg_nts [] = {\n");
900 for (l = rule_list, i = 0; l; l = l->next) {
901 Rule *rule = (Rule *)l->data;
902 output ("\tmono_burg_nts_%d, ", si [i++]);
903 emit_rule_string (rule, "");
/* Fragment whose function header is not visible.  Emits the MBEmitFunc
 * typedef and the extern / prototype declarations of the generated API.  The
 * typedef and mono_burg_kids each appear in two variants (with MBState or with
 * MBTREE_TYPE); the selecting conditions are in omitted lines. */
912 output ("typedef void (*MBEmitFunc) (MBState *state, MBTREE_TYPE *tree, MBCGEN_TYPE *s);\n\n");
914 output ("typedef void (*MBEmitFunc) (MBTREE_TYPE *tree, MBCGEN_TYPE *s);\n\n");
916 output ("extern const char * const mono_burg_term_string [];\n");
917 output ("extern const char * const mono_burg_rule_string [];\n");
918 output ("extern const guint16 *const mono_burg_nts [];\n");
919 output ("extern MBEmitFunc const mono_burg_func [];\n");
921 output ("MBState *mono_burg_label (MBTREE_TYPE *tree, MBCOST_DATA *data);\n");
922 output ("int mono_burg_rule (MBState *state, int goal);\n");
925 output ("MBState **mono_burg_kids (MBState *state, int rulenr, MBState *kids []);\n");
927 output ("MBTREE_TYPE **mono_burg_kids (MBTREE_TYPE *tree, int rulenr, MBTREE_TYPE *kids []);\n");
/* mono_burg_init is generated only in the -p (predefined terms) table variant. */
929 output ("extern void mono_burg_init (void);\n");
/* Forward declaration: mark_reached and check_reach call each other. */
933 static void check_reach (NonTerm *n);
/* Walks a pattern tree: a nonterminal not yet flagged as reached is handed to
 * check_reach, and the left and right subtrees are visited recursively (their
 * guards are in omitted lines). */
936 mark_reached (Tree *tree)
938 if (tree->nonterm && !tree->nonterm->reached)
939 check_reach (tree->nonterm);
941 mark_reached (tree->left);
943 mark_reached (tree->right);
/* Visits every rule of nonterminal n and marks what its pattern tree reaches.
 * NOTE(review): the line that sets n->reached is not visible -- confirm it runs
 * before the loop, otherwise recursive grammars would not terminate. */
947 check_reach (NonTerm *n)
952 for (l = n->rules; l; l = l->next) {
953 Rule *rule = (Rule *)l->data;
954 mark_reached (rule->tree);
/* Fragment whose function header is not visible.  Grammar sanity checks run
 * after parsing; findings are reported with g_warning. */
/* A terminal whose arity is still -1 is reported as unused. */
963 for (l = term_list; l; l = l->next) {
964 Term *term = (Term *)l->data;
965 if (term->arity == -1)
966 g_warning ("unused terminal \"%s\"",term->name);
/* Reachability is computed from the first nonterminal in nonterm_list. */
969 check_reach (((NonTerm *)nonterm_list->data));
971 for (l = nonterm_list; l; l = l->next) {
972 NonTerm *n = (NonTerm *)l->data;
/* The test guarding this warning is in an omitted line. */
974 g_warning ("unreachable nonterm \"%s\"", n->name);
/* Usage text; the call that prints it is in omitted lines.  It mentions only
 * -d and -s, while main's visible option parsing also tests for -h, -e, -p, -c
 * and -D. */
982 "Usage is: monoburg -d file.h -s file.c [inputfile] \n");
/* GLib log handler (installed in main for G_LOG_LEVEL_WARNING): writes the
 * message, prefixed with "** WARNING **: ", to the FILE passed as user_data
 * (main passes stderr). */
987 warning_handler (const gchar *log_domain,
988 GLogLevelFlags log_level,
989 const gchar *message,
/* fprintf's result is deliberately discarded. */
992 (void) fprintf ((FILE *) user_data, "** WARNING **: %s\n", message);
/* Program entry point: parses the command line, opens the header output,
 * reads the input file(s), then writes the generated header and C source.
 * NOTE(review): many statements, including the parser invocation and most of
 * the emit calls, are in omitted lines. */
996 main (int argc, char *argv [])
999 char *deffile = NULL;
1000 GList *infiles = NULL;
1003 definedvars = g_hash_table_new (g_str_hash, g_str_equal);
/* Route GLib warnings through warning_handler, writing to stderr. */
1004 g_log_set_handler (NULL, G_LOG_LEVEL_WARNING, warning_handler, stderr);
/* Options are recognised by the single character after '-'; any other
 * argument is collected as an input file. */
1006 for (i = 1; i < argc; i++){
1007 if (argv [i][0] == '-'){
1008 if (argv [i][1] == 'h') {
1010 } else if (argv [i][1] == 'e') {
1012 } else if (argv [i][1] == 'p') {
1013 predefined_terms = 1;
1014 } else if (argv [i][1] == 'd') {
/* NOTE(review): argv [++i] is used without checking i + 1 < argc, so a
 * trailing -d (or -c below) yields argv [argc], which is NULL. */
1015 deffile = argv [++i];
1016 } else if (argv [i][1] == 's') {
1018 } else if (argv [i][1] == 'c') {
/* NOTE(review): atoi reports no conversion errors; strtol would. */
1019 default_cost = atoi (argv [++i]);
1020 } else if (argv [i][1] == 'D') {
/* The key is the text right after "-D", pointing into argv itself. */
1021 g_hash_table_insert (definedvars, &argv [i][2],
1022 GUINT_TO_POINTER (1));
1027 infiles = g_list_append (infiles, argv [i]);
/* Header output: opened for writing and wrapped in an include guard. */
1032 if (!(deffd = fopen (deffile, "w"))) {
1033 perror ("cant open header output file");
1037 output ("#ifndef _MONO_BURG_DEFS_\n");
1038 output ("#define _MONO_BURG_DEFS_\n\n");
/* Each collected input file is opened for reading. */
1046 char *infile = (char *)l->data;
1047 if (!(inputfd = fopen (infile, "r"))) {
1048 perror ("cant open input file");
1067 g_error ("no start symbol found");
1075 output ("#endif /* _MONO_BURG_DEFS_ */\n");
/* C output: if it cannot be opened, the header written above is removed. */
1081 if (!(cfd = fopen (cfile, "w"))) {
1082 perror ("cant open c output file");
1083 (void) remove (deffile);
/* The generated C file includes the generated header by the name given to -d. */
1090 output ("#include \"%s\"\n\n", deffile);
1095 emit_emitter_func ();