2 * monoburg.c: an iburg like code generator generator
5 * Dietmar Maurer (dietmar@ximian.com)
7 * (C) 2001 Ximian, Inc.
15 extern void yyparse (void);
17 static GHashTable *term_hash;
18 static GList *term_list;
19 static GHashTable *nonterm_hash;
20 static GList *nonterm_list;
21 static GList *rule_list;
22 static GList *prefix_list;
29 static int dag_mode = 0;
30 static int predefined_terms = 0;
31 static int default_cost = 0;
33 static void output (char *fmt, ...)
38 vfprintf (outputfd, fmt, ap);
43 create_rule (char *id, Tree *tree, char *code, char *cost, char *cfunc)
45 Rule *rule = g_new0 (Rule, 1);
48 cost = g_strdup_printf ("%d", default_cost);
50 rule->lhs = nonterm (id);
52 rule_list = g_list_append (rule_list, rule);
53 rule->cost = g_strdup (cost);
54 rule->cfunc = g_strdup (cfunc);
55 rule->code = g_strdup (code);
59 yyerror ("duplicated costs (constant costs and cost function)");
62 rule->cost = g_strdup_printf ("mono_burg_cost_%d (p, data)",
63 g_list_length (rule_list));
65 rule->cost = g_strdup_printf ("mono_burg_cost_%d (tree, data)",
66 g_list_length (rule_list));
70 rule->lhs->rules = g_list_append (rule->lhs->rules, rule);
73 tree->op->rules = g_list_append (tree->op->rules, rule);
75 tree->nonterm->chain = g_list_append (tree->nonterm->chain, rule);
79 create_tree (char *id, Tree *left, Tree *right)
81 int arity = (left != NULL) + (right != NULL);
83 Tree *tree = g_new0 (Tree, 1);
86 term = g_hash_table_lookup (term_hash, id);
88 // try if id has termprefix
91 for (pl = prefix_list; pl; pl = pl->next) {
92 char *pfx = (char *)pl->data;
93 if (!strncmp (pfx, id, strlen (pfx))) {
94 term = create_term (id, -1);
102 if (term->arity == -1)
105 if (term->arity != arity)
106 yyerror ("changed arity of terminal %s from %d to %d",
107 id, term->arity, arity);
113 tree->nonterm = nonterm (id);
120 check_term_num (char *key, Term *value, int num)
122 if (num != -1 && value->number == num)
123 yyerror ("duplicate terminal id \"%s\"", key);
127 create_term_prefix (char *id)
129 if (!predefined_terms)
130 yyerror ("%termprefix is only available with -p option");
132 prefix_list = g_list_prepend (prefix_list, g_strdup (id));
136 create_term (char *id, int num)
140 if (!predefined_terms && nonterm_list)
141 yyerror ("terminal definition after nonterminal definition");
144 yyerror ("invalid terminal number %d", num);
147 term_hash = g_hash_table_new (g_str_hash , g_str_equal);
149 g_hash_table_foreach (term_hash, (GHFunc) check_term_num, (gpointer) num);
151 term = g_new0 (Term, 1);
153 term->name = g_strdup (id);
157 term_list = g_list_append (term_list, term);
159 g_hash_table_insert (term_hash, term->name, term);
170 nonterm_hash = g_hash_table_new (g_str_hash , g_str_equal);
172 if ((nterm = g_hash_table_lookup (nonterm_hash, id)))
175 nterm = g_new0 (NonTerm, 1);
177 nterm->name = g_strdup (id);
178 nonterm_list = g_list_append (nonterm_list, nterm);
179 nterm->number = g_list_length (nonterm_list);
181 g_hash_table_insert (nonterm_hash, nterm->name, nterm);
187 start_nonterm (char *id)
189 static gboolean start_def;
192 yyerror ("start symbol redeclared");
199 emit_tree_string (Tree *tree)
202 output ("%s", tree->op->name);
203 if (tree->op->arity) {
205 emit_tree_string (tree->left);
208 emit_tree_string (tree->right);
213 output ("%s", tree->nonterm->name);
217 emit_rule_string (Rule *rule, char *fill)
219 output ("%s/* ", fill);
221 output ("%s: ", rule->lhs->name);
223 emit_tree_string (rule->tree);
231 GList *l = term_list;
235 Term *t = (Term *)l->data;
236 if (t->number == i) {
246 term_compare_func (Term *t1, Term *t2)
248 return t1->number - t2->number;
256 output ("#include <glib.h>\n");
259 output ("#ifndef MBTREE_TYPE\n#error MBTREE_TYPE undefined\n#endif\n");
260 output ("#ifndef MBTREE_OP\n#define MBTREE_OP(t) ((t)->op)\n#endif\n");
261 output ("#ifndef MBTREE_LEFT\n#define MBTREE_LEFT(t) ((t)->left)\n#endif\n");
262 output ("#ifndef MBTREE_RIGHT\n#define MBTREE_RIGHT(t) ((t)->right)\n#endif\n");
263 output ("#ifndef MBTREE_STATE\n#define MBTREE_STATE(t) ((t)->state)\n#endif\n");
264 output ("#ifndef MBCGEN_TYPE\n#define MBCGEN_TYPE int\n#endif\n");
265 output ("#ifndef MBALLOC_STATE\n#define MBALLOC_STATE g_new (MBState, 1)\n#endif\n");
266 output ("#ifndef MBCOST_DATA\n#define MBCOST_DATA gpointer\n#endif\n");
268 output ("#define MBMAXCOST 32768\n");
271 output ("#define MBCOND(x) if (!(x)) return MBMAXCOST;\n");
275 for (l = term_list; l; l = l->next) {
276 Term *t = (Term *)l->data;
278 t->number = next_term_num ();
280 term_list = g_list_sort (term_list, (GCompareFunc)term_compare_func);
282 for (l = term_list; l; l = l->next) {
283 Term *t = (Term *)l->data;
285 t->number = next_term_num ();
287 if (predefined_terms)
288 output ("#define MB_TERM_%s\t %s\n", t->name, t->name);
290 output ("#define MB_TERM_%s\t %d\n", t->name, t->number);
302 for (l = nonterm_list; l; l = l->next) {
303 NonTerm *n = (NonTerm *)l->data;
304 output ("#define MB_NTERM_%s\t%d\n", n->name, n->number);
306 output ("#define MB_MAX_NTERMS\t%d\n", g_list_length (nonterm_list));
316 output ("typedef struct _MBState MBState;\n");
317 output ("struct _MBState {\n");
318 output ("\tint\t\t op;\n");
321 output ("\tMBTREE_TYPE\t *tree;\n");
322 output ("\tgint8 reg1, reg2, reg3;\n");
323 output ("\tunsigned spilled:1;\n");
326 output ("\tMBState\t\t*left, *right;\n");
327 output ("\tguint16\t\tcost[%d];\n", g_list_length (nonterm_list) + 1);
329 for (l = nonterm_list; l; l = l->next) {
330 NonTerm *n = (NonTerm *)l->data;
331 g_assert (g_list_length (n->rules) < 256);
332 i = g_list_length (n->rules);
336 output ("\tunsigned int\t rule_%s:%d;\n", n->name, j);
347 for (l = nonterm_list; l; l = l->next) {
348 NonTerm *n = (NonTerm *)l->data;
349 output ("const int mono_burg_decode_%s[] = {\n", n->name);
351 for (rl = n->rules; rl; rl = rl->next) {
352 Rule *rule = (Rule *)rl->data;
353 output ("\t%d,\n", g_list_index (rule_list, rule) + 1);
361 emit_tree_match (char *st, Tree *t)
365 if (predefined_terms)
366 output ("\t\t\t%sop == %s /* %s */", st, t->op->name, t->op->name);
368 output ("\t\t\t%sop == %d /* %s */", st, t->op->number, t->op->name);
370 if (t->left && t->left->op) {
371 tn = g_strconcat (st, "left->", NULL);
373 emit_tree_match (tn, t->left);
377 if (t->right && t->right->op) {
378 tn = g_strconcat (st, "right->", NULL);
380 emit_tree_match (tn, t->right);
386 emit_rule_match (Rule *rule)
388 Tree *t = rule->tree;
390 if ((t->left && t->left->op) ||
391 (t->right && t->right->op)) {
392 output ("\t\tif (\n");
393 emit_tree_match ("p->", t);
394 output ("\n\t\t)\n");
399 emit_costs (char *st, Tree *t)
406 tn = g_strconcat (st, "left->", NULL);
407 emit_costs (tn, t->left);
412 tn = g_strconcat (st, "right->", NULL);
413 emit_costs (tn, t->right);
416 output ("%scost[MB_NTERM_%s] + ", st, t->nonterm->name);
420 emit_cond_assign (Rule *rule, char *cost, char *fill)
425 rc = g_strconcat ("c + ", cost, NULL);
430 output ("%sif (%s < p->cost[MB_NTERM_%s]) {\n", fill, rc, rule->lhs->name);
432 output ("%s\tp->cost[MB_NTERM_%s] = %s;\n", fill, rule->lhs->name, rc);
434 output ("%s\tp->rule_%s = %d;\n", fill, rule->lhs->name,
435 g_list_index (rule->lhs->rules, rule) + 1);
437 if (rule->lhs->chain)
438 output ("%s\tclosure_%s (p, %s);\n", fill, rule->lhs->name, rc);
440 output ("%s}\n", fill);
453 output ("static void\n");
454 output ("mono_burg_label_priv (MBTREE_TYPE *tree, MBCOST_DATA *data, MBState *p) {\n");
456 output ("static MBState *\n");
457 output ("mono_burg_label_priv (MBTREE_TYPE *tree, MBCOST_DATA *data) {\n");
460 output ("\tint arity;\n");
461 output ("\tint c;\n");
463 output ("\tMBState *p;\n");
464 output ("\tMBState *left = NULL, *right = NULL;\n\n");
466 output ("\tswitch (mono_burg_arity [MBTREE_OP(tree)]) {\n");
467 output ("\tcase 0:\n");
468 output ("\t\tbreak;\n");
469 output ("\tcase 1:\n");
471 output ("\t\tleft = MBALLOC_STATE;\n");
472 output ("\t\tmono_burg_label_priv (MBTREE_LEFT(tree), data, left);\n");
474 output ("\t\tleft = mono_burg_label_priv (MBTREE_LEFT(tree), data);\n");
475 output ("\t\tright = NULL;\n");
477 output ("\t\tbreak;\n");
478 output ("\tcase 2:\n");
480 output ("\t\tleft = MBALLOC_STATE;\n");
481 output ("\t\tmono_burg_label_priv (MBTREE_LEFT(tree), data, left);\n");
482 output ("\t\tright = MBALLOC_STATE;\n");
483 output ("\t\tmono_burg_label_priv (MBTREE_RIGHT(tree), data, right);\n");
485 output ("\t\tleft = mono_burg_label_priv (MBTREE_LEFT(tree), data);\n");
486 output ("\t\tright = mono_burg_label_priv (MBTREE_RIGHT(tree), data);\n");
490 output ("\tarity = (left != NULL) + (right != NULL);\n");
491 output ("\tg_assert (arity == mono_burg_arity [MBTREE_OP(tree)]);\n\n");
494 output ("\tp = MBALLOC_STATE;\n");
496 output ("\tmemset (p, 0, sizeof (MBState));\n");
497 output ("\tp->op = MBTREE_OP(tree);\n");
498 output ("\tp->left = left;\n");
499 output ("\tp->right = right;\n");
502 output ("\tp->tree = tree;\n");
504 for (l = nonterm_list, i = 0; l; l = l->next) {
505 output ("\tp->cost [%d] = 32767;\n", ++i);
509 output ("\tswitch (MBTREE_OP(tree)) {\n");
510 for (l = term_list; l; l = l->next) {
511 Term *t = (Term *)l->data;
514 if (predefined_terms)
515 output ("\tcase %s: /* %s */\n", t->name, t->name);
517 output ("\tcase %d: /* %s */\n", t->number, t->name);
519 for (rl = t->rules; rl; rl = rl->next) {
520 Rule *rule = (Rule *)rl->data;
521 Tree *t = rule->tree;
523 emit_rule_string (rule, "\t\t");
525 emit_rule_match (rule);
529 output ("\t\t\tc = ");
533 output ("%s;\n", rule->cost);
535 emit_cond_assign (rule, NULL, "\t\t\t");
540 output ("\t\tbreak;\n");
543 output ("\tdefault:\n");
544 output ("\t\tg_error (\"unknown operator: 0x%%04x\", MBTREE_OP(tree));\n");
548 output ("\tMBTREE_STATE(tree) = p;\n");
549 output ("\treturn p;\n");
554 output ("MBState *\n");
555 output ("mono_burg_label (MBTREE_TYPE *tree, MBCOST_DATA *data)\n{\n");
557 output ("\tMBState *p = MBALLOC_STATE;\n");
558 output ("\tmono_burg_label_priv (tree, data, p);\n");
560 output ("\tMBState *p = mono_burg_label_priv (tree, data);\n");
562 output ("\treturn p->rule_%s ? p : NULL;\n", ((NonTerm *)nonterm_list->data)->name);
567 compute_kids (char *ts, Tree *tree, int *n)
572 return g_strdup_printf ("\t\tkids[%d] = %s;\n", (*n)++, ts);
573 } else if (tree->op && tree->op->arity) {
577 res = compute_kids (g_strdup_printf ("%s->left", ts),
579 if (tree->op->arity == 2)
580 res2 = compute_kids (g_strdup_printf ("%s->right", ts),
583 res = compute_kids (g_strdup_printf ("MBTREE_LEFT(%s)", ts),
585 if (tree->op->arity == 2)
586 res2 = compute_kids (g_strdup_printf ("MBTREE_RIGHT(%s)", ts),
590 return g_strconcat (res, res2, NULL);
603 output ("mono_burg_rule (MBState *state, int goal)\n{\n");
605 output ("\tg_return_val_if_fail (state != NULL, 0);\n");
606 output ("\tg_return_val_if_fail (goal > 0, 0);\n\n");
608 output ("\tswitch (goal) {\n");
610 for (nl = nonterm_list; nl; nl = nl->next) {
611 NonTerm *n = (NonTerm *)nl->data;
612 output ("\tcase MB_NTERM_%s:\n", n->name);
613 output ("\t\treturn mono_burg_decode_%s [state->rule_%s];\n",
617 output ("\tdefault: g_assert_not_reached ();\n");
619 output ("\treturn 0;\n");
624 output ("MBState **\n");
625 output ("mono_burg_kids (MBState *state, int rulenr, MBState *kids [])\n{\n");
626 output ("\tg_return_val_if_fail (state != NULL, NULL);\n");
627 output ("\tg_return_val_if_fail (kids != NULL, NULL);\n\n");
630 output ("MBTREE_TYPE **\n");
631 output ("mono_burg_kids (MBTREE_TYPE *tree, int rulenr, MBTREE_TYPE *kids [])\n{\n");
632 output ("\tg_return_val_if_fail (tree != NULL, NULL);\n");
633 output ("\tg_return_val_if_fail (kids != NULL, NULL);\n\n");
636 output ("\tswitch (rulenr) {\n");
638 n = g_list_length (rule_list);
639 sa = g_new0 (char *, n);
640 si = g_new0 (int, n);
642 /* compress the case statement */
643 for (l = rule_list, i = 0, c = 0; l; l = l->next) {
644 Rule *rule = (Rule *)l->data;
649 k = compute_kids ("state", rule->tree, &kn);
651 k = compute_kids ("tree", rule->tree, &kn);
653 for (j = 0; j < c; j++)
654 if (!strcmp (sa [j], k))
662 for (i = 0; i < c; i++) {
663 for (l = rule_list, j = 0; l; l = l->next, j++)
665 output ("\tcase %d:\n", j + 1);
666 output ("%s", sa [i]);
667 output ("\t\tbreak;\n");
670 output ("\tdefault:\n\t\tg_assert_not_reached ();\n");
672 output ("\treturn kids;\n");
683 for (l = rule_list, i = 0; l; l = l->next) {
684 Rule *rule = (Rule *)l->data;
687 output ("static void ");
689 emit_rule_string (rule, "");
692 output ("mono_burg_emit_%d (MBState *state, MBTREE_TYPE *tree, MBCGEN_TYPE *s)\n", i);
694 output ("mono_burg_emit_%d (MBTREE_TYPE *tree, MBCGEN_TYPE *s)\n", i);
696 output ("%s\n", rule->code);
702 output ("MBEmitFunc const mono_burg_func [] = {\n");
703 output ("\tNULL,\n");
704 for (l = rule_list, i = 0; l; l = l->next) {
705 Rule *rule = (Rule *)l->data;
707 output ("\tmono_burg_emit_%d,\n", i);
709 output ("\tNULL,\n");
721 for (l = rule_list, i = 0; l; l = l->next) {
722 Rule *rule = (Rule *)l->data;
725 output ("inline static guint16\n");
727 emit_rule_string (rule, "");
730 output ("mono_burg_cost_%d (MBState *state, MBCOST_DATA *data)\n", i + 1);
732 output ("mono_burg_cost_%d (MBTREE_TYPE *tree, MBCOST_DATA *data)\n", i + 1);
734 output ("%s\n", rule->cfunc);
746 for (l = nonterm_list; l; l = l->next) {
747 NonTerm *n = (NonTerm *)l->data;
750 output ("static void closure_%s (MBState *p, int c);\n", n->name);
755 for (l = nonterm_list; l; l = l->next) {
756 NonTerm *n = (NonTerm *)l->data;
759 output ("static void\n");
760 output ("closure_%s (MBState *p, int c)\n{\n", n->name);
761 for (rl = n->chain; rl; rl = rl->next) {
762 Rule *rule = (Rule *)rl->data;
764 emit_rule_string (rule, "\t");
765 emit_cond_assign (rule, rule->cost, "\t");
773 compute_nonterms (Tree *tree)
779 return g_strdup_printf ("MB_NTERM_%s, ", tree->nonterm->name);
781 return g_strconcat (compute_nonterms (tree->left),
782 compute_nonterms (tree->right), NULL);
793 if (predefined_terms) {
794 output ("guint8 mono_burg_arity [MBMAX_OPCODES];\n");
796 output ("void\nmono_burg_init (void)\n{\n");
798 for (l = term_list, i = 0; l; l = l->next) {
799 Term *t = (Term *)l->data;
801 output ("\tmono_burg_arity [%s] = %d; /* %s */\n", t->name, t->arity, t->name);
808 output ("const guint8 mono_burg_arity [] = {\n");
809 for (l = term_list, i = 0; l; l = l->next) {
810 Term *t = (Term *)l->data;
812 while (i < t->number) {
817 output ("\t%d, /* %s */\n", t->arity, t->name);
823 output ("const char *const mono_burg_term_string [] = {\n");
824 output ("\tNULL,\n");
825 for (l = term_list, i = 0; l; l = l->next) {
826 Term *t = (Term *)l->data;
827 output ("\t\"%s\",\n", t->name);
832 output ("const char * const mono_burg_rule_string [] = {\n");
833 output ("\tNULL,\n");
834 for (l = rule_list, i = 0; l; l = l->next) {
835 Rule *rule = (Rule *)l->data;
836 output ("\t\"%s: ", rule->lhs->name);
837 emit_tree_string (rule->tree);
842 n = g_list_length (rule_list);
843 sa = g_new0 (char *, n);
844 si = g_new0 (int, n);
846 /* compress the _nts array */
847 for (l = rule_list, i = 0, c = 0; l; l = l->next) {
848 Rule *rule = (Rule *)l->data;
849 char *s = compute_nonterms (rule->tree);
851 for (j = 0; j < c; j++)
852 if (!strcmp (sa [j], s))
857 output ("static const guint16 mono_burg_nts_%d [] = { %s0 };\n", c, s);
863 output ("const guint16 *const mono_burg_nts [] = {\n");
865 for (l = rule_list, i = 0; l; l = l->next) {
866 Rule *rule = (Rule *)l->data;
867 output ("\tmono_burg_nts_%d, ", si [i++]);
868 emit_rule_string (rule, "");
877 output ("typedef void (*MBEmitFunc) (MBState *state, MBTREE_TYPE *tree, MBCGEN_TYPE *s);\n\n");
879 output ("typedef void (*MBEmitFunc) (MBTREE_TYPE *tree, MBCGEN_TYPE *s);\n\n");
881 output ("extern const char * const mono_burg_term_string [];\n");
882 output ("extern const char * const mono_burg_rule_string [];\n");
883 output ("extern const guint16 *const mono_burg_nts [];\n");
884 output ("extern MBEmitFunc const mono_burg_func [];\n");
886 output ("MBState *mono_burg_label (MBTREE_TYPE *tree, MBCOST_DATA *data);\n");
887 output ("int mono_burg_rule (MBState *state, int goal);\n");
890 output ("MBState **mono_burg_kids (MBState *state, int rulenr, MBState *kids []);\n");
892 output ("MBTREE_TYPE **mono_burg_kids (MBTREE_TYPE *tree, int rulenr, MBTREE_TYPE *kids []);\n");
894 output ("extern void mono_burg_init (void);\n");
898 static void check_reach (NonTerm *n);
901 mark_reached (Tree *tree)
903 if (tree->nonterm && !tree->nonterm->reached)
904 check_reach (tree->nonterm);
906 mark_reached (tree->left);
908 mark_reached (tree->right);
912 check_reach (NonTerm *n)
917 for (l = n->rules; l; l = l->next) {
918 Rule *rule = (Rule *)l->data;
919 mark_reached (rule->tree);
928 for (l = term_list; l; l = l->next) {
929 Term *term = (Term *)l->data;
930 if (term->arity == -1)
931 g_warning ("unused terminal \"%s\"",term->name);
934 check_reach (((NonTerm *)nonterm_list->data));
936 for (l = nonterm_list; l; l = l->next) {
937 NonTerm *n = (NonTerm *)l->data;
939 g_warning ("unreachable nonterm \"%s\"", n->name);
947 "Usage is: monoburg -d file.h -s file.c [inputfile] \n");
952 warning_handler (const gchar *log_domain,
953 GLogLevelFlags log_level,
954 const gchar *message,
957 (void) fprintf ((FILE *) user_data, "** WARNING **: %s\n", message);
961 main (int argc, char *argv [])
964 char *deffile = NULL;
965 GList *infiles = NULL;
968 g_log_set_handler (NULL, G_LOG_LEVEL_WARNING, warning_handler, stderr);
970 for (i = 1; i < argc; i++){
971 if (argv [i][0] == '-'){
972 if (argv [i][1] == 'h') {
974 } else if (argv [i][1] == 'e') {
976 } else if (argv [i][1] == 'p') {
977 predefined_terms = 1;
978 } else if (argv [i][1] == 'd') {
979 deffile = argv [++i];
980 } else if (argv [i][1] == 's') {
982 } else if (argv [i][1] == 'c') {
983 default_cost = atoi (argv [++i]);
988 infiles = g_list_append (infiles, argv [i]);
993 if (!(deffd = fopen (deffile, "w"))) {
994 perror ("cant open header output file");
998 output ("#ifndef _MONO_BURG_DEFS_\n");
999 output ("#define _MONO_BURG_DEFS_\n\n");
1007 char *infile = (char *)l->data;
1008 if (!(inputfd = fopen (infile, "r"))) {
1009 perror ("cant open input file");
1028 g_error ("no start symbol found");
1036 output ("#endif /* _MONO_BURG_DEFS_ */\n");
1042 if (!(cfd = fopen (cfile, "w"))) {
1043 perror ("cant open c output file");
1044 (void) remove (deffile);
1051 output ("#include \"%s\"\n\n", deffile);
1056 emit_emitter_func ();