2 * monoburg.c: an iburg-like code generator generator
5 * Dietmar Maurer (dietmar@ximian.com)
7 * (C) 2001 Ximian, Inc.
/* Parser entry point; its definition is not part of this excerpt. */
15 extern void yyparse (void);
/* Terminals: term_hash maps a terminal name to its Term (see create_term),
 * term_list holds the same Term objects as a list. */
17 static GHashTable *term_hash;
18 static GList *term_list;
/* Nonterminals: name -> NonTerm hash plus the list of all NonTerm objects. */
19 static GHashTable *nonterm_hash;
20 static GList *nonterm_list;
/* Every Rule appended by create_rule. */
21 static GList *rule_list;
/* Copies of the prefix strings registered through create_term_prefix. */
22 static GList *prefix_list;
29 static int dag_mode = 0;
/* Set to 1 by the -p command line switch in main. */
30 static int predefined_terms = 0;
/* Set from the argument of the -c switch in main; create_rule formats it
 * into a rule's cost string. */
31 static int default_cost = 0;
/*
 * output:
 * @fmt: printf-style format string, followed by its arguments.
 *
 * Formats the arguments with vfprintf and writes the result to the
 * stream outputfd (declared outside this excerpt).
 */
33 static void output (char *fmt, ...)
38 vfprintf (outputfd, fmt, ap);
/*
 * create_rule:
 * @id: name of the nonterminal on the left-hand side of the rule.
 * @tree: pattern tree of the rule.
 * @code: emit code for the rule (stored as a copy).
 * @cost: constant cost expression (stored as a copy).
 * @cfunc: body of a cost function (stored as a copy).
 *
 * Allocates a zeroed Rule, appends it to rule_list and registers it with
 * its left-hand-side nonterminal. Several statements below are guarded by
 * conditions that are not visible in this excerpt.
 */
43 create_rule (char *id, Tree *tree, char *code, char *cost, char *cfunc)
45 Rule *rule = g_new0 (Rule, 1);
/* Replace cost by default_cost formatted as a decimal string.
 * NOTE(review): this string is duplicated again by g_strdup below and no
 * matching g_free is visible here - confirm whether it leaks. */
48 cost = g_strdup_printf ("%d", default_cost);
50 rule->lhs = nonterm (id);
52 rule_list = g_list_append (rule_list, rule);
53 rule->cost = g_strdup (cost);
54 rule->cfunc = g_strdup (cfunc);
55 rule->code = g_strdup (code);
59 yyerror ("duplicated costs (constant costs and cost function)");
/* The cost expression becomes a call to a generated mono_burg_cost_N
 * function. The rule was appended to rule_list above, so the current list
 * length is this rule's 1-based position. The two variants differ only in
 * the first argument name (p vs. tree); the selecting condition is not shown. */
62 rule->cost = g_strdup_printf ("mono_burg_cost_%d (p, data)",
63 g_list_length (rule_list));
65 rule->cost = g_strdup_printf ("mono_burg_cost_%d (tree, data)",
66 g_list_length (rule_list));
/* Register the rule with its left-hand-side nonterminal. */
70 rule->lhs->rules = g_list_append (rule->lhs->rules, rule);
/* The rule is recorded on the tree's operator (op->rules) and/or on the
 * tree's nonterminal chain list; the deciding condition is not shown here. */
73 tree->op->rules = g_list_append (tree->op->rules, rule);
75 tree->nonterm->chain = g_list_append (tree->nonterm->chain, rule);
/*
 * create_tree:
 * @id: name of a terminal or nonterminal.
 * @left: left subtree or NULL.
 * @right: right subtree or NULL.
 *
 * Allocates a zeroed Tree node for @id. @id is first looked up as a
 * terminal; the nonterminal assignment at the end presumably belongs to
 * the branch taken when no terminal matches (that branch structure is not
 * visible in this excerpt).
 */
79 create_tree (char *id, Tree *left, Tree *right)
/* arity = number of non-NULL children (0, 1 or 2) */
81 int arity = (left != NULL) + (right != NULL);
83 Tree *tree = g_new0 (Tree, 1);
86 term = g_hash_table_lookup (term_hash, id);
88 // check whether id starts with one of the registered term prefixes
91 for (pl = prefix_list; pl; pl = pl->next) {
92 char *pfx = (char *)pl->data;
/* strncmp over strlen (pfx) characters: true when id begins with pfx */
93 if (!strncmp (pfx, id, strlen (pfx))) {
/* create the terminal on the fly, passing -1 as its number */
94 term = create_term (id, -1);
/* An arity of -1 is tested separately from the mismatch check below; the
 * statement it guards is not visible here. */
102 if (term->arity == -1)
105 if (term->arity != arity)
106 yyerror ("changed arity of terminal %s from %d to %d",
107 id, term->arity, arity);
113 tree->nonterm = nonterm (id);
/*
 * check_term_num:
 * @key: terminal name (hash key).
 * @value: the Term stored under @key.
 * @num: terminal number to check against.
 *
 * Passed to g_hash_table_foreach by create_term. Reports a duplicate
 * terminal id when @num is not -1 and equals value->number.
 * NOTE(review): the third parameter is declared int while the call site
 * casts this function to GHFunc, whose user-data argument is a gpointer -
 * confirm this is intended.
 */
120 check_term_num (char *key, Term *value, int num)
122 if (num != -1 && value->number == num)
123 yyerror ("duplicate terminal id \"%s\"", key);
/*
 * create_term_prefix:
 * @id: prefix string to register.
 *
 * Reports an error through yyerror when predefined_terms is not set (main
 * sets it for the -p switch), then prepends a copy of @id to prefix_list.
 */
127 create_term_prefix (char *id)
129 if (!predefined_terms)
130 yyerror ("%termprefix is only available with -p option");
132 prefix_list = g_list_prepend (prefix_list, g_strdup (id));
/*
 * create_term:
 * @id: terminal name.
 * @num: terminal number; create_tree passes -1 for terminals it creates
 * from a prefix match.
 *
 * Allocates a zeroed Term named @id, appends it to term_list and inserts
 * it into term_hash under its own name copy. Some guards and assignments
 * are not visible in this excerpt.
 */
136 create_term (char *id, int num)
/* Without predefined terms, terminals may not follow any nonterminal. */
140 if (!predefined_terms && nonterm_list)
141 yyerror ("terminal definition after nonterminal definition");
144 yyerror ("invalid terminal number %d", num);
/* String-keyed hash; presumably created only on first use - the guarding
 * condition is not shown. */
147 term_hash = g_hash_table_new (g_str_hash , g_str_equal);
/* Run check_term_num over every existing terminal.
 * NOTE(review): num is passed by casting the int straight to gpointer;
 * GLib's GINT_TO_POINTER macro is the documented way to do this. */
149 g_hash_table_foreach (term_hash, (GHFunc) check_term_num, (gpointer) num);
151 term = g_new0 (Term, 1);
153 term->name = g_strdup (id);
157 term_list = g_list_append (term_list, term);
/* The key is the Term's own name copy, so key and value share lifetime. */
159 g_hash_table_insert (term_hash, term->name, term);
170 nonterm_hash = g_hash_table_new (g_str_hash , g_str_equal);
172 if ((nterm = g_hash_table_lookup (nonterm_hash, id)))
175 nterm = g_new0 (NonTerm, 1);
177 nterm->name = g_strdup (id);
178 nonterm_list = g_list_append (nonterm_list, nterm);
179 nterm->number = g_list_length (nonterm_list);
181 g_hash_table_insert (nonterm_hash, nterm->name, nterm);
/*
 * start_nonterm:
 * @id: name of the start symbol.
 *
 * Handles a start symbol declaration. start_def is a function-local static,
 * so its value persists across calls; presumably it records that a start
 * symbol was already declared, which triggers the error below (the
 * condition itself is not visible in this excerpt).
 */
187 start_nonterm (char *id)
189 static gboolean start_def;
192 yyerror ("start symbol redeclared");
/*
 * emit_tree_string:
 * @tree: pattern tree to print.
 *
 * Writes a textual form of @tree through output (): the operator name,
 * recursing into the left and right subtrees when the operator has a
 * non-zero arity. The final statement prints the nonterminal name,
 * presumably for nodes without an operator (branch not visible here).
 */
199 emit_tree_string (Tree *tree)
202 output ("%s", tree->op->name);
203 if (tree->op->arity) {
205 emit_tree_string (tree->left);
208 emit_tree_string (tree->right);
213 output ("%s", tree->nonterm->name);
/*
 * emit_rule_string:
 * @rule: rule to describe.
 * @fill: string written first, used by callers as indentation.
 *
 * Writes @fill followed by an opening C comment marker, then the name of
 * the rule's left-hand-side nonterminal with ": ", then the rule's tree
 * via emit_tree_string. The closing part is not visible in this excerpt.
 */
217 emit_rule_string (Rule *rule, char *fill)
219 output ("%s/* ", fill);
221 output ("%s: ", rule->lhs->name);
223 emit_tree_string (rule->tree);
231 GList *l = term_list;
235 Term *t = (Term *)l->data;
236 if (t->number == i) {
/*
 * term_compare_func:
 *
 * Comparison callback handed to g_list_sort for term_list: returns the
 * difference of the two terminal numbers, so terminals sort by ascending
 * number.
 */
246 term_compare_func (Term *t1, Term *t2)
248 return t1->number - t2->number;
256 output ("#include <glib.h>\n");
259 output ("#ifndef MBTREE_TYPE\n#error MBTREE_TYPE undefined\n#endif\n");
260 output ("#ifndef MBTREE_OP\n#define MBTREE_OP(t) ((t)->op)\n#endif\n");
261 output ("#ifndef MBTREE_LEFT\n#define MBTREE_LEFT(t) ((t)->left)\n#endif\n");
262 output ("#ifndef MBTREE_RIGHT\n#define MBTREE_RIGHT(t) ((t)->right)\n#endif\n");
263 output ("#ifndef MBTREE_STATE\n#define MBTREE_STATE(t) ((t)->state)\n#endif\n");
264 output ("#ifndef MBCGEN_TYPE\n#define MBCGEN_TYPE int\n#endif\n");
265 output ("#ifndef MBALLOC_STATE\n#define MBALLOC_STATE g_new (MBState, 1)\n#endif\n");
266 output ("#ifndef MBCOST_DATA\n#define MBCOST_DATA gpointer\n#endif\n");
268 output ("#define MBMAXCOST 32768\n");
271 output ("#define MBCOND(x) if (!(x)) return MBMAXCOST;\n");
275 for (l = term_list; l; l = l->next) {
276 Term *t = (Term *)l->data;
278 t->number = next_term_num ();
280 term_list = g_list_sort (term_list, (GCompareFunc)term_compare_func);
282 for (l = term_list; l; l = l->next) {
283 Term *t = (Term *)l->data;
285 t->number = next_term_num ();
287 if (predefined_terms)
288 output ("#define MB_TERM_%s\t %s\n", t->name, t->name);
290 output ("#define MB_TERM_%s\t %d\n", t->name, t->number);
302 for (l = nonterm_list; l; l = l->next) {
303 NonTerm *n = (NonTerm *)l->data;
304 output ("#define MB_NTERM_%s\t%d\n", n->name, n->number);
306 output ("#define MB_MAX_NTERMS\t%d\n", g_list_length (nonterm_list));
316 output ("typedef struct _MBState MBState;\n");
317 output ("struct _MBState {\n");
318 output ("\tint\t\t op;\n");
321 output ("\tMBTREE_TYPE\t *tree;\n");
322 output ("\tgint32 reg1, reg2;\n");
325 output ("\tMBState\t\t*left, *right;\n");
326 output ("\tguint16\t\tcost[%d];\n", g_list_length (nonterm_list) + 1);
328 for (l = nonterm_list; l; l = l->next) {
329 NonTerm *n = (NonTerm *)l->data;
330 g_assert (g_list_length (n->rules) < 256);
331 i = g_list_length (n->rules);
335 output ("\tunsigned int\t rule_%s:%d;\n", n->name, j);
346 for (l = nonterm_list; l; l = l->next) {
347 NonTerm *n = (NonTerm *)l->data;
348 output ("const int mono_burg_decode_%s[] = {\n", n->name);
350 for (rl = n->rules; rl; rl = rl->next) {
351 Rule *rule = (Rule *)rl->data;
352 output ("\t%d,\n", g_list_index (rule_list, rule) + 1);
/*
 * emit_tree_match:
 * @st: access path prefix in the generated code ("p->" at the top level).
 * @t: pattern tree whose operators are to be tested.
 *
 * Emits "<st>op == ..." comparisons for @t and recurses into children that
 * carry an operator, extending the path with "left->" or "right->".
 */
360 emit_tree_match (char *st, Tree *t)
/* non-zero when st is not "p->", i.e. this is not the top-level call */
363 int not_first = strcmp (st, "p->");
365 /* we can omit this check at the top level */
/* With predefined terms the comparison uses the terminal's name,
 * otherwise its number. */
367 if (predefined_terms)
368 output ("\t\t\t%sop == %s /* %s */", st, t->op->name, t->op->name);
370 output ("\t\t\t%sop == %d /* %s */", st, t->op->number, t->op->name);
373 if (t->left && t->left->op) {
374 tn = g_strconcat (st, "left->", NULL);
378 emit_tree_match (tn, t->left);
382 if (t->right && t->right->op) {
383 tn = g_strconcat (st, "right->", NULL);
386 emit_tree_match (tn, t->right);
/*
 * emit_rule_match:
 * @rule: rule whose pattern is to be matched.
 *
 * When a child of the rule's tree carries an operator, emits an "if (...)"
 * in the generated code whose condition is produced by emit_tree_match
 * starting at the path "p->".
 */
392 emit_rule_match (Rule *rule)
394 Tree *t = rule->tree;
396 if ((t->left && t->left->op) ||
397 (t->right && t->right->op)) {
398 output ("\t\tif (\n");
399 emit_tree_match ("p->", t);
400 output ("\n\t\t)\n");
/*
 * emit_costs:
 * @st: access path prefix in the generated code.
 * @t: pattern tree.
 *
 * Emits terms of a cost sum. Recursion into the children extends the path
 * with "left->" / "right->"; the last statement prints
 * "<st>cost[MB_NTERM_<name>] + " for the node's nonterminal. The conditions
 * selecting between these statements are not visible in this excerpt.
 */
405 emit_costs (char *st, Tree *t)
412 tn = g_strconcat (st, "left->", NULL);
413 emit_costs (tn, t->left);
418 tn = g_strconcat (st, "right->", NULL);
419 emit_costs (tn, t->right);
422 output ("%scost[MB_NTERM_%s] + ", st, t->nonterm->name);
/*
 * emit_cond_assign:
 * @rule: rule being applied.
 * @cost: extra cost expression; one caller in this file passes NULL.
 * @fill: indentation string prepended to each emitted line.
 *
 * Emits generated code that, when the cost expression rc is lower than the
 * cost stored for the rule's left-hand-side nonterminal, stores rc and the
 * rule index and, if that nonterminal has chain rules, calls its closure
 * function.
 */
426 emit_cond_assign (Rule *rule, char *cost, char *fill)
/* rc = "c + <cost>"; how a NULL cost is handled is not visible here */
431 rc = g_strconcat ("c + ", cost, NULL);
436 output ("%sif (%s < p->cost[MB_NTERM_%s]) {\n", fill, rc, rule->lhs->name);
438 output ("%s\tp->cost[MB_NTERM_%s] = %s;\n", fill, rule->lhs->name, rc);
/* g_list_index + 1: the rule's 1-based position in its nonterminal's
 * rule list */
440 output ("%s\tp->rule_%s = %d;\n", fill, rule->lhs->name,
441 g_list_index (rule->lhs->rules, rule) + 1);
443 if (rule->lhs->chain)
444 output ("%s\tclosure_%s (p, %s);\n", fill, rule->lhs->name, rc);
446 output ("%s}\n", fill);
459 output ("static void\n");
460 output ("mono_burg_label_priv (MBTREE_TYPE *tree, MBCOST_DATA *data, MBState *p) {\n");
462 output ("static MBState *\n");
463 output ("mono_burg_label_priv (MBTREE_TYPE *tree, MBCOST_DATA *data) {\n");
466 output ("\tint arity;\n");
467 output ("\tint c;\n");
469 output ("\tMBState *p;\n");
470 output ("\tMBState *left = NULL, *right = NULL;\n\n");
472 output ("\tswitch (mono_burg_arity [MBTREE_OP(tree)]) {\n");
473 output ("\tcase 0:\n");
474 output ("\t\tbreak;\n");
475 output ("\tcase 1:\n");
477 output ("\t\tleft = MBALLOC_STATE;\n");
478 output ("\t\tmono_burg_label_priv (MBTREE_LEFT(tree), data, left);\n");
480 output ("\t\tleft = mono_burg_label_priv (MBTREE_LEFT(tree), data);\n");
481 output ("\t\tright = NULL;\n");
483 output ("\t\tbreak;\n");
484 output ("\tcase 2:\n");
486 output ("\t\tleft = MBALLOC_STATE;\n");
487 output ("\t\tmono_burg_label_priv (MBTREE_LEFT(tree), data, left);\n");
488 output ("\t\tright = MBALLOC_STATE;\n");
489 output ("\t\tmono_burg_label_priv (MBTREE_RIGHT(tree), data, right);\n");
491 output ("\t\tleft = mono_burg_label_priv (MBTREE_LEFT(tree), data);\n");
492 output ("\t\tright = mono_burg_label_priv (MBTREE_RIGHT(tree), data);\n");
496 output ("\tarity = (left != NULL) + (right != NULL);\n");
497 output ("\tg_assert (arity == mono_burg_arity [MBTREE_OP(tree)]);\n\n");
500 output ("\tp = MBALLOC_STATE;\n");
502 output ("\tmemset (p, 0, sizeof (MBState));\n");
503 output ("\tp->op = MBTREE_OP(tree);\n");
504 output ("\tp->left = left;\n");
505 output ("\tp->right = right;\n");
508 output ("\tp->tree = tree;\n");
510 for (l = nonterm_list, i = 0; l; l = l->next) {
511 output ("\tp->cost [%d] = 32767;\n", ++i);
515 output ("\tswitch (MBTREE_OP(tree)) {\n");
516 for (l = term_list; l; l = l->next) {
517 Term *t = (Term *)l->data;
520 if (predefined_terms)
521 output ("\tcase %s: /* %s */\n", t->name, t->name);
523 output ("\tcase %d: /* %s */\n", t->number, t->name);
525 for (rl = t->rules; rl; rl = rl->next) {
526 Rule *rule = (Rule *)rl->data;
527 Tree *t = rule->tree;
529 emit_rule_string (rule, "\t\t");
531 emit_rule_match (rule);
535 output ("\t\t\tc = ");
539 output ("%s;\n", rule->cost);
541 emit_cond_assign (rule, NULL, "\t\t\t");
546 output ("\t\tbreak;\n");
549 output ("\tdefault:\n");
550 output ("#ifdef MBGET_OP_NAME\n");
551 output ("\t\tg_error (\"unknown operator: %%s\", MBGET_OP_NAME(MBTREE_OP(tree)));\n");
553 output ("\t\tg_error (\"unknown operator: 0x%%04x\", MBTREE_OP(tree));\n");
558 output ("\tMBTREE_STATE(tree) = p;\n");
559 output ("\treturn p;\n");
564 output ("MBState *\n");
565 output ("mono_burg_label (MBTREE_TYPE *tree, MBCOST_DATA *data)\n{\n");
567 output ("\tMBState *p = MBALLOC_STATE;\n");
568 output ("\tmono_burg_label_priv (tree, data, p);\n");
570 output ("\tMBState *p = mono_burg_label_priv (tree, data);\n");
572 output ("\treturn p->rule_%s ? p : NULL;\n", ((NonTerm *)nonterm_list->data)->name);
/*
 * compute_kids:
 * @ts: access expression for the current node in the generated code.
 * @tree: pattern tree.
 * @n: in/out counter of kids assigned so far.
 *
 * Returns a newly allocated string of "kids[i] = <expr>;" assignments.
 * One assignment is produced directly and *n is post-incremented; for
 * operators with non-zero arity the function recurses into the left child
 * and, for arity 2, the right child, then concatenates both results. Two
 * path styles exist ("->left" and MBTREE_LEFT(...)); the condition choosing
 * between them is not visible in this excerpt.
 * NOTE(review): the g_strdup_printf results passed into the recursive
 * calls are not freed in the visible lines.
 */
577 compute_kids (char *ts, Tree *tree, int *n)
582 return g_strdup_printf ("\t\tkids[%d] = %s;\n", (*n)++, ts);
583 } else if (tree->op && tree->op->arity) {
587 res = compute_kids (g_strdup_printf ("%s->left", ts),
589 if (tree->op->arity == 2)
590 res2 = compute_kids (g_strdup_printf ("%s->right", ts),
593 res = compute_kids (g_strdup_printf ("MBTREE_LEFT(%s)", ts),
595 if (tree->op->arity == 2)
596 res2 = compute_kids (g_strdup_printf ("MBTREE_RIGHT(%s)", ts),
600 return g_strconcat (res, res2, NULL);
613 output ("mono_burg_rule (MBState *state, int goal)\n{\n");
615 output ("\tg_return_val_if_fail (state != NULL, 0);\n");
616 output ("\tg_return_val_if_fail (goal > 0, 0);\n\n");
618 output ("\tswitch (goal) {\n");
620 for (nl = nonterm_list; nl; nl = nl->next) {
621 NonTerm *n = (NonTerm *)nl->data;
622 output ("\tcase MB_NTERM_%s:\n", n->name);
623 output ("\t\treturn mono_burg_decode_%s [state->rule_%s];\n",
627 output ("\tdefault: g_assert_not_reached ();\n");
629 output ("\treturn 0;\n");
634 output ("MBState **\n");
635 output ("mono_burg_kids (MBState *state, int rulenr, MBState *kids [])\n{\n");
636 output ("\tg_return_val_if_fail (state != NULL, NULL);\n");
637 output ("\tg_return_val_if_fail (kids != NULL, NULL);\n\n");
640 output ("MBTREE_TYPE **\n");
641 output ("mono_burg_kids (MBTREE_TYPE *tree, int rulenr, MBTREE_TYPE *kids [])\n{\n");
642 output ("\tg_return_val_if_fail (tree != NULL, NULL);\n");
643 output ("\tg_return_val_if_fail (kids != NULL, NULL);\n\n");
646 output ("\tswitch (rulenr) {\n");
648 n = g_list_length (rule_list);
649 sa = g_new0 (char *, n);
650 si = g_new0 (int, n);
652 /* compress the case statement */
653 for (l = rule_list, i = 0, c = 0; l; l = l->next) {
654 Rule *rule = (Rule *)l->data;
659 k = compute_kids ("state", rule->tree, &kn);
661 k = compute_kids ("tree", rule->tree, &kn);
663 for (j = 0; j < c; j++)
664 if (!strcmp (sa [j], k))
672 for (i = 0; i < c; i++) {
673 for (l = rule_list, j = 0; l; l = l->next, j++)
675 output ("\tcase %d:\n", j + 1);
676 output ("%s", sa [i]);
677 output ("\t\tbreak;\n");
680 output ("\tdefault:\n\t\tg_assert_not_reached ();\n");
682 output ("\treturn kids;\n");
693 for (l = rule_list, i = 0; l; l = l->next) {
694 Rule *rule = (Rule *)l->data;
697 output ("static void ");
699 emit_rule_string (rule, "");
702 output ("mono_burg_emit_%d (MBState *state, MBTREE_TYPE *tree, MBCGEN_TYPE *s)\n", i);
704 output ("mono_burg_emit_%d (MBTREE_TYPE *tree, MBCGEN_TYPE *s)\n", i);
706 output ("%s\n", rule->code);
712 output ("MBEmitFunc const mono_burg_func [] = {\n");
713 output ("\tNULL,\n");
714 for (l = rule_list, i = 0; l; l = l->next) {
715 Rule *rule = (Rule *)l->data;
717 output ("\tmono_burg_emit_%d,\n", i);
719 output ("\tNULL,\n");
731 for (l = rule_list, i = 0; l; l = l->next) {
732 Rule *rule = (Rule *)l->data;
735 output ("inline static guint16\n");
737 emit_rule_string (rule, "");
740 output ("mono_burg_cost_%d (MBState *state, MBCOST_DATA *data)\n", i + 1);
742 output ("mono_burg_cost_%d (MBTREE_TYPE *tree, MBCOST_DATA *data)\n", i + 1);
744 output ("%s\n", rule->cfunc);
756 for (l = nonterm_list; l; l = l->next) {
757 NonTerm *n = (NonTerm *)l->data;
760 output ("static void closure_%s (MBState *p, int c);\n", n->name);
765 for (l = nonterm_list; l; l = l->next) {
766 NonTerm *n = (NonTerm *)l->data;
769 output ("static void\n");
770 output ("closure_%s (MBState *p, int c)\n{\n", n->name);
771 for (rl = n->chain; rl; rl = rl->next) {
772 Rule *rule = (Rule *)rl->data;
774 emit_rule_string (rule, "\t");
775 emit_cond_assign (rule, rule->cost, "\t");
/*
 * compute_nonterms:
 * @tree: pattern tree.
 *
 * Returns a newly allocated string listing "MB_NTERM_<name>, " entries:
 * one entry for a node's nonterminal, or the concatenation of the results
 * for the left and right subtrees. The conditions selecting between the
 * two returns are not visible in this excerpt.
 * NOTE(review): the two recursive results handed to g_strconcat are not
 * freed in the visible lines.
 */
783 compute_nonterms (Tree *tree)
789 return g_strdup_printf ("MB_NTERM_%s, ", tree->nonterm->name);
791 return g_strconcat (compute_nonterms (tree->left),
792 compute_nonterms (tree->right), NULL);
803 if (predefined_terms) {
804 output ("guint8 mono_burg_arity [MBMAX_OPCODES];\n");
806 output ("void\nmono_burg_init (void)\n{\n");
808 for (l = term_list, i = 0; l; l = l->next) {
809 Term *t = (Term *)l->data;
811 output ("\tmono_burg_arity [%s] = %d; /* %s */\n", t->name, t->arity, t->name);
818 output ("const guint8 mono_burg_arity [] = {\n");
819 for (l = term_list, i = 0; l; l = l->next) {
820 Term *t = (Term *)l->data;
822 while (i < t->number) {
827 output ("\t%d, /* %s */\n", t->arity, t->name);
833 output ("const char *const mono_burg_term_string [] = {\n");
834 output ("\tNULL,\n");
835 for (l = term_list, i = 0; l; l = l->next) {
836 Term *t = (Term *)l->data;
837 output ("\t\"%s\",\n", t->name);
842 output ("const char * const mono_burg_rule_string [] = {\n");
843 output ("\tNULL,\n");
844 for (l = rule_list, i = 0; l; l = l->next) {
845 Rule *rule = (Rule *)l->data;
846 output ("\t\"%s: ", rule->lhs->name);
847 emit_tree_string (rule->tree);
852 n = g_list_length (rule_list);
853 sa = g_new0 (char *, n);
854 si = g_new0 (int, n);
856 /* compress the _nts array */
857 for (l = rule_list, i = 0, c = 0; l; l = l->next) {
858 Rule *rule = (Rule *)l->data;
859 char *s = compute_nonterms (rule->tree);
861 for (j = 0; j < c; j++)
862 if (!strcmp (sa [j], s))
867 output ("static const guint16 mono_burg_nts_%d [] = { %s0 };\n", c, s);
873 output ("const guint16 *const mono_burg_nts [] = {\n");
875 for (l = rule_list, i = 0; l; l = l->next) {
876 Rule *rule = (Rule *)l->data;
877 output ("\tmono_burg_nts_%d, ", si [i++]);
878 emit_rule_string (rule, "");
887 output ("typedef void (*MBEmitFunc) (MBState *state, MBTREE_TYPE *tree, MBCGEN_TYPE *s);\n\n");
889 output ("typedef void (*MBEmitFunc) (MBTREE_TYPE *tree, MBCGEN_TYPE *s);\n\n");
891 output ("extern const char * const mono_burg_term_string [];\n");
892 output ("extern const char * const mono_burg_rule_string [];\n");
893 output ("extern const guint16 *const mono_burg_nts [];\n");
894 output ("extern MBEmitFunc const mono_burg_func [];\n");
896 output ("MBState *mono_burg_label (MBTREE_TYPE *tree, MBCOST_DATA *data);\n");
897 output ("int mono_burg_rule (MBState *state, int goal);\n");
900 output ("MBState **mono_burg_kids (MBState *state, int rulenr, MBState *kids []);\n");
902 output ("MBTREE_TYPE **mono_burg_kids (MBTREE_TYPE *tree, int rulenr, MBTREE_TYPE *kids []);\n");
904 output ("extern void mono_burg_init (void);\n");
908 static void check_reach (NonTerm *n);
/*
 * mark_reached:
 * @tree: pattern tree to walk.
 *
 * Calls check_reach for the node's nonterminal when it has one that is not
 * yet flagged as reached, then recurses into the left and right subtrees.
 * Any NULL guards around the recursive calls are not visible in this
 * excerpt.
 */
911 mark_reached (Tree *tree)
913 if (tree->nonterm && !tree->nonterm->reached)
914 check_reach (tree->nonterm);
916 mark_reached (tree->left);
918 mark_reached (tree->right);
/*
 * check_reach:
 * @n: nonterminal to start from.
 *
 * Walks the tree of every rule of @n with mark_reached, which in turn
 * calls back into check_reach for nonterminals not yet reached. Where the
 * reached flag is set is not visible in this excerpt.
 */
922 check_reach (NonTerm *n)
927 for (l = n->rules; l; l = l->next) {
928 Rule *rule = (Rule *)l->data;
929 mark_reached (rule->tree);
938 for (l = term_list; l; l = l->next) {
939 Term *term = (Term *)l->data;
940 if (term->arity == -1)
941 g_warning ("unused terminal \"%s\"",term->name);
944 check_reach (((NonTerm *)nonterm_list->data));
946 for (l = nonterm_list; l; l = l->next) {
947 NonTerm *n = (NonTerm *)l->data;
949 g_warning ("unreachable nonterm \"%s\"", n->name);
957 "Usage is: monoburg -d file.h -s file.c [inputfile] \n");
/*
 * warning_handler:
 * @log_domain: log domain (not used in the visible lines).
 * @log_level: log level (not used in the visible lines).
 * @message: text of the warning.
 *
 * GLib log handler installed by main for G_LOG_LEVEL_WARNING. Prints
 * @message, prefixed with "** WARNING **: ", to the FILE pointer passed as
 * user data (main registers stderr).
 */
962 warning_handler (const gchar *log_domain,
963 GLogLevelFlags log_level,
964 const gchar *message,
967 (void) fprintf ((FILE *) user_data, "** WARNING **: %s\n", message);
/*
 * main:
 *
 * Parses the command line, opens the header output file, reads the input
 * files and writes the generated header and C source. Large parts of the
 * function are not visible in this excerpt.
 */
971 main (int argc, char *argv [])
974 char *deffile = NULL;
975 GList *infiles = NULL;
/* Route GLib warnings through warning_handler, with stderr as user data. */
978 g_log_set_handler (NULL, G_LOG_LEVEL_WARNING, warning_handler, stderr);
/* Arguments starting with '-' are switches selected by their second
 * character; everything else is collected as an input file. */
980 for (i = 1; i < argc; i++){
981 if (argv [i][0] == '-'){
982 if (argv [i][1] == 'h') {
984 } else if (argv [i][1] == 'e') {
986 } else if (argv [i][1] == 'p') {
987 predefined_terms = 1;
988 } else if (argv [i][1] == 'd') {
/* NOTE(review): argv [++i] is read without checking i against argc; when
 * -d is the last argument this yields argv [argc], which is NULL. */
989 deffile = argv [++i];
990 } else if (argv [i][1] == 's') {
992 } else if (argv [i][1] == 'c') {
/* NOTE(review): same missing bounds check; atoi on a NULL pointer is
 * undefined, and atoi reports no conversion errors (strtol does). */
993 default_cost = atoi (argv [++i]);
998 infiles = g_list_append (infiles, argv [i]);
1003 if (!(deffd = fopen (deffile, "w"))) {
1004 perror ("cant open header output file");
/* Include guard around the generated header. */
1008 output ("#ifndef _MONO_BURG_DEFS_\n");
1009 output ("#define _MONO_BURG_DEFS_\n\n");
1017 char *infile = (char *)l->data;
1018 if (!(inputfd = fopen (infile, "r"))) {
1019 perror ("cant open input file");
1038 g_error ("no start symbol found");
1046 output ("#endif /* _MONO_BURG_DEFS_ */\n");
1052 if (!(cfd = fopen (cfile, "w"))) {
1053 perror ("cant open c output file");
/* The header was already written; remove it when the C file cannot be
 * opened. */
1054 (void) remove (deffile);
/* The generated C file includes the generated header by the path given
 * on the command line. */
1061 output ("#include \"%s\"\n\n", deffile);
1066 emit_emitter_func ();