7 #include <mono/metadata/image.h>
8 #include <mono/metadata/metadata.h>
9 #include <mono/metadata/assembly.h>
10 #include <mono/metadata/marshal.h>
11 #include <mono/metadata/class-internals.h>
12 #include <mono/metadata/metadata-internals.h>
15 #define DEBUG_PARSER(stmt) do { stmt; } while (0)
17 #define DEBUG_PARSER(stmt)
21 #define DEBUG_SCANNER(stmt) do { stmt; } while (0)
22 #define SCANNER_DEBUG 1
24 #define DEBUG_SCANNER(stmt)
32 identifier ::= ([a-z] | [A-Z]) ([a-z] | [A-Z] | [0-9] | [_-.])*
hexa_digit ::= [0-9] | [a-f] | [A-F]
34 number ::= hexadecimal | decimal
35 hexadecimal ::= (+-)?('0' [xX])? hexa_digit+
44 identifier '{' assembly_directive test_entry* '}'
50 validity patch (',' patch)*
62 ('set-byte' | 'set-ushort' | 'set-uint' | 'set-bit' | 'or-byte' | 'or-ushort' | 'or-uint' | 'truncate' ) expression
68 number | variable | function_call
71 fun_name '(' arg_list ')'
83 expression ',' arg_list
95 TODO For the sake of a simple implementation, tokens are space delimited.
111 INVALID_VALIDITY_TEST,
117 INVALID_VARIABLE_NAME,
118 INVALID_FUNCTION_NAME,
152 typedef struct _expression expression_t;
156 int start, end; /*stream range text is in [start, end[*/
186 expression_t *expression;
192 expression_t *expression;
196 patch_selector_t *selector;
197 patch_effect_t *effect;
213 GSList *patches; /*of test_patch_t*/
216 test_set_t *test_set;
221 /*******************************************************************************************************/
222 static guint32 expression_eval (expression_t *exp, test_entry_t *entry);
223 static expression_t* parse_expression (scanner_t *scanner);
226 test_validity_name (int validity)
229 case TEST_TYPE_VALID:
231 case TEST_TYPE_INVALID:
234 printf ("Invalid test type %d\n", validity);
235 exit (INVALID_VALIDITY_TEST);
240 read_whole_file_and_close (const char *name, int *file_size)
242 FILE *file = fopen (name, "ro");
247 printf ("Could not open file %s\n", name);
248 exit (INVALID_FILE_NAME);
251 fseek (file, 0, SEEK_END);
252 fsize = ftell (file);
253 fseek (file, 0, SEEK_SET);
255 res = g_malloc (fsize + 1);
257 fread (res, fsize, 1, file);
264 init_test_set (test_set_t *test_set)
266 MonoImageOpenStatus status;
269 test_set->assembly_data = read_whole_file_and_close (test_set->assembly, &test_set->assembly_size);
270 test_set->image = mono_image_open_from_data (test_set->assembly_data, test_set->assembly_size, FALSE, &status);
271 if (!test_set->image || status != MONO_IMAGE_OK) {
272 printf ("Could not parse image %s\n", test_set->assembly);
273 exit (INVALID_BAD_FILE);
280 make_test_name (test_entry_t *entry, test_set_t *test_set)
282 return g_strdup_printf ("%s-%s-%d.exe", test_validity_name (entry->validity), test_set->name, test_set->count++);
/*
 * READ_VAR: load a little-endian value of integer type KIND from address PTR
 * and yield it as a guint32 in host byte order.
 *
 * The conversion is selected from sizeof (KIND): single bytes need no swap,
 * 16-bit values go through GUINT16_FROM_LE and everything else through
 * GUINT32_FROM_LE.  Widening to 32 bits *before* swapping (as a single
 * GUINT32_FROM_LE would do) moves 8/16-bit values into the high bytes on
 * big-endian hosts.
 *
 * PTR appears in every arm of the conditional but only the selected arm is
 * evaluated, so an argument with a side effect (e.g. `data + offset++`) is
 * still evaluated exactly once.
 *
 * NOTE(review): the load is done through a cast pointer, so PTR is presumably
 * expected to be suitably aligned for KIND on strict-alignment targets.
 */
#define READ_VAR(KIND, PTR) \
	(sizeof (KIND) == 1 ? (guint32)*((KIND*)(PTR)) : \
	 sizeof (KIND) == 2 ? (guint32)GUINT16_FROM_LE (*((KIND*)(PTR))) : \
	 (guint32)GUINT32_FROM_LE ((guint32)*((KIND*)(PTR))))
/*
 * SET_VAR: store VAL at address PTR as a little-endian integer of type KIND.
 *
 * The byte-order conversion is chosen from sizeof (KIND) so that 8- and
 * 16-bit stores are not run through a 32-bit swap (which would leave the
 * significant bytes outside the stored width on big-endian hosts).
 * VAL is fully parenthesized before being cast, so compound arguments such
 * as `old | value` are converted as a whole rather than operand by operand.
 *
 * Only one branch executes, so PTR and VAL are each evaluated once.
 */
#define SET_VAR(KIND, PTR, VAL) do { \
	if (sizeof (KIND) == 1) \
		*((KIND*)(PTR)) = (KIND)(VAL); \
	else if (sizeof (KIND) == 2) \
		*((KIND*)(PTR)) = (KIND)GUINT16_TO_LE ((guint16)(VAL)); \
	else \
		*((KIND*)(PTR)) = (KIND)GUINT32_TO_LE ((guint32)(VAL)); \
} while (0)
/*
 * READ_BIT: yield 1 if bit number OFF is set in the byte array starting at
 * PTR, 0 otherwise.  Bit 0 is the least significant bit of the first byte;
 * bit OFF lives in byte OFF / 8 at position OFF % 8.
 * OFF is parenthesized everywhere so expression arguments (e.g. `a + b`)
 * select the intended byte.
 */
#define READ_BIT(PTR,OFF) ((((guint8*)(PTR))[(OFF) / 8] & (1 << ((OFF) % 8))) != 0)
/*
 * SET_BIT: set bit number OFF in the byte array starting at PTR, leaving all
 * other bits untouched.  Bit 0 is the least significant bit of the first
 * byte; bit OFF lives in byte OFF / 8 at position OFF % 8.
 * OFF is parenthesized everywhere so expression arguments (e.g. `a + b`)
 * select the intended byte.
 */
#define SET_BIT(PTR,OFF) do { ((guint8*)(PTR))[(OFF) / 8] |= (1 << ((OFF) % 8)); } while (0)
292 get_pe_header (test_entry_t *entry)
294 return READ_VAR (guint32, entry->data + 0x3c) + 4;
298 translate_rva (test_entry_t *entry, guint32 rva)
300 guint32 pe_header = get_pe_header (entry);
301 guint32 sectionCount = READ_VAR (guint16, entry->data + pe_header + 2);
302 guint32 idx = pe_header + 244;
304 while (sectionCount-- > 0) {
305 guint32 size = READ_VAR (guint32, entry->data + idx + 8);
306 guint32 base = READ_VAR (guint32, entry->data + idx + 12);
307 guint32 offset = READ_VAR (guint32, entry->data + idx + 20);
309 if (rva >= base && rva <= base + size)
310 return (rva - base) + offset;
313 printf ("Could not translate RVA %x\n", rva);
318 get_cli_header (test_entry_t *entry)
320 guint32 offset = get_pe_header (entry) + 20; /*pe-optional-header*/
321 offset += 208; /*cli header entry offset in the pe-optional-header*/
322 return translate_rva (entry, READ_VAR (guint32, entry->data + offset));
326 get_cli_metadata_root (test_entry_t *entry)
328 guint32 offset = get_cli_header (entry);
329 offset += 8; /*metadata rva offset*/
330 return translate_rva (entry, READ_VAR (guint32, entry->data + offset));
334 pad4 (guint32 offset)
337 offset += 4 - (offset % 4);
342 get_metadata_stream_header (test_entry_t *entry, guint32 idx)
346 offset = get_cli_metadata_root (entry);
347 offset = pad4 (offset + 16 + READ_VAR (guint32, entry->data + offset + 12));
355 for (i = 0; i < 32; ++i) {
356 if (!READ_VAR (guint8, entry->data + offset++))
359 offset = pad4 (offset);
365 lookup_var (test_entry_t *entry, const char *name)
367 if (!strcmp ("file-size", name))
368 return entry->data_size;
369 if (!strcmp ("pe-signature", name))
370 return get_pe_header (entry) - 4;
371 if (!strcmp ("pe-header", name))
372 return get_pe_header (entry);
373 if (!strcmp ("pe-optional-header", name))
374 return get_pe_header (entry) + 20;
375 if (!strcmp ("section-table", name))
376 return get_pe_header (entry) + 244;
377 if (!strcmp ("cli-header", name))
378 return get_cli_header (entry);
379 if (!strcmp ("cli-metadata", name))
380 return get_cli_metadata_root (entry);
381 if (!strcmp ("tables-header", name)) {
382 guint32 metadata_root = get_cli_metadata_root (entry);
383 guint32 tilde_stream = get_metadata_stream_header (entry, 0);
384 guint32 offset = READ_VAR (guint32, entry->data + tilde_stream);
385 return metadata_root + offset;
388 printf ("Unknown variable in expression %s\n", name);
389 exit (INVALID_VARIABLE_NAME);
393 call_func (test_entry_t *entry, const char *name, GSList *args)
395 if (!strcmp ("read.ushort", name)) {
397 if (g_slist_length (args) != 1) {
398 printf ("Invalid number of args to read.ushort %d\b", g_slist_length (args));
399 exit (INVALID_ARG_COUNT);
401 offset = expression_eval (args->data, entry);
402 return READ_VAR (guint16, entry->data + offset);
404 if (!strcmp ("read.uint", name)) {
406 if (g_slist_length (args) != 1) {
407 printf ("Invalid number of args to read.uint %d\b", g_slist_length (args));
408 exit (INVALID_ARG_COUNT);
410 offset = expression_eval (args->data, entry);
411 return READ_VAR (guint32, entry->data + offset);
413 if (!strcmp ("translate.rva", name)) {
415 if (g_slist_length (args) != 1) {
416 printf ("Invalid number of args to translate.rva %d\b", g_slist_length (args));
417 exit (INVALID_ARG_COUNT);
419 rva = expression_eval (args->data, entry);
420 return translate_rva (entry, rva);
422 if (!strcmp ("translate.rva.ind", name)) {
424 if (g_slist_length (args) != 1) {
425 printf ("Invalid number of args to translate.rva.ind %d\b", g_slist_length (args));
426 exit (INVALID_ARG_COUNT);
428 rva = expression_eval (args->data, entry);
429 rva = READ_VAR (guint32, entry->data + rva);
430 return translate_rva (entry, rva);
432 if (!strcmp ("stream-header", name)) {
434 if (g_slist_length (args) != 1) {
435 printf ("Invalid number of args to stream-header %d\b", g_slist_length (args));
436 exit (INVALID_ARG_COUNT);
438 idx = expression_eval (args->data, entry);
439 return get_metadata_stream_header (entry, idx);
441 if (!strcmp ("table-row", name)) {
444 const MonoTableInfo *info;
445 if (g_slist_length (args) != 2) {
446 printf ("Invalid number of args to table-row %d\b", g_slist_length (args));
447 exit (INVALID_ARG_COUNT);
449 table = expression_eval (args->data, entry);
450 row = expression_eval (args->next->data, entry);
451 info = mono_image_get_table_info (entry->test_set->image, table);
452 data = info->base + row * info->row_size;
453 return data - entry->test_set->assembly_data;
456 printf ("Unknown function %s\n", name);
457 exit (INVALID_FUNCTION_NAME);
462 expression_eval (expression_t *exp, test_entry_t *entry)
465 case EXPRESSION_CONSTANT:
466 return exp->data.constant;
467 case EXPRESSION_VARIABLE:
468 return lookup_var (entry, exp->data.name);
470 return expression_eval (exp->data.bin.left, entry) + expression_eval (exp->data.bin.right, entry);
472 return expression_eval (exp->data.bin.left, entry) - expression_eval (exp->data.bin.right, entry);
473 case EXPRESSION_FUNC:
474 return call_func (entry, exp->data.func.name, exp->data.func.args);
476 printf ("Invalid expression type %d\n", exp->type);
477 exit (INVALID_EXPRESSION);
482 apply_selector (patch_selector_t *selector, test_entry_t *entry)
485 if (selector->expression)
486 value = expression_eval (selector->expression, entry);
487 switch (selector->type) {
488 case SELECTOR_ABS_OFFSET:
489 DEBUG_PARSER (printf("\tabsolute offset selector [%04x]\n", value));
492 printf ("Invalid selector type %d\n", selector->type);
493 exit (INVALID_SELECTOR);
498 apply_effect (patch_effect_t *effect, test_entry_t *entry, guint32 offset)
501 char *ptr = entry->data + offset;
502 if (effect->expression)
503 value = expression_eval (effect->expression, entry);
505 switch (effect->type) {
506 case EFFECT_SET_BYTE:
507 DEBUG_PARSER (printf("\tset-byte effect old value [%x] new value [%x]\n", READ_VAR (guint8, ptr), value));
508 SET_VAR (guint8, ptr, value);
510 case EFFECT_SET_USHORT:
511 DEBUG_PARSER (printf("\tset-ushort effect old value [%x] new value [%x]\n", READ_VAR (guint16, ptr), value));
512 SET_VAR (guint16, ptr, value);
514 case EFFECT_SET_UINT:
515 DEBUG_PARSER (printf("\tset-uint effect old value [%x] new value [%x]\n", READ_VAR (guint32, ptr), value));
516 SET_VAR (guint32, ptr, value);
518 case EFFECT_SET_TRUNC:
519 DEBUG_PARSER (printf("\ttrunc effect [%d]\n", offset));
520 entry->data_size = offset;
523 DEBUG_PARSER (printf("\tset-bit effect bit %d old value [%x]\n", value, READ_BIT (ptr, value)));
524 SET_BIT (ptr, value);
527 DEBUG_PARSER (printf("\tor-byte effect old value [%x] new value [%x]\n", READ_VAR (guint8, ptr), value));
528 SET_VAR (guint8, ptr, READ_VAR (guint8, ptr) | value);
530 case EFFECT_OR_USHORT:
531 DEBUG_PARSER (printf("\tor-ushort effect old value [%x] new value [%x]\n", READ_VAR (guint16, ptr), value));
532 SET_VAR (guint16, ptr, READ_VAR (guint16, ptr) | value);
535 DEBUG_PARSER (printf("\tor-uint effect old value [%x] new value [%x]\n", READ_VAR (guint32, ptr), value));
536 SET_VAR (guint32, ptr, READ_VAR (guint32, ptr) | value);
539 printf ("Invalid effect type %d\n", effect->type);
540 exit (INVALID_EFFECT);
545 apply_patch (test_entry_t *entry, test_patch_t *patch)
547 guint32 offset = apply_selector (patch->selector, entry);
548 apply_effect (patch->effect, entry, offset);
552 process_test_entry (test_set_t *test_set, test_entry_t *entry)
558 init_test_set (test_set);
559 entry->data = g_memdup (test_set->assembly_data, test_set->assembly_size);
560 entry->data_size = test_set->assembly_size;
561 entry->test_set = test_set;
563 DEBUG_PARSER (printf("(%d)%s\n", test_set->count, entry->validity == TEST_TYPE_VALID? "valid" : "invalid"));
564 for (tmp = entry->patches; tmp; tmp = tmp->next)
565 apply_patch (entry, tmp->data);
567 file_name = make_test_name (entry, test_set);
569 f = fopen (file_name, "wo");
570 fwrite (entry->data, entry->data_size, 1, f);
576 /*******************************************************************************************************/
579 patch_free (test_patch_t *patch)
581 free (patch->selector);
582 free (patch->effect);
587 test_set_free (test_set_t *set)
590 free (set->assembly);
591 free (set->assembly_data);
593 mono_image_close (set->image);
597 test_entry_free (test_entry_t *entry)
602 for (tmp = entry->patches; tmp; tmp = tmp->next)
603 patch_free (tmp->data);
604 g_slist_free (entry->patches);
608 /*******************************************************************************************************/
610 token_type_name (int type)
620 return "punctuation";
622 return "end of file";
624 return "unknown token type";
/* Character at the scanner's current read position; expects a `scanner` pointer in the enclosing scope. */
#define CUR_CHAR (*(scanner->input + scanner->idx))
630 is_eof (scanner_t *scanner)
632 return scanner->idx >= scanner->size;
638 return c == '{' || c == '}' || c == ',';
642 skip_spaces (scanner_t *scanner)
645 while (!is_eof (scanner) && isspace (CUR_CHAR)) {
646 if (CUR_CHAR == '\n')
650 if (CUR_CHAR == '#') {
651 while (!is_eof (scanner) && CUR_CHAR != '\n') {
659 token_text_dup (scanner_t *scanner, token_t *token)
661 int len = token->end - token->start;
663 char *str = g_memdup (scanner->input + token->start, len + 1);
670 dump_token (scanner_t *scanner, token_t *token)
672 char *str = token_text_dup (scanner, token);
674 printf ("token '%s' of type '%s' at line %d\n", str, token_type_name (token->type), token->line);
681 is_special_char (char c)
696 next_token (scanner_t *scanner)
698 int start, end, type;
700 skip_spaces (scanner);
701 start = scanner->idx;
702 while (!is_eof (scanner) && !isspace (CUR_CHAR)) {
703 if (scanner->idx == start) {
704 if (is_special_char (CUR_CHAR)) {
708 } else if (is_special_char (CUR_CHAR))
714 c = scanner->input [start];
715 if (start >= scanner->size)
717 else if (isdigit (c) || c == '\'')
719 else if (ispunct_char (c))
723 scanner->current.start = start;
724 scanner->current.end = end;
725 scanner->current.type = type;
726 scanner->current.line = scanner->line;
728 DEBUG_SCANNER (dump_token (scanner, &scanner->current));
732 scanner_new (const char *file_name)
736 res = g_new0 (scanner_t, 1);
737 res->input = read_whole_file_and_close (file_name, &res->size);
746 scanner_free (scanner_t *scanner)
748 free (scanner->input);
753 scanner_get_current_token (scanner_t *scanner)
755 return &scanner->current;
759 scanner_get_type (scanner_t *scanner)
761 return scanner_get_current_token (scanner)->type;
765 scanner_get_line (scanner_t *scanner)
767 return scanner_get_current_token (scanner)->line;
771 scanner_text_dup (scanner_t *scanner)
773 return token_text_dup (scanner, scanner_get_current_token (scanner));
777 scanner_text_parse_number (scanner_t *scanner, long *res)
779 char *text = scanner_text_dup (scanner);
782 if (text [0] == '\'') {
783 ok = strlen (text) != 3 || text [2] != '\'';
787 *res = strtol (text, &end, 0);
796 match_current_type (scanner_t *scanner, int type)
798 return scanner_get_type (scanner) == type;
802 match_current_text (scanner_t *scanner, const char *text)
804 token_t *t = scanner_get_current_token (scanner);
805 return !strncmp (scanner->input + t->start, text, t->end - t->start);
809 match_current_type_and_text (scanner_t *scanner, int type, const char *text)
811 return match_current_type (scanner, type) && match_current_text (scanner, text);
814 /*******************************************************************************************************/
815 #define FAIL(MSG, REASON) do { \
816 printf ("%s at line %d for rule %s\n", MSG, scanner_get_line (scanner), __FUNCTION__); \
820 #define EXPECT_TOKEN(TYPE) do { \
821 if (scanner_get_type (scanner) != TYPE) { \
822 printf ("Expected %s but got %s '%s' at line %d for rule %s\n", token_type_name (TYPE), token_type_name (scanner_get_type (scanner)), scanner_text_dup (scanner), scanner_get_line (scanner), __FUNCTION__); \
823 exit (INVALID_TOKEN_TYPE); \
827 #define CONSUME_SPECIFIC_PUNCT(TEXT) do { \
828 EXPECT_TOKEN (TOKEN_PUNC); \
829 if (!match_current_text (scanner, TEXT)) { \
830 char *__tmp = scanner_text_dup (scanner); \
831 printf ("Expected '%s' but got '%s' at line %d for rule %s\n", TEXT, __tmp, scanner_get_line (scanner), __FUNCTION__); \
833 exit (INVALID_PUNC_TEXT); \
835 next_token (scanner); \
838 #define CONSUME_IDENTIFIER(DEST) do { \
839 EXPECT_TOKEN (TOKEN_ID); \
840 DEST = scanner_text_dup (scanner); \
841 next_token (scanner); \
844 #define CONSUME_SPECIFIC_IDENTIFIER(TEXT) do { \
845 EXPECT_TOKEN (TOKEN_ID); \
846 if (!match_current_text (scanner, TEXT)) { \
847 char *__tmp = scanner_text_dup (scanner); \
848 printf ("Expected '%s' but got '%s' at line %d for rule %s\n", TEXT, __tmp, scanner_get_line (scanner), __FUNCTION__); \
850 exit (INVALID_ID_TEXT); \
852 next_token (scanner); \
855 #define CONSUME_NUMBER(DEST) do { \
857 EXPECT_TOKEN (TOKEN_NUM); \
858 if (scanner_text_parse_number (scanner, &__tmp_num)) { \
859 char *__tmp = scanner_text_dup (scanner); \
860 printf ("Expected a number but got '%s' at line %d for rule %s\n", __tmp, scanner_get_line (scanner), __FUNCTION__); \
862 exit (INVALID_NUMBER); \
865 next_token (scanner); \
/*
 * Lookahead test: non-zero when the current token is of type TOKEN_ID and
 * match_current_text accepts TEXT for it.  Expects a `scanner` pointer in the
 * enclosing scope; does not consume the token.
 */
#define LA_ID(TEXT) (match_current_type_and_text (scanner, TOKEN_ID, (TEXT)))
/*
 * Lookahead test: non-zero when the current token is of type TOKEN_PUNC and
 * match_current_text accepts TEXT for it.  Expects a `scanner` pointer in the
 * enclosing scope; does not consume the token.
 */
#define LA_PUNCT(TEXT) (match_current_type_and_text (scanner, TOKEN_PUNC, (TEXT)))
871 /*******************************************************************************************************/
874 parse_atom (scanner_t *scanner)
876 expression_t *atom = g_new0 (expression_t, 1);
877 if (scanner_get_type (scanner) == TOKEN_NUM) {
878 atom->type = EXPRESSION_CONSTANT;
879 CONSUME_NUMBER (atom->data.constant);
882 CONSUME_IDENTIFIER (name);
884 atom->data.func.name = name;
885 atom->type = EXPRESSION_FUNC;
886 CONSUME_SPECIFIC_IDENTIFIER ("(");
888 while (!LA_ID (")") && !match_current_type (scanner, TOKEN_EOF))
889 atom->data.func.args = g_slist_append (atom->data.func.args, parse_expression (scanner));
891 CONSUME_SPECIFIC_IDENTIFIER (")");
893 atom->data.name = name;
894 atom->type = EXPRESSION_VARIABLE;
902 parse_expression (scanner_t *scanner)
904 expression_t *exp = parse_atom (scanner);
906 while (LA_ID ("-") || LA_ID ("+")) {
908 CONSUME_IDENTIFIER (text);
909 expression_t *left = exp;
910 exp = g_new0 (expression_t, 1);
911 exp->type = !strcmp ("+", text) ? EXPRESSION_ADD: EXPRESSION_SUB;
912 exp->data.bin.left = left;
913 exp->data.bin.right = parse_atom (scanner);
919 static patch_selector_t*
920 parse_selector (scanner_t *scanner)
922 patch_selector_t *selector;
924 CONSUME_SPECIFIC_IDENTIFIER ("offset");
926 selector = g_new0 (patch_selector_t, 1);
927 selector->type = SELECTOR_ABS_OFFSET;
928 selector->expression = parse_expression (scanner);
932 static patch_effect_t*
933 parse_effect (scanner_t *scanner)
935 patch_effect_t *effect;
939 CONSUME_IDENTIFIER(name);
941 if (!strcmp ("set-byte", name))
942 type = EFFECT_SET_BYTE;
943 else if (!strcmp ("set-ushort", name))
944 type = EFFECT_SET_USHORT;
945 else if (!strcmp ("set-uint", name))
946 type = EFFECT_SET_UINT;
947 else if (!strcmp ("set-bit", name))
948 type = EFFECT_SET_BIT;
949 else if (!strcmp ("truncate", name))
950 type = EFFECT_SET_TRUNC;
951 else if (!strcmp ("or-byte", name))
952 type = EFFECT_OR_BYTE;
953 else if (!strcmp ("or-ushort", name))
954 type = EFFECT_OR_USHORT;
955 else if (!strcmp ("or-uint", name))
956 type = EFFECT_OR_UINT;
958 FAIL(g_strdup_printf ("Invalid effect kind, expected one of: (set-byte set-ushort set-uint set-bit or-byte or-ushort or-uint truncate) but got %s",name), INVALID_ID_TEXT);
960 effect = g_new0 (patch_effect_t, 1);
962 if (type != EFFECT_SET_TRUNC)
963 effect->expression = parse_expression (scanner);
968 parse_patch (scanner_t *scanner)
972 patch = g_new0 (test_patch_t, 1);
973 patch->selector = parse_selector (scanner);
974 patch->effect = parse_effect (scanner);
979 parse_validity (scanner_t *scanner)
983 CONSUME_IDENTIFIER (name);
985 if (!strcmp (name, "valid"))
986 validity = TEST_TYPE_VALID;
987 else if (!strcmp (name, "invalid"))
988 validity = TEST_TYPE_INVALID;
990 printf ("Expected either 'valid' or 'invalid' but got '%s' at the begining of a test entry at line %d\n", name, scanner_get_line (scanner));
991 exit (INVALID_VALIDITY_TEST);
999 parse_test_entry (scanner_t *scanner, test_set_t *test_set)
1001 test_entry_t entry = { 0 };
1003 entry.validity = parse_validity (scanner);
1007 CONSUME_SPECIFIC_PUNCT (",");
1008 entry.patches = g_slist_append (entry.patches, parse_patch (scanner));
1009 } while (match_current_type_and_text (scanner, TOKEN_PUNC, ","));
1011 process_test_entry (test_set, &entry);
1013 test_entry_free (&entry);
1017 parse_test (scanner_t *scanner)
1019 test_set_t set = { 0 };
1021 CONSUME_IDENTIFIER (set.name);
1022 CONSUME_SPECIFIC_PUNCT ("{");
1023 CONSUME_SPECIFIC_IDENTIFIER ("assembly");
1024 CONSUME_IDENTIFIER (set.assembly);
1026 DEBUG_PARSER (printf ("RULE %s using assembly %s\n", set.name, set.assembly));
1028 while (!match_current_type (scanner, TOKEN_EOF) && !match_current_type_and_text (scanner, TOKEN_PUNC, "}"))
1029 parse_test_entry (scanner, &set);
1031 CONSUME_SPECIFIC_PUNCT ("}");
1033 test_set_free (&set);
1038 parse_program (scanner_t *scanner)
1040 while (!match_current_type (scanner, TOKEN_EOF))
1041 parse_test (scanner);
1046 digest_file (const char *file)
1048 scanner_t *scanner = scanner_new (file);
1049 parse_program (scanner);
1050 scanner_free (scanner);
1054 main (int argc, char **argv)
1057 printf ("usage: gen-md.test file_to_process\n");
1061 mono_init_version ("gen-md-test", "v2.0.50727");
1062 mono_marshal_init ();
1064 digest_file (argv [1]);