1 /* todo: copyright/gpl stuff. */
4 #error "use it on your own risk"
14 #include <sys/types.h>
/* Short aliases for unsigned / signed char, used for raw bytes and signed 8-bit displacements. */
21 typedef unsigned char u8;
22 typedef signed char s8;
/* Capacities of instruction.string (accumulated text) and instruction.bytes (raw encoding). */
31 #define MAX_STRING_LEN 100
32 #define MAX_INSTRUCTION_LEN 40
/* One decoded instruction; records are chained through `next` into a list. */
34 typedef struct _instruction {
/* raw bytes collected for this instruction (filled by instruction_push_byte) */
38 u8 bytes[MAX_INSTRUCTION_LEN];
/* text accumulated for this instruction (filled by instruction_print) */
41 char string[MAX_STRING_LEN];
/* following record; i_begin asserts it is 0 before linking a new one */
56 struct _instruction *next;
/* Figures gathered over the whole instruction list. */
59 typedef struct _statistics {
/* largest bytePos found by i_make_statistics; i_dump uses it as the loop bound for the byte column */
60 int maxInstructionLen;
/*
 * Allocate an instruction record, zero it, and store the two start offsets
 * (offsetInFile, offsetInCode) verbatim in the new record.
 * NOTE(review): no NULL check on the malloc result is visible before the
 * memset - confirm one exists or add one.
 */
63 static instruction* instruction_new(int offsetInFile, int offsetInCode) {
64 instruction *inst = malloc(sizeof(instruction));
/* zero-fill so every flag, position counter and `next` starts out as 0 */
67 memset(inst, 0, sizeof(instruction));
69 inst->offsetInFile = offsetInFile;
70 inst->offsetInCode = offsetInCode;
/*
 * Append `len` characters of c to inst->string, starting at inst->stringPos
 * and advancing stringPos by one per character.
 * NOTE(review): `len` is defined on a line not shown here - presumably the
 * length of c; confirm.
 */
75 static void instruction_print(instruction *inst, const char *c) {
/* the appended text must end strictly below MAX_STRING_LEN */
77 assert((inst->stringPos+len) < MAX_STRING_LEN);
78 for (int i=0; i<len; i++)
79 inst->string[inst->stringPos++] = c[i];
/*
 * Append one raw byte to inst->bytes and advance inst->bytePos.
 * The assert fires as soon as bytePos+1 reaches MAX_INSTRUCTION_LEN, so the
 * last slot of bytes[] is never written.
 */
82 static void instruction_push_byte(instruction *inst, u8 byte) {
83 assert((inst->bytePos+1) < MAX_INSTRUCTION_LEN);
84 inst->bytes[inst->bytePos++] = byte;
/* File-scope state shared by the i_* interface functions. */
/* head of the instruction list; NULL until the first i_begin */
87 static instruction *first_instruction;
/* record currently being filled by i_print / i_push */
88 static instruction *current_instruction;
/* next numbers handed out by i_enumerate_call_targets / i_enumerate_jump_targets */
89 static int current_call_label=0;
90 static int current_jump_label=0;
/* written by i_make_statistics, read by i_dump */
91 static statistics stat;
/* code offset that i_dump announces as ENTRY_POINT; computed in main */
92 static int entry_point;
96 * BEGIN - interface functions
/*
 * Start a new instruction record at the given offsets and make it current.
 * When the list is empty the new record becomes both head and current;
 * the remaining statements link a new record after current_instruction.
 */
98 static void i_begin(int offsetInFile, int offsetInCode) {
99 if (first_instruction == NULL) {
100 first_instruction = current_instruction = instruction_new(offsetInFile, offsetInCode);
/* the current record must be the tail before another one is linked in */
102 assert(current_instruction->next == 0);
103 current_instruction->next = instruction_new(offsetInFile, offsetInCode);
104 current_instruction = current_instruction->next;
/*
 * printf-style helper: formats into a local buffer and appends the result to
 * the current instruction's text through instruction_print.
 * Requires an active instruction (asserted).
 * NOTE(review): vsprintf does not bound its output while str holds only
 * MAX_STRING_LEN bytes; vsnprintf(str, sizeof str, fmt, args) would be safer.
 */
108 static void i_print(const char *fmt, ...) {
109 assert(current_instruction != NULL);
112 char str[MAX_STRING_LEN];
114 memset(str, 0, MAX_STRING_LEN);
117 vsprintf(str, fmt, args);
118 instruction_print(current_instruction, str);
/*
 * Record `byte` as part of the current instruction's raw encoding.
 * Requires an active instruction (asserted).
 * NOTE(review): the return statement is not visible here; callers use the
 * result as the byte just consumed (e.g. `u8 insn = i_push(*data++)`), so it
 * presumably returns `byte` - confirm.
 */
122 static u8 i_push(u8 byte) {
123 assert(current_instruction != NULL);
124 instruction_push_byte(current_instruction, byte);
/*
 * Flag the current instruction as a call and store its target code offset.
 * relAddr is applied to offsetInCode + bytePos, i.e. to the code offset just
 * past the bytes pushed for this instruction so far.
 */
128 static void i_set_relative_call(int relAddr) {
129 current_instruction->isCall=1;
130 current_instruction->callAddress = current_instruction->offsetInCode + current_instruction->bytePos + relAddr;
131 /*i_print("[call to 0x%08x] ", current_instruction->callAddress);*/
/*
 * Flag the current instruction as a jump and store its target code offset.
 * relAddr is applied to offsetInCode + bytePos, i.e. to the code offset just
 * past the bytes pushed for this instruction so far; the commented-out
 * alternative below would apply it to the instruction start instead.
 */
134 static void i_set_relative_jmp(int relAddr) {
135 current_instruction->isJump=1;
136 // FIXME: is this correct ?
137 current_instruction->jumpAddress = current_instruction->offsetInCode + current_instruction->bytePos + relAddr;
138 /*current_instruction->jumpAddress = current_instruction->offsetInCode + relAddr;*/
139 /*i_print("[jump to 0x%08x] ", current_instruction->jumpAddress);*/
/*
 * Look up the record located at absoluteCodeOffset that is flagged as a jump
 * target, starting from the list head.
 * NOTE(review): the loop and the return statements are not visible here;
 * i_dump prints the result as a label number, so this presumably returns the
 * matching record's jumpLabel - confirm, including the not-found value.
 */
142 static int i_get_jump_label(int absoluteCodeOffset) {
143 instruction *i = first_instruction;
145 if (i->offsetInCode == absoluteCodeOffset && i->isJumpTarget)
/*
 * Look up the record located at absoluteCodeOffset that is flagged as a call
 * target, starting from the list head.
 * NOTE(review): the loop and the return statements are not visible here;
 * i_dump prints the result as a function number, so this presumably returns
 * the matching record's callLabel - confirm, including the not-found value.
 */
151 static int i_get_call_label(int absoluteCodeOffset) {
152 instruction *i = first_instruction;
154 if (i->offsetInCode == absoluteCodeOffset && i->isCallTarget)
/*
 * Print the collected listing to stdout, starting from the list head.
 * Visible output: an ENTRY_POINT / functionN / labelN heading where
 * applicable, then the code offset, the raw bytes and the instruction text,
 * with call and jump targets shown as function / label numbers.
 */
161 static void i_dump() {
164 instruction *i = first_instruction;
/* heading for the record located at the entry point */
166 if (i->offsetInCode == entry_point)
167 printf("\n\nENTRY_POINT:\n");
169 printf("\n\nfunction%d:\n", i->callLabel);
171 printf("label%d: \n", i->jumpLabel);
173 printf("\t0x%08x:", i->offsetInCode);
/* the byte column loops up to the longest instruction found by i_make_statistics */
174 for (k=0; k<stat.maxInstructionLen; k++) {
176 printf(" %02x", i->bytes[k]);
/* text column; calls and jumps get their resolved label appended */
181 printf(" %s [call function%d]\n", i->string, i_get_call_label(i->callAddress));
183 printf(" %s [jump label%d]\n", i->string, i_get_jump_label(i->jumpAddress));
185 printf(" %s\n", i->string);
/*
 * Mark the record located at absoluteCodeOffset as a jump target.
 * Only the offset comparison is visible here; the flag assignment described
 * by the comment below is on lines not shown.
 */
190 static void i_mark_jump_target(int absoluteCodeOffset) {
191 // search the list for the address and set isJumpTarget = true
192 instruction *i = first_instruction;
194 if (i->offsetInCode == absoluteCodeOffset) {
/*
 * Mark the record located at absoluteCodeOffset as a call target.
 * Only the offset comparison is visible here; the flag assignment described
 * by the comment below is on lines not shown.
 */
202 static void i_mark_call_target(int absoluteCodeOffset) {
203 // search the list for the address and set isCallTarget = true
204 instruction *i = first_instruction;
206 if (i->offsetInCode == absoluteCodeOffset) {
/* Pass over the list that flags the destination of every jump instruction. */
215 static void i_mark_jump_targets() {
216 // walk the whole list looking for jumps and call
217 // i_mark_jump_target() for each of them
218 instruction *i = first_instruction;
221 i_mark_jump_target(i->jumpAddress);
/* Pass over the list that flags the destination of every call instruction. */
226 static void i_mark_call_targets() {
227 // walk the whole list looking for calls and call
228 // i_mark_call_target() for each of them
229 instruction *i = first_instruction;
232 i_mark_call_target(i->callAddress);
/*
 * Hand out consecutive jump label numbers from current_jump_label.
 * NOTE(review): the loop and its condition are not visible here - presumably
 * only records flagged isJumpTarget receive a number; confirm.
 */
237 static void i_enumerate_jump_targets() {
238 instruction *i = first_instruction;
241 i->jumpLabel = current_jump_label++;
/*
 * Hand out consecutive call label numbers from current_call_label.
 * NOTE(review): the loop and its condition are not visible here - presumably
 * only records flagged isCallTarget receive a number; confirm.
 */
246 static void i_enumerate_call_targets() {
247 instruction *i = first_instruction;
250 i->callLabel = current_call_label++;
/* Record the largest bytePos found in the list in stat.maxInstructionLen. */
255 static void i_make_statistics() {
256 instruction *i = first_instruction;
258 if (i->bytePos > stat.maxInstructionLen) {
259 stat.maxInstructionLen = i->bytePos;
/*
 * Post-processing once all instructions are collected: first flag every jump
 * and call destination, then number the flagged destinations.
 * Marking has to run before enumeration because the labels are assigned to
 * the records the marking pass selected.
 */
265 static void i_end() {
266 i_mark_jump_targets();
267 i_mark_call_targets();
269 i_enumerate_jump_targets();
270 i_enumerate_call_targets();
/*
 * Detach the instruction list from first_instruction and walk it record by
 * record, saving each record's successor before moving on.
 * NOTE(review): the statement that releases each record is not visible here -
 * presumably free(current_inst); confirm. current_instruction is not reset on
 * any visible line.
 */
275 static void i_cleanup() {
276 instruction *current_inst = first_instruction;
279 first_instruction = NULL;
280 while (current_inst != NULL) {
/* remember the successor before this record goes away */
281 next = current_inst->next;
288 * END - interface functions
/* data: read cursor into the mapped file; data_start: first byte of the mapping (both set in main). */
292 static u8 *data, *data_start;
/* values read from the image headers in main; base_code and size_code bound the decode loop */
293 static u32 base_data, base_code, size_code;
/* Width suffixes appended to mnemonics; indexed 0..5 by pimmc. */
295 static char *imm_chw[] = {
296 "b", /* 08 bits */ "w", /* 16 bits */
297 "d", /* 32 bits */ "q", /* 64 bits */
298 "n" /* unsigned int. hax */ ,
299 "sn" /* signed int. hax */
/* Names printed by pvmreg for VM register indices 0..7. */
302 static char *vm_reg[] = {
303 "FLAGS", "IP", "RESERVED", "RESERVED",
304 "RESERVED", "RESERVED", "RESERVED", "RESERVED"
/*
 * Write a debug line to stderr: the label s followed by the current value of
 * the data cursor (remaining arguments are on a line not shown).
 * NOTE(review): `(u32) data` converts a pointer to u32 - if u32 is 32 bits
 * wide this truncates on 64-bit targets; confirm intent.
 */
307 static void print_debug(const char *s)
309 fprintf(stderr, PS "0x%08x (0x%04x)\n", s, (u32) data,
/* Append a single space to the current instruction's text. */
313 static void pspace(void) { i_print(" "); }
/* Append the operand separator ", " to the current instruction's text. */
314 static void pcomma(void) { i_print(", "); }
/* Print general register `reg` as "r<N>"; indices above 7 trip an assert. */
316 static void preg(u8 reg)
318 if (reg > 7) { assert(0); }
319 i_print("r%1d", reg);
/* Print the vm_reg name for index `reg`; indices above 7 trip an assert. */
322 static void pvmreg(u8 reg)
324 if (reg > 7) { assert(0); }
325 i_print("%s", vm_reg[reg]);
/* Print the "@" prefix when x is non-zero, nothing otherwise. */
328 static void pdref(u8 x)
330 i_print("%s", x ? "@" : "");
/*
 * Generates pimm<size>(): prints the <size>-bit value at `data` in hex
 * (%llx for 64 bits, %x otherwise) and then loops over its size/8 bytes.
 * NOTE(review): the loop body is not visible here - presumably it pushes each
 * byte with i_push and advances data; confirm.
 */
333 #define PIMM_READ(size) \
334 static void pimm##size(void) \
336 i_print(size != 64 ? "%x" : "%llx", *((u##size *) data)); \
337 for (int i=0; i<size/8; i++) \
/*
 * Print an immediate whose width is selected by `mod`:
 * 1 -> 16 bits, 2 -> 32 bits, 3 -> 64 bits. 0 and any other value assert.
 */
346 static void pimm(u8 mod)
349 case 1: pimm16(); break;
350 case 2: pimm32(); break;
351 case 3: pimm64(); break;
352 case 0: default: assert(0);
/*
 * Generates pidx<size>(): decodes the <size>-bit index word at `data` and
 * prints it as "{<sign> <n>n <c>c}".
 *   - top bit: sign ('-' when set, '+' otherwise)
 *   - next three bits, multiplied by size/8: bit width of the `n` field
 *   - low nat_bits bits: n; the following size - nat_bits - 4 bits: c
 * Afterwards it loops over the word's size/8 bytes.
 * NOTE(review): the loop body is not visible here - presumably it pushes each
 * byte with i_push and advances data; confirm.
 */
356 #define PIDX_READ(size) \
357 static void pidx##size(void) \
359 u##size d = *((u##size *)data); \
360 u##size sign = (d >> (size-1))&1; \
361 u##size nat_bits = ((d>>(size-4))&0x7) * (size/8);\
362 u##size c_bits = size - nat_bits - 4;\
363 u##size c = (d>>nat_bits)&((1ULL<<c_bits)-1);\
364 u##size n = d & ((1ULL<<nat_bits)-1);\
365 i_print(size != 64 ? "{%c %dn %dc}" : "{%c %lldn %lldc}", sign ? '-' : '+', n, c); \
366 for (int i=0; i<size/8; i++) \
/*
 * Print an index whose width is selected by `mod`:
 * 1 -> 16 bits, 2 -> 32 bits, 3 -> 64 bits. 0 and any other value assert.
 */
374 static void pidx(u8 mod)
377 case 1: pidx16(); break;
378 case 2: pidx32(); break;
379 case 3: pidx64(); break;
380 case 0: default: assert(0);
/* Print the imm_chw width suffix for imm_type; values above 5 trip an assert. */
384 static void pimmc(u8 imm_type)
386 if (imm_type > 5) { assert(0); }
387 i_print("%s", imm_chw[imm_type]);
/*
 * Print the comparison suffix selected by `flag` (0..4 are visible here).
 * Callers pass the opcode minus the first opcode of the compare family.
 */
390 static void pflags(u8 flag)
393 case 0x0: i_print("eq"); break;
394 case 0x1: i_print("lte"); break;
395 case 0x2: i_print("gte"); break;
396 case 0x3: i_print("ulte"); break;
397 case 0x4: i_print("ugte"); break;
/* Descriptions of BREAK codes, indexed by the code byte in pbreak (some entries are on lines not shown). */
401 static const char *breakopts[] = {
402 "Runaway program break",
403 "Get virtual machine version",
404 "undefined. WTF?", // "Skip"?
408 "Set compiler version"
/*
 * Consume the byte following a BREAK opcode and print its description in
 * double quotes. Codes above 6 are reported on stderr and trip an assert.
 */
411 static void pbreak(void)
413 u8 i = i_push(*data++);
415 if (i > 6) { fprintf(stderr, "unknown break!"); assert(0);
416 } else i_print("\"%s\"", breakopts[i]);
/*
 * Decode one instruction at `data` and record it through the i_* interface.
 * The first byte is pushed and its low six bits (opc) select the case; the
 * two high bits are reinterpreted per opcode (index/width flags, condition
 * flags, or a 2-bit modifier). Most cases then push a second byte (b1) that
 * carries the operand fields. Unknown opcodes are reported on stderr.
 */
419 static void pinsn(void)
421 u8 insn = i_push(*data++);
422 u8 opc = insn & 0x3f;
425 case 0x00: i_print("BREAK"); pbreak(); break;
/* 0x01 and 0x03 share one encoding; opc 0x03 prints CALL, the other branch prints JMP */
426 case 0x01: case 0x03:{
427 u8 opindex = insn & 0x80;
428 u8 c3264 = insn & 0x40;
430 u8 b1 = i_push(*data++);
433 u8 ebcnative = b1 & 0x20;
434 u8 relabs = b1 & 0x10;
439 i_print("JMP%d%s", c3264 ? 64 : 32,
440 cond ? (flag ? "cs" : "cc") : "");
441 } else if (opc == 0x03) {
442 i_print("CALL%d%s", c3264 ? 64 : 32,
443 ebcnative ? "EX" : "");
445 i_print("%s", relabs ? "a" : ""); pspace();
447 if (!c3264) { // 32bit
452 if (op1==0) { // relative/absolute call/jmp
453 u32 imm32 = *(u32*)data;
458 if (relabs) { // relative call
460 i_set_relative_call(imm32);
462 i_set_relative_jmp(imm32);
463 } else { // absolute call
464 i_print("FIXME %s.%d", __FUNCTION__, __LINE__);
474 /*i_print(" msk: offset=%d", (data-startOfInstruction));*/
/* JMP8: the two high bits of insn carry the condition flags */
478 u8 cond = insn & 0x80;
479 u8 flag = insn & 0x40;
480 i_print("JMP8%s", cond ? (flag ? "cs" : "cc") : "");
/* the displacement byte is signed and doubled before being applied */
483 int relOffset = 2*((int)(s8)i_push(*data++));
485 i_set_relative_jmp(relOffset);
487 *i_print(" msk-offset=%x", 2*(*data) + (data-startOfInstruction));
488 *i_print(" msk-offset=%08x", (startOfInstruction-data_start) + 2*(*data) + (data-startOfInstruction));
492 case 0x04: i_print("RET"); i_push(*data++); break;
/* CMP family: opc - 0x05 selects the suffix printed by pflags */
494 u8 opindex = insn & 0x80;
495 u8 c3264 = insn & 0x40;
497 u8 b1 = i_push(*data++);
499 u8 op2 = (b1 & 0x70) >> 4;
500 u8 dref2 = b1 & 0x80;
502 i_print("CMP%d", !c3264 ? 32 : 64);
503 pflags(opc - 0x05); pspace();
505 pdref(dref2); preg(op2);
/* two-operand arithmetic / logic group: mnemonic chosen by opc (0x0a..0x19) */
512 u8 opindex = insn & 0x80;
513 u8 c3264 = insn & 0x40;
515 u8 b1 = i_push(*data++);
518 u8 op2 = (b1 & 0x70) >> 4;
519 u8 dref2 = b1 & 0x80;
521 case 0x0a: i_print("NOT"); break;
522 case 0x0b: i_print("NEG"); break;
523 case 0x0c: i_print("ADD"); break;
524 case 0x0d: i_print("SUB"); break;
525 case 0x0e: i_print("MUL"); break;
526 case 0x0f: i_print("MULU"); break;
527 case 0x10: i_print("DIV"); break;
528 case 0x11: i_print("DIVU"); break;
529 case 0x12: i_print("MOD"); break;
530 case 0x13: i_print("MODU"); break;
531 case 0x14: i_print("AND"); break;
532 case 0x15: i_print("OR"); break;
533 case 0x16: i_print("XOR"); break;
534 case 0x17: i_print("SHL"); break;
535 case 0x18: i_print("SHR"); break;
536 case 0x19: i_print("ASHR"); break;
538 fprintf(stderr, "\nopcode: %x\n", opc);
541 i_print("%d", c3264 ? 64 : 32); pspace();
542 pdref(dref1); preg(op1); pcomma();
543 pdref(dref2); preg(op2);
/* EXTND: the source width suffix comes from opc - 0x1a */
550 u8 opindex = insn & 0x80;
551 u8 c3264 = insn & 0x40;
552 u8 opmod = opc - 0x1a; // 0b, 1w, 2d
554 u8 b1 = i_push(*data++);
557 u8 op2 = (b1 & 0x70) >> 4;
558 u8 dref2 = b1 & 0x80;
560 i_print("EXTND"); pimmc(opmod);
561 i_print("%d", c3264 ? 64 : 32); pspace();
562 pdref(dref1); preg(op1); pcomma();
563 pdref(dref2); preg(op2);
/* STORESP: the second operand is printed as a VM register name */
570 u8 b1 = i_push(*data++);
572 u8 op2 = (b1 & 0x70) >> 4;
574 i_print("STORESP"); pspace();
575 preg(op1); pcomma(); pvmreg(op2);
/* CMPI family: opc - 0x2d selects the suffix printed by pflags */
579 u8 c3264 = insn & 0x40;
580 u8 i1632 = !!(insn & 0x80);
582 u8 b1 = i_push(*data++);
585 u8 op1index = b1 & 0x10;
587 i_print("CMPI%d", !c3264 ? 32 : 64);
589 pflags(opc - 0x2d); pspace();
590 pdref(dref1); preg(op1);
/* MOV family: op1mod / op2mod are imm_chw indices for the two width suffixes */
604 case 0x1d: op1mod = 0; op2mod = 1; break; // MOVbw
605 case 0x1e: op1mod = 1; op2mod = 1; break; // MOVww
606 case 0x1f: op1mod = 2; op2mod = 1; break; // MOVdw
607 case 0x20: op1mod = 3; op2mod = 1; break; // MOVqw
608 case 0x21: op1mod = 0; op2mod = 2; break; // MOVbd
609 case 0x22: op1mod = 1; op2mod = 2; break; // MOVwd
610 case 0x23: op1mod = 2; op2mod = 2; break; // MOVdd
611 case 0x24: op1mod = 3; op2mod = 2; break; // MOVqd
613 case 0x25: op1mod = 5; op2mod = 1; break; // MOVsnw
614 case 0x26: op1mod = 5; op2mod = 2; break; // MOVsnq
616 case 0x28: op1mod = 3; op2mod = 3; break; // MOVqq
618 case 0x32: op1mod = 4; op2mod = 1; break; // MOVnw
619 case 0x33: op1mod = 4; op2mod = 2; break; // MOVnd
621 fprintf(stderr, "wtfopcode: %x\n", opc);
/* the two high bits of insn say which operands carry an index */
624 u8 mod = (insn & 0xc0) >> 6;
625 u8 op1index = mod & 0x2;
626 u8 op2index = mod & 0x1;
628 u8 b1 = i_push(*data++);
631 u8 op2 = (b1 & 0x70) >> 4;
632 u8 dref2 = b1 & 0x80;
634 i_print("MOV"); pimmc(op1mod); pimmc(op2mod); pspace();
635 pdref(dref1); preg(op1);
636 // op2mod defines index width for *both* indexes
638 pspace(); pidx(op2mod);
640 pcomma(); pdref(dref2); preg(op2);
642 pspace(); pidx(op2mod);
/* MOVI: suffixes come from the width field of b1 and the 2-bit modifier of insn */
647 u8 mod = (insn & 0xc0) >> 6;
648 u8 b1 = i_push(*data++);
651 u8 width = (b1 & 0x30) >> 4;
652 u8 op1index = b1 & 0x40;
654 i_print("MOVI"); pimmc(width); pimmc(mod); pspace();
655 pdref(dref); preg(op1);
/* MOVIn */
663 u8 mod = (insn & 0xc0) >> 6;
665 u8 b1 = i_push(*data++);
668 u8 op1index = (b1 & 0x40) >> 6;
670 i_print("MOVIn"); pimmc(mod); pspace();
671 pdref(dref); preg(op1);
/* MOVREL */
679 u8 mod = (insn & 0xc0) >> 6;
680 u8 b1 = i_push(*data++);
683 u8 op1index = b1 & 0x40;
685 i_print("MOVREL"); pimmc(mod); pspace();
686 pdref(dref); preg(op1);
694 fprintf(stderr, "\nunknown opcode: 0x%02x\n", opc);
/*
 * NOTE(review): the only statement visible in this function is a disabled
 * printf of the current file offset (data - data_start); confirm whether the
 * function still does anything.
 */
699 static void pheader(void)
701 /*printf("\n%08x: ", data - data_start);*/
705 int main(int argc, const char **argv)
708 fprintf(stderr, "usage: %s <pe-ebc>\n", argv[0]);
712 int fd = open(argv[1], O_RDONLY);
713 file_len = lseek(fd, 0, SEEK_END);
714 data_start = data = mmap(0, file_len, PROT_READ, MAP_SHARED, fd, 0);
717 if (memcmp(data, "MZ", 2) != 0) {
718 fprintf(stderr, "wrong DOS header: %c%c\n", *data, *(data + 1));
725 while (!found && ++data < (data + file_len)) {
726 if (memcmp(data, "PE", 2) == 0) {
733 fprintf(stderr, "no PE header found\n");
738 if (memcmp(data, "\xbc\x0e", 2) != 0) { // read '0xebc'
739 fprintf(stderr, "not an EBC image: 0x%02x%02x\n", *data,
745 while (!found && ++data < (data + file_len)) {
746 if (memcmp(data, "\x0b\x01", 2) == 0) { // read '0x10b'
747 print_debug("PE-opt");
753 fprintf(stderr, "no PE-opt header found\n");
757 fprintf(stderr, PS "0x%04x\n", "magic", *((u16 *) data));
760 fprintf(stderr, PS "0x%02x\n", "majorver", *((u8 *) data));
762 fprintf(stderr, PS "0x%02x\n", "minorver", *((u8 *) data));
765 // TODO: not sure if this is correct :/
766 size_code = *((u32 *) data);
767 print_debug("size_code");
768 fprintf(stderr, PS "0x%08x\n", "size_code", size_code);
769 data += 4 + 4 + 4; //u32, u32, u32
771 entry_point = *((u32 *) data) - *((u32 *)(data+4));
772 fprintf(stderr, PS "0x%08x\n", "entry_point", entry_point);
775 base_code = *((u32 *) data);
776 print_debug("base_code");
777 fprintf(stderr, PS "0x%08x\n", "base_code", base_code);
779 base_data = *((u32 *) data);
780 print_debug("base_data");
781 fprintf(stderr, PS "0x%08x\n", "base_data", base_data);
784 fprintf(stderr, PS "0x%08x\n", "???", *((u32 *) data));
786 data = data_start + base_code;
787 while (data <= (data_start + base_code + size_code)) {
788 i_begin((int)(data-data_start), (int)(data-data_start-base_code));