/* todo: copyright/gpl stuff. */ #ifndef __i386__ #error "use it on your own risk" #endif #include #include #include #include #include #include #include #include #include #include #define PS "%-10s: " typedef unsigned char u8; typedef signed char s8; typedef uint16_t u16; typedef int16_t s16; typedef uint32_t u32; typedef int32_t s32; typedef uint64_t u64; typedef int64_t s64; #define MAX_STRING_LEN 100 #define MAX_INSTRUCTION_LEN 40 typedef struct _instruction { int offsetInFile; int offsetInCode; u8 bytes[MAX_INSTRUCTION_LEN]; int bytePos; char string[MAX_STRING_LEN]; int stringPos; int isJump; int jumpAddress; int isCall; int callAddress; int isCallTarget; int isJumpTarget; int callLabel; int jumpLabel; struct _instruction *next; } instruction; typedef struct _statistics { int maxInstructionLen; } statistics; static instruction* instruction_new(int offsetInFile, int offsetInCode) { instruction *inst = malloc(sizeof(instruction)); assert(inst != NULL); memset(inst, 0, sizeof(instruction)); inst->offsetInFile = offsetInFile; inst->offsetInCode = offsetInCode; return inst; } static void instruction_print(instruction *inst, const char *c) { int len = strlen(c); assert((inst->stringPos+len) < MAX_STRING_LEN); for (int i=0; istring[inst->stringPos++] = c[i]; } static void instruction_push_byte(instruction *inst, u8 byte) { assert((inst->bytePos+1) < MAX_INSTRUCTION_LEN); inst->bytes[inst->bytePos++] = byte; } static instruction *first_instruction; static instruction *current_instruction; static int current_call_label=0; static int current_jump_label=0; static statistics stat; static int entry_point; /* * BEGIN - interface functions */ static void i_begin(int offsetInFile, int offsetInCode) { if (first_instruction == NULL) { first_instruction = current_instruction = instruction_new(offsetInFile, offsetInCode); } else { assert(current_instruction->next == 0); current_instruction->next = instruction_new(offsetInFile, offsetInCode); current_instruction = current_instruction->next; } } static void i_print(const char *fmt, ...) { assert(current_instruction != NULL); va_list args; char str[MAX_STRING_LEN]; memset(str, 0, MAX_STRING_LEN); va_start(args, fmt); vsprintf(str, fmt, args); instruction_print(current_instruction, str); va_end(args); } static u8 i_push(u8 byte) { assert(current_instruction != NULL); instruction_push_byte(current_instruction, byte); return byte; } static void i_set_relative_call(int relAddr) { current_instruction->isCall=1; current_instruction->callAddress = current_instruction->offsetInCode + current_instruction->bytePos + relAddr; /*i_print("[call to 0x%08x] ", current_instruction->callAddress);*/ } static void i_set_relative_jmp(int relAddr) { current_instruction->isJump=1; // FIXME: is this correct ? current_instruction->jumpAddress = current_instruction->offsetInCode + current_instruction->bytePos + relAddr; /*current_instruction->jumpAddress = current_instruction->offsetInCode + relAddr;*/ /*i_print("[jump to 0x%08x] ", current_instruction->jumpAddress);*/ } static int i_get_jump_label(int absoluteCodeOffset) { instruction *i = first_instruction; while (i != NULL) { if (i->offsetInCode == absoluteCodeOffset && i->isJumpTarget) return i->jumpLabel; i = i->next; } return -1; } static int i_get_call_label(int absoluteCodeOffset) { instruction *i = first_instruction; while (i != NULL) { if (i->offsetInCode == absoluteCodeOffset && i->isCallTarget) return i->callLabel; i = i->next; } return -1; } static void i_dump() { int k; instruction *i = first_instruction; while (i != NULL) { if (i->offsetInCode == entry_point) printf("\n\nENTRY_POINT:\n"); if (i->isCallTarget) printf("\n\nfunction%d:\n", i->callLabel); if (i->isJumpTarget) printf("label%d: \n", i->jumpLabel); printf("\t0x%08x:", i->offsetInCode); for (k=0; kbytePos) printf(" %02x", i->bytes[k]); else printf(" "); } if (i->isCall) printf(" %s [call function%d]\n", i->string, i_get_call_label(i->callAddress)); else if (i->isJump) printf(" %s [jump label%d]\n", i->string, i_get_jump_label(i->jumpAddress)); else printf(" %s\n", i->string); i = i->next; } } static void i_mark_jump_target(int absoluteCodeOffset) { // search in list for address and set isJumpTarget = true instruction *i = first_instruction; while (i != NULL) { if (i->offsetInCode == absoluteCodeOffset) { i->isJumpTarget++; return; } i = i->next; } assert(0); } static void i_mark_call_target(int absoluteCodeOffset) { // search in list for address and set isCallTarget = true instruction *i = first_instruction; while (i != NULL) { if (i->offsetInCode == absoluteCodeOffset) { i->isCallTarget++; return; } i = i->next; } assert(0); } static void i_mark_jump_targets() { // search the whole list for jumps and call // i_mark_jump_target() for every of them instruction *i = first_instruction; while (i != NULL) { if (i->isJump) i_mark_jump_target(i->jumpAddress); i = i->next; } } static void i_mark_call_targets() { // search the whole list for calls and call // i_mark_call_target() for every of them instruction *i = first_instruction; while (i != NULL) { if (i->isCall) i_mark_call_target(i->callAddress); i = i->next; } } static void i_enumerate_jump_targets() { instruction *i = first_instruction; while (i != NULL) { if (i->isJumpTarget) i->jumpLabel = current_jump_label++; i = i->next; } } static void i_enumerate_call_targets() { instruction *i = first_instruction; while (i != NULL) { if (i->isCallTarget) i->callLabel = current_call_label++; i = i->next; } } static void i_make_statistics() { instruction *i = first_instruction; while (i != NULL) { if (i->bytePos > stat.maxInstructionLen) { stat.maxInstructionLen = i->bytePos; } i = i->next; } } static void i_end() { i_mark_jump_targets(); i_mark_call_targets(); i_enumerate_jump_targets(); i_enumerate_call_targets(); i_make_statistics(); } static void i_cleanup() { instruction *current_inst = first_instruction; instruction *next; first_instruction = NULL; while (current_inst != NULL) { next = current_inst->next; free(current_inst); current_inst = next; } } /* * END - interface functions */ static int file_len; static u8 *data, *data_start; static u32 base_data, base_code, size_code; static char *imm_chw[] = { "b", /* 08 bits */ "w", /* 16 bits */ "d", /* 32 bits */ "q", /* 64 bits */ "n" /* unsigned int. hax */ , "sn" /* signed int. hax */ }; static char *vm_reg[] = { "FLAGS", "IP", "RESERVED", "RESERVED", "RESERVED", "RESERVED", "RESERVED", "RESERVED" }; static void print_debug(const char *s) { fprintf(stderr, PS "0x%08x (0x%04x)\n", s, (u32) data, data - data_start); } static void pspace(void) { i_print(" "); } static void pcomma(void) { i_print(", "); } static void preg(u8 reg) { if (reg > 7) { assert(0); } i_print("r%1d", reg); } static void pvmreg(u8 reg) { if (reg > 7) { assert(0); } i_print("%s", vm_reg[reg]); } static void pdref(u8 x) { i_print("%s", x ? "@" : ""); } #define PIMM_READ(size) \ static void pimm##size(void) \ { \ i_print(size != 64 ? "%x" : "%llx", *((u##size *) data)); \ for (int i=0; i> (size-1))&1; \ u##size nat_bits = ((d>>(size-4))&0x7) * (size/8);\ u##size c_bits = size - nat_bits - 4;\ u##size c = (d>>nat_bits)&((1ULL< 5) { assert(0); } i_print("%s", imm_chw[imm_type]); } static void pflags(u8 flag) { switch (flag) { case 0x0: i_print("eq"); break; case 0x1: i_print("lte"); break; case 0x2: i_print("gte"); break; case 0x3: i_print("ulte"); break; case 0x4: i_print("ugte"); break; } } static const char *breakopts[] = { "Runaway program break", "Get virtual machine version", "undefined. WTF?", // "Skip"? "Debug breakpoint", "System call", "Create thunk", "Set compiler version" }; static void pbreak(void) { u8 i = i_push(*data++); pspace(); if (i > 6) { fprintf(stderr, "unknown break!"); assert(0); } else i_print("\"%s\"", breakopts[i]); } static void pinsn(void) { u8 insn = i_push(*data++); u8 opc = insn & 0x3f; switch (opc) { case 0x00: i_print("BREAK"); pbreak(); break; case 0x01: case 0x03:{ u8 opindex = insn & 0x80; u8 c3264 = insn & 0x40; u8 b1 = i_push(*data++); u8 cond = b1 & 0x80; u8 flag = b1 & 0x40; u8 ebcnative = b1 & 0x20; u8 relabs = b1 & 0x10; u8 op1 = b1 & 0x7; u8 dref1 = b1 & 0x8; if (opc == 0x01) { i_print("JMP%d%s", c3264 ? 64 : 32, cond ? (flag ? "cs" : "cc") : ""); } else if (opc == 0x03) { i_print("CALL%d%s", c3264 ? 64 : 32, ebcnative ? "EX" : ""); } i_print("%s", relabs ? "a" : ""); pspace(); if (!c3264) { // 32bit pdref(dref1); preg(op1); if (opindex) { pspace(); if (op1==0) { // relative/absolute call/jmp u32 imm32 = *(u32*)data; i_push(*data++); i_push(*data++); i_push(*data++); i_push(*data++); if (relabs) { // relative call if (opc == 0x03) i_set_relative_call(imm32); else i_set_relative_jmp(imm32); } else { // absolute call i_print("FIXME %s.%d", __FUNCTION__, __LINE__); } } else { pidx32(); /*pimm32();*/ } } } else { // 64bit pimm64(); } /*i_print(" msk: offset=%d", (data-startOfInstruction));*/ } break; case 0x02:{ u8 cond = insn & 0x80; u8 flag = insn & 0x40; i_print("JMP8%s", cond ? (flag ? "cs" : "cc") : ""); pspace(); //pimm8(); int relOffset = 2*((int)(s8)i_push(*data++)); i_set_relative_jmp(relOffset); /* *i_print(" msk-offset=%x", 2*(*data) + (data-startOfInstruction)); *i_print(" msk-offset=%08x", (startOfInstruction-data_start) + 2*(*data) + (data-startOfInstruction)); */ } break; case 0x04: i_print("RET"); i_push(*data++); break; case 0x05 ... 0x09:{ u8 opindex = insn & 0x80; u8 c3264 = insn & 0x40; u8 b1 = i_push(*data++); u8 op1 = b1 & 0x7; u8 op2 = (b1 & 0x70) >> 4; u8 dref2 = b1 & 0x80; i_print("CMP%d", !c3264 ? 32 : 64); pflags(opc - 0x05); pspace(); preg(op1); pcomma(); pdref(dref2); preg(op2); if (opindex) { pspace(); pimm16(); } } break; case 0x0a ... 0x19:{ u8 opindex = insn & 0x80; u8 c3264 = insn & 0x40; u8 b1 = i_push(*data++); u8 op1 = b1 & 0x7; u8 dref1 = b1 & 0x8; u8 op2 = (b1 & 0x70) >> 4; u8 dref2 = b1 & 0x80; switch (opc) { case 0x0a: i_print("NOT"); break; case 0x0b: i_print("NEG"); break; case 0x0c: i_print("ADD"); break; case 0x0d: i_print("SUB"); break; case 0x0e: i_print("MUL"); break; case 0x0f: i_print("MULU"); break; case 0x10: i_print("DIV"); break; case 0x11: i_print("DIVU"); break; case 0x12: i_print("MOD"); break; case 0x13: i_print("MODU"); break; case 0x14: i_print("AND"); break; case 0x15: i_print("OR"); break; case 0x16: i_print("XOR"); break; case 0x17: i_print("SHL"); break; case 0x18: i_print("SHR"); break; case 0x19: i_print("ASHR"); break; default: fprintf(stderr, "\nopcode: %x\n", opc); assert(0); } i_print("%d", c3264 ? 64 : 32); pspace(); pdref(dref1); preg(op1); pcomma(); pdref(dref2); preg(op2); if (opindex) { pspace(); pimm16(); } } break; case 0x1a ... 0x1c:{ u8 opindex = insn & 0x80; u8 c3264 = insn & 0x40; u8 opmod = opc - 0x1a; // 0b, 1w, 2d u8 b1 = i_push(*data++); u8 op1 = b1 & 0x7; u8 dref1 = b1 & 0x8; u8 op2 = (b1 & 0x70) >> 4; u8 dref2 = b1 & 0x80; i_print("EXTND"); pimmc(opmod); i_print("%d", c3264 ? 64 : 32); pspace(); pdref(dref1); preg(op1); pcomma(); pdref(dref2); preg(op2); if (opindex) { pspace(); pimm16(); } } break; case 0x2a:{ u8 b1 = i_push(*data++); u8 op1 = b1 & 0x7; u8 op2 = (b1 & 0x70) >> 4; i_print("STORESP"); pspace(); preg(op1); pcomma(); pvmreg(op2); } break; case 0x2d ... 0x31:{ u8 c3264 = insn & 0x40; u8 i1632 = !!(insn & 0x80); u8 b1 = i_push(*data++); u8 op1 = b1 & 0x7; u8 dref1 = b1 & 0x8; u8 op1index = b1 & 0x10; i_print("CMPI%d", !c3264 ? 32 : 64); pimmc(i1632 + 1); pflags(opc - 0x2d); pspace(); pdref(dref1); preg(op1); if (op1index) { pspace(); pimm16(); } pcomma(); if (i1632) pimm32(); else pimm16(); } break; case 0x1d ... 0x28: case 0x32 ... 0x33:{ u8 op1mod = 0; u8 op2mod = 0; switch (opc) { case 0x1d: op1mod = 0; op2mod = 1; break; // MOVbw case 0x1e: op1mod = 1; op2mod = 1; break; // MOVww case 0x1f: op1mod = 2; op2mod = 1; break; // MOVdw case 0x20: op1mod = 3; op2mod = 1; break; // MOVqw case 0x21: op1mod = 0; op2mod = 2; break; // MOVbd case 0x22: op1mod = 1; op2mod = 2; break; // MOVwd case 0x23: op1mod = 2; op2mod = 2; break; // MOVdd case 0x24: op1mod = 3; op2mod = 2; break; // MOVqd case 0x25: op1mod = 5; op2mod = 1; break; // MOVsnw case 0x26: op1mod = 5; op2mod = 2; break; // MOVsnq case 0x28: op1mod = 3; op2mod = 3; break; // MOVqq case 0x32: op1mod = 4; op2mod = 1; break; // MOVnw case 0x33: op1mod = 4; op2mod = 2; break; // MOVnd default: fprintf(stderr, "wtfopcode: %x\n", opc); assert(0); } u8 mod = (insn & 0xc0) >> 6; u8 op1index = mod & 0x2; u8 op2index = mod & 0x1; u8 b1 = i_push(*data++); u8 op1 = b1 & 0x7; u8 dref1 = b1 & 0x8; u8 op2 = (b1 & 0x70) >> 4; u8 dref2 = b1 & 0x80; i_print("MOV"); pimmc(op1mod); pimmc(op2mod); pspace(); pdref(dref1); preg(op1); // op2mod defines index width for *both* indexes if (op1index) { pspace(); pidx(op2mod); } pcomma(); pdref(dref2); preg(op2); if (op2index) { pspace(); pidx(op2mod); } } break; case 0x37:{ u8 mod = (insn & 0xc0) >> 6; u8 b1 = i_push(*data++); u8 op1 = b1 & 0x7; u8 dref = b1 & 0x8; u8 width = (b1 & 0x30) >> 4; u8 op1index = b1 & 0x40; i_print("MOVI"); pimmc(width); pimmc(mod); pspace(); pdref(dref); preg(op1); if (op1index) { pspace(); pidx16(); } pcomma(); pimm(mod); } break; case 0x38:{ u8 mod = (insn & 0xc0) >> 6; u8 b1 = i_push(*data++); u8 op1 = b1 & 0x7; u8 dref = b1 & 0x8; u8 op1index = (b1 & 0x40) >> 6; i_print("MOVIn"); pimmc(mod); pspace(); pdref(dref); preg(op1); if (op1index) { pspace(); pidx16(); } pcomma(); pimm(mod); } break; case 0x39:{ u8 mod = (insn & 0xc0) >> 6; u8 b1 = i_push(*data++); u8 op1 = b1 & 0x7; u8 dref = b1 & 0x8; u8 op1index = b1 & 0x40; i_print("MOVREL"); pimmc(mod); pspace(); pdref(dref); preg(op1); if (op1index) { pspace(); pidx16(); } pcomma(); pimm(mod); } break; default: fprintf(stderr, "\nunknown opcode: 0x%02x\n", opc); assert(0); } } static void pheader(void) { /*printf("\n%08x: ", data - data_start);*/ /*i_print("\n");*/ } int main(int argc, const char **argv) { if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(1); } int fd = open(argv[1], O_RDONLY); file_len = lseek(fd, 0, SEEK_END); data_start = data = mmap(0, file_len, PROT_READ, MAP_SHARED, fd, 0); close(fd); if (memcmp(data, "MZ", 2) != 0) { fprintf(stderr, "wrong DOS header: %c%c\n", *data, *(data + 1)); exit(2); } else { print_debug("data"); } int found = 0; while (!found && ++data < (data + file_len)) { if (memcmp(data, "PE", 2) == 0) { print_debug("PE"); found = 1; } } if (!found) { fprintf(stderr, "no PE header found\n"); exit(3); } data += 4; if (memcmp(data, "\xbc\x0e", 2) != 0) { // read '0xebc' fprintf(stderr, "not an EBC image: 0x%02x%02x\n", *data, *(data + 1)); exit(4); } found = 0; while (!found && ++data < (data + file_len)) { if (memcmp(data, "\x0b\x01", 2) == 0) { // read '0x10b' print_debug("PE-opt"); found = 1; } } if (!found) { fprintf(stderr, "no PE-opt header found\n"); exit(3); } fprintf(stderr, PS "0x%04x\n", "magic", *((u16 *) data)); data += 2; //u16 fprintf(stderr, PS "0x%02x\n", "majorver", *((u8 *) data)); data++; //u8 fprintf(stderr, PS "0x%02x\n", "minorver", *((u8 *) data)); data++; //u8 // TODO: not sure if this is correct :/ size_code = *((u32 *) data); print_debug("size_code"); fprintf(stderr, PS "0x%08x\n", "size_code", size_code); data += 4 + 4 + 4; //u32, u32, u32 entry_point = *((u32 *) data) - *((u32 *)(data+4)); fprintf(stderr, PS "0x%08x\n", "entry_point", entry_point); data += 4; //u32 base_code = *((u32 *) data); print_debug("base_code"); fprintf(stderr, PS "0x%08x\n", "base_code", base_code); data += 4; base_data = *((u32 *) data); print_debug("base_data"); fprintf(stderr, PS "0x%08x\n", "base_data", base_data); data += 4; //u32 fprintf(stderr, PS "0x%08x\n", "???", *((u32 *) data)); data = data_start + base_code; while (data <= (data_start + base_code + size_code)) { i_begin((int)(data-data_start), (int)(data-data_start-base_code)); pheader(); pinsn(); } printf("\n"); i_end(); i_dump(); i_cleanup(); return 0; }