;-----------------------------------------------------------------------
; deepjit.s -- "[bench] jit try of vm"
;
; Recovered from a git commitdiff whose line breaks had been collapsed:
;   From:      Martin Perner
;   Date:      Mon, 20 Dec 2010 21:20:29 +0000 (+0100)
;   Subject:   [bench] jit try of vm
;   X-Git-Tag: bootrom_v1~55
;   X-Git-Url: http://wien.tomnetworks.com/gitweb/?p=calu.git;a=commitdiff_plain;h=7b04055757bc6a71bf5ae5b35cba6fb61471bb01
;   diff --git a/3_test/deepjit.s b/3_test/deepjit.s (new file, index 0000000..190999b)
;
; Purpose: a small JIT for a byte-coded stack VM. `jit` walks the bytes
; at `inputdata`, dispatches each byte through `jumptable`, and copies the
; matching pre-assembled template (`prog_*`, stored as raw instruction
; words) to the address in r2, starting at `prog_start`. `main` then calls
; the generated code with r3 pointing at `stack`.
;
; Changes relative to the recovered commit:
;   * main:     r9 now points at `defertable` (it was loaded with
;               `instrtable`, i.e. the same address as r3).
;   * vm_defer: the patch position is now *loaded* from the defer table
;               (`ldw r8, 4(r10)`); it used to be `stw`, which overwrote
;               the entry written by vm_possign.
;   * prog_consts / prog_imm: destination register field of the ldi
;               template words set to r6 (0xed3.....); the previous words
;               (0xed4.....) are the ones used for `ldis r8, 0` in
;               prog_mul / prog_lessthan, while the following `stw` stores r6.
;   * comments: German comments translated, wrong/mistyped comments fixed.
; Open questions that could not be settled from this file alone are marked
; NOTE(review).
;-----------------------------------------------------------------------

.data
.org 0x10

; VM program, one opcode/operand per byte.
; The comment above each word lists its four bytes as they appear in the
; hex literal, left to right.
inputdata:
;8 * 8 4
.fill 1, 0x382A3834
;1 X * 8
.fill 1, 0x31582A38
;+ D X -
.fill 1, 0x2B44582D
;P \xF8 J D
.fill 1, 0x50F84A44
;+ * 8 6
.fill 1, 0x2B2A3836
;\000 \x20 I D
.fill 1, 0x00204944
;~ < \000 \000
.fill 1, 0x7E3C0000
;8 P \005 J
.fill 1, 0x3850054A
;* 8
.fill 1, 0x2A38

; operand stack used by the generated program (r3 is its stack pointer)
stack:
.fill 256, 0

;needed for jumps
;assuming that no more than 42 instr are used
;vm_loop stores the current emit address (r2) here once per dispatched
;input byte
instrtable:
.fill 42, 0

;-----------------------------------------------------------------------
; Code templates. Each word is one pre-assembled instruction; the text
; after ';' is the mnemonic it is meant to encode.
;-----------------------------------------------------------------------

; end of program: fetch top of stack into r0 and return (2 words)
prog_eof:
.fill 1, 0xE701FFFC;ldw r0, 0-4(r3)
.fill 1, 0xEB00000A;ret+

; multiply the two topmost stack entries by repeated addition (14 words)
prog_mul:
.fill 1, 0xE1998020;subi r3, r3, 4
.fill 1, 0xe7318000;ldw r6, 0(r3)
.fill 1, 0xe739fffc;ldw r7, 0-4(r3)
.fill 1, 0xed400004;ldis r8, 0
.fill 1, 0xe1038000;mov r0, r7
.fill 1, 0xe2800008;andx r0, 1
.fill 1, 0x00443001;adddnz r8, r8, r6
.fill 1, 0x01bb8008;subinz r7, r7, 1
.fill 1, 0x113b8000;addizs r7, r7, 0
;loop:
.fill 1, 0x00443001;adddnz r8, r8, r6
.fill 1, 0x00443001;adddnz r8, r8, r6
.fill 1, 0xe1bb8010;subi r7, r7, 2
.fill 1, 0x0b7ffe83;brnz+ loop
.fill 1, 0xe7c1fffc;stw r8, 0-4(r3)

; push a small constant (3 words); vm_consts ORs the value into word 0
prog_consts:
;register field is r6 so that it matches the stw r6 below
.fill 1, 0xed300004;ldis r6, CONST
.fill 1, 0xe7b18000;stw r6, 0(r3)
.fill 1, 0xe1198020;addi r3, r3, 4

; add the two topmost stack entries (5 words)
prog_add:
.fill 1, 0xe1998020;subi r3, r3, 4
.fill 1, 0xe7318000;ldw r6, 0(r3)
.fill 1, 0xe739fffc;ldw r7, 0-4(r3)
.fill 1, 0xe03bb000;add r7, r7, r6
.fill 1, 0xe7b9fffc;stw r7, 0-4(r3)

; subtract top of stack from the entry below it (5 words)
prog_sub:
.fill 1, 0xe1998020;subi r3, r3, 4
.fill 1, 0xe7318000;ldw r6, 0(r3)
.fill 1, 0xe739fffc;ldw r7, 0-4(r3)
.fill 1, 0xe0bbb000;sub r7, r7, r6
.fill 1, 0xe7b9fffc;stw r7, 0-4(r3)

; compare the two topmost entries, leave 0xFF or 0 (7 words)
prog_lessthan:
.fill 1, 0xe1998020;subi r3, r3, 4
.fill 1, 0xe7318000;ldw r6, 0(r3)
.fill 1, 0xe739fffc;ldw r7, 0-4(r3)
.fill 1, 0xed400004;ldis r8, 0
.fill 1, 0xec3b0000;cmp r7, r6
.fill 1, 0xbd4007fc;ldislt r8, 0xFF
.fill 1, 0xe7c1fffc;stw r8, 0-4(r3)

; duplicate top of stack (3 words)
prog_dup:
.fill 1, 0xe731fffc;ldw r6, 0-4(r3)
.fill 1, 0xe7b18000;stw r6, 0(r3)
.fill 1, 0xe1198020;addi r3, r3, 4

; conditional jump: pop, fall through to the next VM instruction when the
; popped value compares equal to 0, otherwise branch (5 words); the
; immediate of the last word is filled in by vm_jmp / vm_defer
prog_jmp:
.fill 1, 0xe1998020;subi r3, r3, 4
.fill 1, 0xe7318000;ldw r6, 0(r3)
.fill 1, 0xecb00000;cmpi r6,0
;static calced
;NOTE(review): the top nibble 0xb of the next word is the same as in the
;ldislt word 0xbd4007fc above, while addizs in prog_mul uses 0x1 --
;confirm that this word really encodes the eq condition
.fill 1, 0xbb000103;breq- vm_next
.fill 1, 0xeb000003;br+ #CONST

; push a 32 bit immediate (4 words); vm_imm ORs the halves into words 0/1
prog_imm:
;register field is r6 so that it matches the stw r6 below
.fill 1, 0xed300000;ldil r6, CONST
.fill 1, 0xed300002;ldih r6, CONST
.fill 1, 0xe7b18000;stw r6, 0(r3)
.fill 1, 0xe1198020;addi r3, r3, 4

; drop top of stack (1 word)
prog_pop:
.fill 1, 0xe1998020;subi r3, r3, 4

; exchange the two topmost stack entries (4 words)
prog_xch:
.fill 1, 0xe731fffc;ldw r6, 0-4(r3)
.fill 1, 0xe739fff8;ldw r7, 0-8(r3)
.fill 1, 0xe7b1fff8;stw r6, 0-8(r3)
.fill 1, 0xe7b9fffc;stw r7, 0-4(r3)

; invert top of stack (3 words)
prog_not:
.fill 1, 0xe731fffc;ldw r6, 0-4(r3)
.fill 1, 0xe4b7fffa;not r6
.fill 1, 0xe7b1fffc;stw r6, 0-4(r3)

.text
;-----------------------------------------------------------------------
; main: set up the jit arguments, translate inputdata, run the result,
; then start over.
;-----------------------------------------------------------------------
main:
    ;set address of input
    ldil r1, inputdata@lo
    ldih r1, inputdata@hi

    ;set address of program start
    ldil r2, prog_start@lo
    ldih r2, prog_start@hi

    ;set address to instruction table
    ldil r3, instrtable@lo
    ldih r3, instrtable@hi

    ;set address to defer table
    ;(must not alias the instruction table in r3)
    ldil r9, defertable@lo
    ldih r9, defertable@hi

    ;call jit compiler
    call+ jit

    ;set address to stack
    ldil r3, stack@lo
    ldih r3, stack@hi

    ;call jit'ed prog
    call+ prog_start

    br+ main

;-----------------------------------------------------------------------
; jit: translate the byte program at r1 into code at r2.
;
;first version only supports backward jumps
;
; In:
;   r1 ... address to input, every byte is a new input
;          includes pc implicitly
;   r2 ... address to program start
;   r3 ... address of instruction table
;   r9 ... address to actual entry in defer table
; Used internally:
;   r0 ... scratch (template word / dispatch target)
;   r4 ... gets loaded with instr. prog. addr.
;   r5 ... input
;   r6, r7, r8, r11 ... scratch in the individual cases
;   r10... address to defer table (copy of r9 at entry)
; Returns from vm_eof / vm_defer once input byte 0 has been handled.
;-----------------------------------------------------------------------
jit:
    ;backup defer table address
    mov r10, r9
    ;decrement address to input by 1
    ;(vm_loop increments it again before the first load)
    subi r1, r1, 1

;bytes without a handler land here too: they emit no code but still
;take one slot in the instruction table
vm_default:
vm_loop:
    ;increment input address
    addi r1, r1, 1

    ;store address of next instruction in table
    stw r2, 0(r3)
    ;increment instr. table
    addi r3, r3, 4

    ;load input
    ldb r5, 0(r1)
    ;we need to multiply input by 4 to get correct address offset
    lls r0, r5, 2
    ;calc position in jumptable
    ldw r0, jumptable(r0)
    ;jump to instr
    brr r0

;case \000
;0
vm_eof:
    ;load address of program
    ldil r4, prog_eof@lo
    ldih r4, prog_eof@hi
    ;program instruction (2)
    ldw r0, 0(r4)
    stx r0, 0(r2)
    ldw r0, 4(r4)
    stx r0, 4(r2)

    ;end of program
    ;now it is time to clear up the defer table

    ldil r7, prog_jmp@lo
    ldih r7, prog_jmp@hi
    ;load branch template (5th word of prog_jmp)
    ldw r7, 16(r7)

    ;if actual and base are equal, no entry
    cmp r9, r10
    ;return
    reteq-

;resolve the forward jumps recorded by vm_possign
;each defer table entry is two words:
;  0: address of the instruction table slot holding the jump target
;  4: address where the generated branch has to be written
vm_defer:
    ;load pointer to where to jump to
    ldw r6, 0(r10)
    ;load where to jump to
    ldw r6, 0(r6)
    ;load where to save from defer table
    ldw r8, 4(r10)

    ;generate branch
    sub r11, r6, r8
    ;set the upper 16 bit 0
    andx r11, 0xFFFF
    ;shift to the position of imm in br
    lls r11, r11, 7
    or r6, r7, r11
    ;NOTE(review): the templates are emitted with stx, this word is
    ;written with stw -- confirm which store reaches the program memory
    stw r6, 0(r8)

    ;next entry; done when the write position (r9) is reached
    addi r10, r10, 8
    cmp r10, r9
    reteq+
    brnq- vm_defer

;case *
;42
vm_mul:
    ;load address of program
    ldil r4, prog_mul@lo
    ldih r4, prog_mul@hi

    ;program instruction (14)
    ldw r0, 0(r4)
    stx r0, 0(r2)
    ldw r0, 4(r4)
    stx r0, 4(r2)
    ldw r0, 8(r4)
    stx r0, 8(r2)
    ldw r0, 12(r4)
    stx r0, 12(r2)
    ldw r0, 16(r4)
    stx r0, 16(r2)
    ldw r0, 20(r4)
    stx r0, 20(r2)
    ldw r0, 24(r4)
    stx r0, 24(r2)
    ldw r0, 28(r4)
    stx r0, 28(r2)
    ldw r0, 32(r4)
    stx r0, 32(r2)
    ldw r0, 36(r4)
    stx r0, 36(r2)
    ldw r0, 40(r4)
    stx r0, 40(r2)
    ldw r0, 44(r4)
    stx r0, 44(r2)
    ldw r0, 48(r4)
    stx r0, 48(r2)
    ldw r0, 52(r4)
    stx r0, 52(r2)

    ;increment address
    addi r2, r2, 56

    br+ vm_loop

;case +
;43
vm_add:
    ;load address of program
    ldil r4, prog_add@lo
    ldih r4, prog_add@hi

    ;program instruction (5)
    ldw r0, 0(r4)
    stx r0, 0(r2)
    ldw r0, 4(r4)
    stx r0, 4(r2)
    ldw r0, 8(r4)
    stx r0, 8(r2)
    ldw r0, 12(r4)
    stx r0, 12(r2)
    ldw r0, 16(r4)
    stx r0, 16(r2)

    ;increment address
    addi r2, r2, 20

    br+ vm_loop

;case -
;45
vm_sub:
    ;load address of program
    ldil r4, prog_sub@lo
    ldih r4, prog_sub@hi

    ;program instruction (5)
    ldw r0, 0(r4)
    stx r0, 0(r2)
    ldw r0, 4(r4)
    stx r0, 4(r2)
    ldw r0, 8(r4)
    stx r0, 8(r2)
    ldw r0, 12(r4)
    stx r0, 12(r2)
    ldw r0, 16(r4)
    stx r0, 16(r2)

    ;increment address
    addi r2, r2, 20

    br+ vm_loop

;case 0 1 2 3 4 5 6 7 8 9
;48-57
vm_consts:
    ;load address of program
    ldil r4, prog_consts@lo
    ldih r4, prog_consts@hi

    ;program instruction (3)
    ldw r0, 0(r4)
    ;the first instr. loads r6 with the number
    ;thus we shall emulate this

    ;calc number (input byte minus 48, the code of '0')
    subi r6, r5, 48
    ;shift 3 bits left, as the immediate in ldi has
    ;an offset of 3
    lls r6, r6, 3
    ;now 'add' this to the ldi
    or r0, r0, r6

    ;store this 'dynamic' instruction
    stx r0, 0(r2)
    ldw r0, 4(r4)
    stx r0, 4(r2)
    ldw r0, 8(r4)
    stx r0, 8(r2)

    ;increment address
    addi r2, r2, 12

    br+ vm_loop

;case <
;60
vm_lessthan:
    ;load address of program
    ldil r4, prog_lessthan@lo
    ldih r4, prog_lessthan@hi

    ;program instruction (7)
    ldw r0, 0(r4)
    stx r0, 0(r2)
    ldw r0, 4(r4)
    stx r0, 4(r2)
    ldw r0, 8(r4)
    stx r0, 8(r2)
    ldw r0, 12(r4)
    stx r0, 12(r2)
    ldw r0, 16(r4)
    stx r0, 16(r2)
    ldw r0, 20(r4)
    stx r0, 20(r2)
    ldw r0, 24(r4)
    stx r0, 24(r2)

    ;increment address
    addi r2, r2, 28

    br+ vm_loop

;case D
;68
vm_dup:
    ;load address of program
    ldil r4, prog_dup@lo
    ldih r4, prog_dup@hi

    ;program instruction (3)
    ldw r0, 0(r4)
    stx r0, 0(r2)
    ldw r0, 4(r4)
    stx r0, 4(r2)
    ldw r0, 8(r4)
    stx r0, 8(r2)

    ;increment address
    addi r2, r2, 12

    br+ vm_loop

;case I
;73
vm_imm:
    ;the following instructions calculate the immediate
    ;from the four input bytes after the opcode; the byte at 1(r1)
    ;ends up lowest, the byte at 4(r1) highest
    ;load new high byte
    ldb r6, 4(r1)
    ;shift high byte
    lls r6, r6, 8
    ;load 2nd byte
    ldb r7, 3(r1)
    ;add to high byte
    add r6, r6, r7
    ;shift
    lls r6, r6, 8
    ;load
    ldb r7, 2(r1)
    ;add
    add r6, r6, r7
    ;shift
    lls r6, r6, 8
    ;load
    ldb r7, 1(r1)
    ;add
    add r6, r6, r7

    ;now we will generate ldih/l which will store this
    ;immediate into a register

    ;load address of program
    ldil r4, prog_imm@lo
    ldih r4, prog_imm@hi

    ;save r6 to r7
    mov r7, r6

    ;generate 1st instr (ldil): mask with 0xFFFF, move to bit 3
    ldw r0, 0(r4)
    andx r6, 0xFFFF
    lls r6, r6, 3
    or r0, r0, r6
    stx r0, 0(r2)

    ;generate 2nd instr (ldih): presumably andxh masks the upper half;
    ;shifting right by 13 (16 - 3) moves that half to bit 3
    ldw r0, 4(r4)
    andxh r7, 0xFFFF
    lrs r7, r7, 13
    or r0, r0, r7
    stx r0, 4(r2)

    ;now we program the instructions that will save the
    ;immediate onto the stack and increment the latter

    ldw r0, 8(r4)
    stx r0, 8(r2)
    ldw r0, 12(r4)
    stx r0, 12(r2)

    ;increment address
    addi r2, r2, 16

    ;pc+4 (skip the four immediate bytes)
    addi r1, r1, 4
    br+ vm_loop

;case J
;74
vm_jmp:
    ;I don't like this ...
    ;stored instrs should be input independent
    ;use the jumptable
    ;for forward jumps a defer table has to be built *puke*

    ;load address of program
    ldil r4, prog_jmp@lo
    ldih r4, prog_jmp@hi

    ;program instruction (4 words copied here; the 5th word, the br,
    ;is generated below or by vm_defer)
    ;decrement sp
    ;subi r3, r3, 4
    ldw r0, 0(r4)
    stx r0, 0(r2)
    ;load sp
    ;ldw r6, 0(r3)
    ldw r0, 4(r4)
    stx r0, 4(r2)
    ;compare to 0
    ;cmpi r6,0
    ldw r0, 8(r4)
    stx r0, 8(r2)

    ;breq+ vm_next
    ;is statically known
    ldw r0, 12(r4)
    stx r0, 12(r2)

    ;r8 has now the current base
    ;NOTE(review): vm_loop already advanced r3 past the slot that got
    ;r2, so 0(r3) is the following slot; the base of this instruction
    ;looks like it is at 0-4(r3) (and still in r2) -- confirm
    ldw r8, 0(r3)
    ;we add the offset to this instruction
    ;NOTE(review): the br word sits at offset 16 (see stw r0, 16(r2)
    ;below), offset 12 is the breq -- confirm which one is meant
    addi r8, r8, 12


    ;we now calculate the jump destination
    ;set r6 to 0 (to clear upper bytes)
    ldis r6, 0
    ;load pc+1 input (the jump offset byte)
    ;r1 is not advanced past this byte, so vm_loop dispatches it
    ;through jumptable as well
    ldb r6, 1(r1)
    ;compare input with neg. max of 8 bit
    cmpi r6, 0x80
    ;below 0x80: forward jump, resolved later via the defer table
    brlt- vm_possign



    ;generate negative offset
    ;NOTE(review): the offset byte in r6 is overwritten here without
    ;being used; r6 becomes r3 + r7 -- confirm the intended calculation
    ldis r7, 0xFF00
    add r6, r3, r7
    ;r0 now has the target address
    ldw r0, 0(r6)
    ;we calc the offset
    ;NOTE(review): vm_defer computes target - position
    ;(sub r11, r6, r8), this is position - target -- confirm the sign
    sub r8, r8, r0
    ;set the upper 16 bit 0
    andx r8, 0xFFFF
    ;shift to the position of imm in br
    lls r8, r8, 7
    ;load template br
    ldw r0, 16(r4)
    or r0, r0, r8
    ;NOTE(review): every other emitted word is written with stx --
    ;confirm that stw is intended here
    stw r0, 16(r2)

    ;increment address
    addi r2, r2, 20

    br+ vm_loop


vm_possign:
    ;we now save the address in the instrtable where the addr to jump to stands
    ;the value doesn't exist at the moment, but it will at evaluation

    ;save position to save the instr into defer table
    stw r8, 4(r9)

    ;todo: check if -1 is needed
    subi r6, r6, 1
    ;multiply with 4 (shift by 2) to get offset right
    lls r6, r6, 2
    ;add to current base
    add r6, r3, r6
    ;save the address to defer table
    stw r6, 0(r9)
    ;increment defer table address
    addi r9, r9, 8
    ;increment address (the 5th word is left for vm_defer)
    addi r2, r2, 20
    br+ vm_loop

;case P
;80
vm_pop:
    ;load address of program
    ldil r4, prog_pop@lo
    ldih r4, prog_pop@hi

    ;program instruction (1)
    ldw r0, 0(r4)
    stx r0, 0(r2)

    ;increment address
    addi r2, r2, 4

    br+ vm_loop

;case X
;88
vm_xch:
    ;load address of program
    ldil r4, prog_xch@lo
    ldih r4, prog_xch@hi

    ;program instruction (4)
    ldw r0, 0(r4)
    stx r0, 0(r2)
    ldw r0, 4(r4)
    stx r0, 4(r2)
    ldw r0, 8(r4)
    stx r0, 8(r2)
    ldw r0, 12(r4)
    stx r0, 12(r2)

    ;increment address
    addi r2, r2, 16

    br+ vm_loop

;case ~
;126
vm_not:
    ;load address of program
    ldil r4, prog_not@lo
    ldih r4, prog_not@hi

    ;program instruction (3)
    ldw r0, 0(r4)
    stx r0, 0(r2)
    ldw r0, 4(r4)
    stx r0, 4(r2)
    ldw r0, 8(r4)
    stx r0, 8(r2)

    ;increment address
    addi r2, r2, 12

    br+ vm_loop

;the generated program is written from here on (main passes this
;address in r2 and calls it afterwards)
prog_start:

.data
;dispatch table used by vm_loop: one handler address per input byte
;value (256 entries)
jumptable:
;0
.fill 1, vm_eof
;1-41
.fill 41, vm_default
;42
.fill 1, vm_mul
;43
.fill 1, vm_add
;44
.fill 1, vm_default
;45
.fill 1, vm_sub
;46-47
.fill 2, vm_default
;48-57
.fill 10, vm_consts
;58-59
.fill 2, vm_default
;60
.fill 1, vm_lessthan
;61-67
.fill 7, vm_default
;68
.fill 1, vm_dup
;69-72
.fill 4, vm_default
;73
.fill 1, vm_imm
;74
.fill 1, vm_jmp
;75-79
.fill 5, vm_default
;80
.fill 1, vm_pop
;81-87
.fill 7, vm_default
;88
.fill 1, vm_xch
;89-125
.fill 37, vm_default
;126
.fill 1, vm_not
;127-255
.fill 129, vm_default

;we assume not more than 3 entries
;(two words per entry, see vm_defer)
defertable:
.fill 6, 0