#define PROGINSTR stw r0, PDATA(r13)
#include "dt_inc.s"
.data
.org 0x10
inputdata:
;Bytecode program that jit translates. jit reads it one byte at a time
;(ldb) and dispatches on the byte value through jumptable; a 0 byte in
;opcode position selects vm_eof and ends the translation.
;Each comment lists the four bytes of the following word, most significant
;byte first.
;NOTE(review): the order in which the bytes of a word are executed depends
;on the byte order of the target, which this file does not state -- confirm.
;8 * 8 4
.fill 1, 0x382A3834
;1 X * 8
.fill 1, 0x31582A38
;+ D X -
.fill 1, 0x2B44582D
;P \xF8 J D
.fill 1, 0x50F84A44
;+ * 8 6
.fill 1, 0x2B2A3836
;\x00 \x20 I D
.fill 1, 0x00204944
;~ < \x00 \x00
.fill 1, 0x7E3C0000
;8 P \x05 J
.fill 1, 0x3850054A
;\x00 \x00 * 8
.fill 1, 0x2A38

;needed for jumps
;vm_loop_1 stores one word per decoded bytecode instruction here: the value
;of r2 (target program address) at which the translation of that
;instruction starts. vm_jmp and vm_defer read it to resolve branch targets.
;assuming that no more than 42 instr are used (there is no bounds check)
instrtable:
.fill 42, 0

;Template for the end of the program (bytecode 0): 2 instruction words,
;copied to the programmer port by vm_eof.
prog_eof:
.ifill pop r7
.ifill ret+

;Template for '*': 12 instruction words, copied verbatim by vm_mul.
;The hex value after each instruction is its encoding.
;NOTE(review): this appears to compute r6 = r6 * (popped r7) by repeated
;addition into r8, with a loop that adds twice per iteration; the exact
;behaviour depends on the conditional suffixes (dnz/inz/izs), which are
;defined by the ISA and not by this file -- confirm.
prog_mul:
.ifill pop r7
.ifill ldis r8, 0;0xed400004
.ifill mov r0, r7;0xe1038000
.ifill andx r0, 1;0xe2800008
.ifill adddnz r8, r8, r6;0x00443001
.ifill subinz r7, r7, 1;0x01bb8008
.ifill addizs r7, r7, 0;0x113b8000
;loop:
.ifill adddnz r8, r8, r6;0x00443001
.ifill adddnz r8, r8, r6;0x00443001
.ifill subi r7, r7, 2;0xe1bb8010
;hand-encoded branch back to the position marked ";loop:" above
.fill 0x0b7ffe83;brnz+ loop
.ifill mov r6, r8

;Template for the digits '0'..'9': 2 instruction words.
;vm_consts emits the first word unchanged and ORs (digit << 3) into the
;second word before emitting it.
prog_consts:
.ifill push r6
.fill 0xed300004;ldis r6, CONST

;Template for '+': 2 instruction words, copied verbatim by vm_add.
prog_add:
.ifill pop r7
.ifill add r6, r7, r6;0xe03bb000

;Template for '-': 3 instruction words, copied verbatim by vm_sub.
;The popped value (r7) is the left operand, r6 the right operand.
prog_sub:
.ifill pop r7
.ifill sub r7, r7, r6;0xe0bbb000
.ifill mov r6, r7

;Template for '<': 4 instruction words, copied verbatim by vm_lessthan.
;Relies on main having loaded r14 with "ldis r14, 0xFF" and r15 with 0
;before the generated code runs: r6 receives r14 on "lt" and r15 on "ge".
prog_lessthan:
.ifill pop r7
.ifill cmp r7, r6;0xec3b0000
.ifill movdlt r6, r14
.ifill movge r6, r15

;Template for 'D': 1 instruction word, copied verbatim by vm_dup.
prog_dup:
.ifill push r6

;Template for 'J': 3 instruction words.
;vm_jmp emits the first two words unchanged. The third word is a branch
;template: vm_jmp (backward jumps) and vm_defer (forward jumps) OR the
;16-bit offset, shifted left by 7, into it before writing it.
prog_jmp:
.ifill cmpi r6,0;0xecb00000
.ifill pop r6
;static calced
.fill 1, 0x0b000003;brne+ CONST

;Template for 'I' (32-bit immediate): 3 instruction words.
;vm_imm emits the first word unchanged, ORs the low 16 bits of the
;immediate (shifted left by 3) into the second word and the high 16 bits
;(moved to the same bit position) into the third word.
prog_imm:
.ifill push r6
.fill 1, 0xed300000;ldil r6, CONST
.fill 1, 0xed300002;ldih r6, CONST

;Template for 'P': 1 instruction word, copied verbatim by vm_pop.
prog_pop:
.ifill pop r6

;Template for 'X': 3 instruction words, copied verbatim by vm_xch.
;Swaps r6 with the value on top of the stack, using r7 as scratch.
prog_xch:
.ifill pop r7
.ifill push r6
.ifill mov r6, r7

;Template for '~': 1 instruction word, copied verbatim by vm_not.
prog_not:
.ifill not r6;0xe4b7fffa

.text
.org 0
;Entry point at address 0: calls main, calls it a second time should the
;first call return, then returns.
;Note that main ends in an unconditional "br+ main".
start:
	call+ main
	call+ main
	ret


main:
	;Benchmark driver: set up the registers jit expects, translate
	;inputdata, run the generated code between timer start and timer
	;read-out, send the result and the timer value, then start over.
	;NOTE(review): u_*, t_*, PBASE, PADDR and prog_start are not defined
	;in this file -- presumably they come from dt_inc.s; confirm.

	call+ u_init
	call+ u_recv_byte

	; benchprolog
	call t_init
	call t_stop
	ldis r1, 0
	call t_valset
	call t_start
	; /benchprolog

	;set address of input
	ldis r1, inputdata@lo
	ldih r1, inputdata@hi

	;set address of program start
	;(divided by 4: r2 is advanced by one per emitted instruction word)
	ldis r2, (prog_start/4)@lo
	ldih r2, (prog_start/4)@hi

	;set address to instruction table
	ldis r3, instrtable@lo
	ldih r3, instrtable@hi

	;set address to defer table
	ldis r9, defertable@lo
	ldih r9, defertable@hi

	;base address of the programmer; PADDR/PDATA are offsets from it
	ldis r13, PBASE@lo
	ldih r13, PBASE@hi

	;set programmer address
	stw r2, PADDR(r13)

	;call jit compiler
	call+ jit

	;set address to stack
	;ldil r3, stack@lo
	;ldih r3, stack@hi

	;r14/r15 were used by jit as template pointers; the generated code
	;(prog_lessthan) expects the constants below instead
	;make r15 a 0-register
	ldis r15, 0
	;make r14 a 8-bit -1-register
	ldis r14, 0xFF

	;call jit'ed prog
	call+ prog_start

	; benchepilog
	;keep the program result (r6) across the timer calls
	push r6
	call+ t_init
	call+ t_stop
	call+ t_valget
	subi r0, r0, 0xc ; subtract offset
	;reorder so that the result is on top of the stack and the timer
	;value (r0) is below it
	pop r3
	push r0
	push r3
	; /benchepilog

	;send result
	call+ u_init
	;first pop: program result
	pop r1
	call u_send_byte
	call u_send_newline
	;second pop: timer value
	pop r1
	call u_send_uint
	call u_send_newline

	br+ main

;Translates the bytecode at r1 into instruction words that are written to
;the programmer data port PDATA(r13); returns from vm_eof / vm_defer.
;The first version only supported backward jumps; forward jumps are
;recorded in the defer table by vm_possign and patched by vm_defer.
jit:
	;r1 ... address to input, every byte is a new input
	;       includes pc implicitly
	;r2 ... address to program start
	;r3 ... address of instruction table
	;r4 ... gets loaded with instr. prog. addr.
	;r5 ... input
	;r9 ... address to actual entry in defer table
	;r10... address to defer table
	;r13 .. programmer address
	;r14 .. address of prog_mul (set below)
	;r15 .. address of prog_consts (set below)

	;load address of program
	;(templates for vm_mul and vm_consts, kept in registers)
	ldil r14, prog_mul@lo
	ldih r14, prog_mul@hi

	ldil r15, prog_consts@lo
	ldih r15, prog_consts@hi

	;backup defer table address
	mov r10, r9
	;enter the loop without incrementing r1 for the first input byte
	br+ vm_loop_1

;Main decode loop. vm_default is the jumptable target for every byte that
;has no handler: such bytes still get an instrtable entry but emit nothing.
vm_default:	
vm_loop:
	;increment input address
	addi r1, r1, 1
vm_loop_1:
	;store address of next instruction in table
	stw r2, 0(r3)
	;increment instr. table
	;(from here on r3 points to the entry of the following instruction)
	addi r3, r3, 4

	;load input
	ldb r5, 0(r1)
	;we need to multiply input by 4 to get correct address offset
	lls r0, r5, 2
	;calc position in jumptable
	ldw r0, jumptable(r0)
	;jump to instr
	brr r0

;case 0 (end of input)
;Emits prog_eof, then either returns (no deferred jumps) or falls through
;to vm_defer with the branch template in r7.
vm_eof:
	;load address of program
	ldil r4, prog_eof@lo
	ldih r4, prog_eof@hi
	;program instruction (2)
	ldw r0, 0(r4)
	stw r0, PDATA(r13)
	ldw r0, 4(r4)
	stw r0, PDATA(r13)

	;end of program
	;now it is time to clear up the defer table

	ldil r7, prog_jmp@lo
	ldih r7, prog_jmp@hi
	;load branch template
	;(third word of prog_jmp)
	ldw r7, 8(r7)

	;if actual and base are equal, no entry
	cmp r9, r10
	;return
	reteq-

;Patches the deferred forward branches, one defer table entry per
;iteration, and returns once all used entries are processed.
;Entered by fall-through from vm_eof with:
;  r7  ... branch template (third word of prog_jmp)
;  r9  ... address just past the last used defer table entry
;  r10 ... address of the entry to process
;  r13 ... programmer address
;Entry layout (written by vm_possign):
;  0(r10) ... address of the instrtable slot that holds the jump target
;  4(r10) ... program address of the placeholder word to overwrite
;Clobbers r6, r8, r11.
vm_defer:
	;load pointer to where to jump to
	ldw r6, 0(r10)
	;load where to jump to
	ldw r6, 0(r6)
	;load where to save from defer table
	;(this used to be "stw", which overwrote the entry with the stale
	;content of r8 and therefore only patched the right slot for the
	;most recently compiled forward jump)
	ldw r8, 4(r10)

	;generate branch
	;offset = target - address of the branch itself, same as in vm_jmp
	sub r11, r6, r8
	;lrs r11, r11, 2
	;set the upper 16 bit 0
	andx r11, 0xFFFF
	;shift to the position of imm in br
	lls r11, r11, 7
	or r6, r7, r11

	;point the programmer at the placeholder and overwrite it
	stw r8, PADDR(r13)
	stw r6, PDATA(r13)

	;next entry (2 words per entry)
	addi r10, r10, 8
	cmp r10, r9
	reteq+
	br+ vm_defer

;case *
;42
;Copies the 12 words of prog_mul (address kept in r14 by jit) to the
;programmer port.
vm_mul:
	;program instruction (12)
	ldw r0, 0(r14)
	PROGINSTR
	ldw r0, 4(r14)
	PROGINSTR
	ldw r0, 8(r14)
	PROGINSTR
	ldw r0, 12(r14)
	PROGINSTR
	ldw r0, 16(r14)
	PROGINSTR
	ldw r0, 20(r14)
	PROGINSTR
	ldw r0, 24(r14)
	PROGINSTR
	ldw r0, 28(r14)
	PROGINSTR
	ldw r0, 32(r14)
	PROGINSTR
	ldw r0, 36(r14)
	PROGINSTR
	ldw r0, 40(r14)
	PROGINSTR
	ldw r0, 44(r14)
	PROGINSTR

	;increment address
	;(by the number of emitted instruction words)
	addi r2, r2, 12

	br+ vm_loop

;case +
;43
;Copies the 2 words of prog_add to the programmer port.
vm_add:
	;load address of program
	ldil r4, prog_add@lo
	ldih r4, prog_add@hi

	;program instruction (2)
	ldw r0, 0(r4)
	PROGINSTR
	ldw r0, 4(r4)
	PROGINSTR

	;increment address
	addi r2, r2, 2

	br+ vm_loop

;case -
;45
;Copies the 3 words of prog_sub to the programmer port.
vm_sub:
	;load address of program
	ldil r4, prog_sub@lo
	ldih r4, prog_sub@hi

	;program instruction (3)
	ldw r0, 0(r4)
	PROGINSTR
	ldw r0, 4(r4)
	PROGINSTR
	ldw r0, 8(r4)
	PROGINSTR

	;increment address
	addi r2, r2, 3

	br+ vm_loop

;case 0 1 2 3 4 5 6 7 8 9
;48-57
;Emits the 2 words of prog_consts (address kept in r15 by jit); the digit
;value is merged into the second word. Clobbers r6.
vm_consts:
	;program instruction (2)
	ldw r0, 0(r15)
	PROGINSTR
	ldw r0, 4(r15)
	;the second instr. loads r6 with the number
	;thus we shall emulate this

	;calc number
	;(r5 is the input byte, 48 is ASCII '0')
	subi r6, r5, 48
	;shift 3 bits left, as the immediate in ldi has
	;an offset of 3
	lls r6, r6, 3
	;now 'add' this to the ldi
	or r0, r0, r6

	;store this 'dynamic' instruction
	PROGINSTR

	;increment address
	addi r2, r2, 2

	br+ vm_loop

;case <
;60
;Copies the 4 words of prog_lessthan to the programmer port.
vm_lessthan:
	;load address of program
	ldil r4, prog_lessthan@lo
	ldih r4, prog_lessthan@hi

	;program instruction (4)
	ldw r0, 0(r4)
	PROGINSTR
	ldw r0, 4(r4)
	PROGINSTR
	ldw r0, 8(r4)
	PROGINSTR
	ldw r0, 12(r4)
	PROGINSTR

	;increment address
	addi r2, r2, 4

	br+ vm_loop

;case D
;68
;Copies the single word of prog_dup to the programmer port.
vm_dup:
	;load address of program
	ldil r4, prog_dup@lo
	ldih r4, prog_dup@hi

	;program instruction (1)
	ldw r0, 0(r4)
	PROGINSTR

	;increment address
	addi r2, r2, 1

	br+ vm_loop

;case I
;73
;Reads the 4 input bytes that follow the opcode as a 32-bit immediate
;(byte at 1(r1) is the least significant one), emits the 3 words of
;prog_imm with the immediate merged into the second and third word, and
;skips the 4 immediate bytes in the input. Clobbers r6, r7.
vm_imm:
	;the following instructions calculate the immediate
	;load new high byte
	ldb r6, 4(r1)
	;shift high byte
	lls r6, r6, 8
	;load 2nd byte
	ldb r7, 3(r1)
	;add to high byte
	add r6, r6, r7
	;shift
	lls r6, r6, 8
	;load
	ldb r7, 2(r1)
	;add
	add r6, r6, r7
	;shift
	lls r6, r6, 8
	;load
	ldb r7, 1(r1)
	;add
	add r6, r6, r7

	;now we will generate ldih/l which will store this
	;immediate into a register

	;load address of program
	ldil r4, prog_imm@lo
	ldih r4, prog_imm@hi

	;first word is emitted unchanged
	ldw r0, 0(r4)
	PROGINSTR

	;save r6 to r7
	mov r7, r6

	;generate 1st instr
	;(low 16 bits of the immediate, moved to bit 3 of the template)
	ldw r0, 4(r4)
	andx r6, 0xFFFF
	lls r6, r6, 3
	or r0, r0, r6
	PROGINSTR

	;generate 2nd instr
	;(high 16 bits of the immediate; shifting right by 13 moves bit 16
	;to bit 3. andxh presumably masks within the upper halfword --
	;confirm against the ISA)
	ldw r0, 8(r4)
	andxh r7, 0xFFFF
	lrs r7, r7, 13
	or r0, r0, r7
	PROGINSTR

	;increment address
	addi r2, r2, 3

	;pc+4
	;(skip the immediate bytes; vm_loop adds the fifth increment)
	addi r1, r1, 4
	br+ vm_loop

;case J
;74
;Emits the first 2 words of prog_jmp, then reads the jump offset from the
;input byte at 1(r1). Offsets below 0x80 are forward jumps and are handed
;to vm_possign; otherwise the branch to an already translated instruction
;is generated here from the instrtable. Clobbers r6, r7, r8.
vm_jmp:
	;not happy with this ...
	;stored instrs should be input independent
	;use the jumptable
	;for forward jumps a defer table has to be built *puke*

	;load address of program
	ldil r4, prog_jmp@lo
	ldih r4, prog_jmp@hi

	;compare to 0
	;cmpi r6,0
	ldw r0, 0(r4)
	PROGINSTR

	;program instruction (2)
	;pop r6
	ldw r0, 4(r4)
	PROGINSTR

	;we add the offset to this instruction
	;(r8 = program address of the branch word, the third word emitted)
	addi r8, r2, 2


	;we now calculate the jump destination
	;set r6 to 0 (to clear upper bytes)
	ldis r6, 0
	;load pc+1 input
	ldb r6, 1(r1)
	;compare input with neg. max of 8 bit
	cmpi r6, 0x80
	brlt- vm_possign


	;generate negative offset
	ldis r7, 0xFF00
	;r6 is now the 'real' negative number
	or r6, r6, r7
	;todo: testing showed (at least once) we are off by 2 instr.
	;addi r6, r6, 2
	;multiply by 4 to get the offset (instrtable entries are words)
	lls r6, r6, 2
	;generate address in table
	;(r3 already points to the entry after the current instruction)
	add r6, r3, r6
	;r0 now has the target address
	;todo: 0-4?
	ldw r0, 0(r6)
	;we calc the offset
	sub r8, r0, r8
	;we shift 2 bits out, because rel. br takes instr.
	;count and not address amount ...
	;lrs r8, r8, 2
	;set the upper 16 bit 0
	andx r8, 0xFFFF
	;shift to the position of imm in br
	lls r8, r8, 7
	;load template br
	ldw r0, 8(r4)
	or r0, r0, r8
	PROGINSTR

	;increment address
	addi r2, r2, 3

	br+ vm_loop


;Forward jump, entered from vm_jmp with:
;  r0 ... second word of prog_jmp (still loaded from vm_jmp)
;  r6 ... jump offset (< 0x80)
;  r8 ... program address of the branch word
;  r9 ... address of the next free defer table entry
;Records a defer table entry and emits a placeholder word that vm_defer
;overwrites once the target address is known.
vm_possign:
	;we now save the address in the instrtable where the addr to jump to stands
	;the value doesn't exist at the moment, but it will at evaluation

	;save position to save the instr into defer table
	stw r8, 4(r9)

	;we need one instruction to have the correct offset (?)
	;(placeholder: r0 is unchanged since vm_jmp, so this emits the
	;second word of prog_jmp a second time)
	PROGINSTR

	;todo: check if -1 is needed
	;subi r6, r6, 1
	;multiply by 4 (shift by 2) to get offset right
	lls r6, r6, 2
	;add to current base
	add r6, r3, r6
	;save the address to defer table
	stw r6, 0(r9)
	;increment defer table address
	;(2 words per entry; defertable has room for 3 entries, unchecked)
	addi r9, r9, 8
	;increment address
	addi r2, r2, 3
	br+ vm_loop

;case P
;80
;Copies the single word of prog_pop to the programmer port.
vm_pop:
	;load address of program
	ldil r4, prog_pop@lo
	ldih r4, prog_pop@hi

	;program instruction (1)
	ldw r0, 0(r4)
	PROGINSTR

	;increment address
	addi r2, r2, 1

	br+ vm_loop

;case X
;88
;Copies the 3 words of prog_xch to the programmer port.
vm_xch:
	;load address of program
	ldil r4, prog_xch@lo
	ldih r4, prog_xch@hi

	;program instruction (3)
	ldw r0, 0(r4)
	PROGINSTR
	ldw r0, 4(r4)
	PROGINSTR
	ldw r0, 8(r4)
	PROGINSTR

	;increment address
	addi r2, r2, 3

	br+ vm_loop

;case ~
;126
;Copies the single word of prog_not to the programmer port.
vm_not:
	;load address of program
	ldil r4, prog_not@lo
	ldih r4, prog_not@hi

	;program instruction (1)
	ldw r0, 0(r4)
	PROGINSTR

	;increment address
	addi r2, r2, 1

	br+ vm_loop


.data
;Dispatch table of jit: 256 word entries, one per possible input byte.
;vm_loop_1 loads the entry at jumptable + 4 * byte and branches to it with
;brr; entries hold the handler label divided by 4. Bytes without a handler
;map to vm_default. The number before each group is the byte value (range)
;it covers.
jumptable:
;0
.fill 1, vm_eof/4
;1-41
.fill 41, vm_default/4
;42
.fill 1, vm_mul/4
;43
.fill 1, vm_add/4
;44
.fill 1, vm_default/4
;45
.fill 1, vm_sub/4
;46-47
.fill 2, vm_default/4
;48-57
.fill 10, vm_consts/4
;58-59
.fill 2, vm_default/4
;60
.fill 1, vm_lessthan/4
;61-67
.fill 7, vm_default/4
;68
.fill 1, vm_dup/4
;69-72
.fill 4, vm_default/4
;73
.fill 1, vm_imm/4
;74
.fill 1, vm_jmp/4
;75-79
.fill 5, vm_default/4
;80
.fill 1, vm_pop/4
;81-87
.fill 7, vm_default/4
;88
.fill 1, vm_xch/4
;89-125
.fill 37, vm_default/4
;126
.fill 1, vm_not/4
;127-255
.fill 129, vm_default/4

;Forward jumps that cannot be resolved while translating. Each entry is
;2 words, written by vm_possign and consumed by vm_defer:
;  word 0 ... address of the instrtable slot that will hold the target
;  word 1 ... program address of the branch word to patch
;we assume not more than 3 entries (6 words, no bounds check)
defertable:
.fill 6, 0