+; `Deep Thought', a softcore CPU implemented on a FPGA
+;
+; Copyright (C) 2010 Markus Hofstaetter <markus.manrow@gmx.at>
+; Copyright (C) 2010 Martin Perner <e0725782@student.tuwien.ac.at>
+; Copyright (C) 2010 Stefan Rebernig <stefan.rebernig@gmail.com>
+; Copyright (C) 2010 Manfred Schwarz <e0725898@student.tuwien.ac.at>
+; Copyright (C) 2010 Bernhard Urban <lewurm@gmail.com>
+;
+; This program is free software: you can redistribute it and/or modify
+; it under the terms of the GNU General Public License as published by
+; the Free Software Foundation, either version 3 of the License, or
+; (at your option) any later version.
+;
+; This program is distributed in the hope that it will be useful,
+; but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with this program. If not, see <http://www.gnu.org/licenses/>.
+
#define PROGINSTR stw r0, PDATA(r13)
+#include "dt_inc.s"
.data
.org 0x10
inputdata:
;input data: 0x2A is ASCII '*' and 0x38 is ASCII '8'
.fill 1, 0x2A38
-stack:
-.fill 256, 0
-
;instruction table, needed for jumps: the JIT stores the program address (r2) of each input instruction here via "stw r2, 0(r3)"
;sized on the assumption that no more than 42 instructions are used
instrtable:
.fill 42, 0
prog_eof:
-.ifill pop r0
+.ifill pop r7;end-of-program template (presumably emitted by vm_eof): pops into r7 (was r0), then returns
.ifill ret+
prog_mul:
-.ifill pop r6
.ifill pop r7
.ifill ldis r8, 0;0xed400004 (r8 starts at 0 and is added to by adddnz below)
.ifill mov r0, r7;0xe1038000
.ifill adddnz r8, r8, r6;0x00443001
.ifill subi r7, r7, 2;0xe1bb8010
.fill 0x0b7ffe83;brnz+ loop, hand-encoded because the target lies inside this template. Bits 7-22 hold 0xFFFD, i.e. -3 if that is the 16-bit offset field the JIT patches with "lls r8, r8, 7" - TODO confirm
-.ifill push r8
+.ifill mov r6, r8;the value accumulated in r8 is left in r6 instead of being pushed
prog_consts:
-.fill 0xed300004;ldis r6, CONST
.ifill push r6
+.fill 0xed300004;ldis r6, CONST (now placed after the push: the previous r6 is pushed first, then r6 receives the constant)
prog_add:
-.ifill pop r6
.ifill pop r7
-.ifill add r7, r7, r6;0xe03bb000
-.ifill push r7
+.ifill add r6, r7, r6;0xe03bb000 NOTE(review): this hex annotation was carried over from "add r7, r7, r6" and is probably stale now that the destination is r6 - verify
prog_sub:
-.ifill pop r6
.ifill pop r7
.ifill sub r7, r7, r6;0xe0bbb000
-.ifill push r7
+.ifill mov r6, r7;the result computed in r7 is left in r6 instead of being pushed
prog_lessthan:
-.ifill pop r6
.ifill pop r7
.ifill cmp r7, r6;0xec3b0000
-.ifill pushlt r14
-.ifill pushge r15
+.ifill movdlt r6, r14;on "lt": r6 = r14 (r14 and r15 presumably hold the two result values - TODO confirm)
+.ifill movge r6, r15;on "ge": r6 = r15. The result now stays in r6 instead of being pushed
prog_dup:
-.ifill fetch r6
.ifill push r6;r6 is pushed as it is, the preceding "fetch r6" was dropped
prog_jmp:
-.ifill pop r6
.ifill cmpi r6,0;0xecb00000
+.ifill pop r6
;statically calculated (hand-encoded) branch word(s) follow
-.fill 1, 0x1b000103;breq- vm_next
-.fill 1, 0xeb000003;br+ CONST
+.fill 1, 0x0b000003;brne+ CONST - third word of the template, loaded by the JIT from 8(prog_jmp) and OR'ed with the shifted branch offset
prog_imm:
-.fill 1, 0xed400000;ldil r6, CONST
-.fill 1, 0xed400002;ldih r6, CONST
.ifill push r6
+.fill 1, 0xed300000;ldil r6, CONST - the JIT ORs the low 16 bits of the immediate in at bit 3. NOTE(review): the removed words used 0xed4..., the same field as "ldis r8" in prog_mul, while 0xed3... matches "ldis r6" in prog_consts
+.fill 1, 0xed300002;ldih r6, CONST - the JIT ORs the high 16 bits of the immediate in at bit 3
prog_pop:
-.ifill disc r6
+.ifill pop r6;replaces "disc r6": r6 is reloaded from the stack
prog_xch:
-.ifill pop r6
.ifill pop r7
.ifill push r6
-.ifill push r7
+.ifill mov r6, r7;the popped value ends up in r6 and the previous r6 is on the stack
prog_not:
-.ifill pop r6
.ifill not r6;0xe4b7fffa (operates on r6 in place, the surrounding pop/push pair was dropped)
-.ifill push r6
.text
- .define UART_BASE, 0x2000
- .define UART_STATUS, 0x0
- .define UART_RECV, 0xc
- .define UART_TRANS, 0x8
-
- .define UART_TRANS_EMPTY, 0x1
- .define UART_RECV_NEW, 0x2
-
- .define PBASE, 0x2030
- .define PADDR, 0x4
- .define PDATA, 0x8
+.org 0
+start:
+ call+ main ; entry code placed at address 0
+ call+ main ; NOTE(review): main ends in "br+ main", so the purpose of this second call and the ret is not evident from this file - confirm
+ ret
main:
- ldi r10, UART_BASE@lo
- ldih r10, UART_BASE@hi
-;recv byte
-u_recv_byte:
- ldw r3, UART_STATUS(r10)
- andx r3, UART_RECV_NEW
- brzs+ u_recv_byte; branch if zero
- xor r0, r0, r0
- ldw r0, UART_RECV(r10)
-;recv byte
+ call+ u_init
+ call+ u_recv_byte
+
+ ; benchmark prologue: the t_* routines are not defined in this file (presumably in dt_inc.s). r1 = 0 is loaded before t_valset
+ call t_init
+ call t_stop
+ ldis r1, 0
+ call t_valset
+ call t_start
+ ; end of benchmark prologue
;set address of input
- ldil r1, inputdata@lo
+ ldis r1, inputdata@lo
ldih r1, inputdata@hi
;set address of program start (now prog_start divided by 4)
- ldil r2, prog_start@lo
- ldih r2, prog_start@hi
+ ldis r2, (prog_start/4)@lo
+ ldih r2, (prog_start/4)@hi
;set address to instruction table
- ldil r3, instrtable@lo
+ ldis r3, instrtable@lo
ldih r3, instrtable@hi
;set address to defer table
- ldil r9, defertable@lo
+ ldis r9, defertable@lo
ldih r9, defertable@hi
- ldil r13, PBASE@lo
+ ldis r13, PBASE@lo
ldih r13, PBASE@hi
;set programmer address: PADDR(r13) = r2. The PROGINSTR macro later writes instruction words to PDATA(r13)
stw r2, PADDR(r13)
-
;call jit compiler
call+ jit
;call jit'ed prog
call+ prog_start
-;send result
-u_send_byte:
- ldi r10, UART_BASE@lo
- ldih r10, UART_BASE@hi
- ldw r9, UART_STATUS(r10)
- andx r9, UART_TRANS_EMPTY
- brnz+ u_send_byte ; branch if not zero
- stb r0, UART_TRANS(r10)
-;send result
+ ; benchmark epilogue: read the timer and leave two values on the stack for the send code below
+ push r6 ; keep r6 (presumably the result of the jit'ed program) across the timer calls
+ call+ t_init
+ call+ t_stop
+ call+ t_valget
+ subi r0, r0, 0xc ; subtract the offset (0xc) from the value in r0
+ pop r3 ; r3 = the r6 value saved above
+ push r0 ; pushed first, so it is popped second (sent with u_send_uint)
+ push r3 ; pushed last, so it is popped first (sent with u_send_byte)
+ ; end of benchmark epilogue
+
+ ;send result
+ call+ u_init
+ pop r1
+ call u_send_byte
+ call u_send_newline
+ pop r1
+ call u_send_uint
+ call u_send_newline
br+ main
;backup defer table address (r10 = r9)
mov r10, r9
- ;decrement address to input by 1
- subi r1, r1, 1
+ br+ vm_loop_1 ; enter the loop after the increment of r1 (replaces the pre-decrement of r1)
vm_default:
vm_loop:
;increment input address
addi r1, r1, 1
-
+vm_loop_1:
;store address of next instruction in table
stw r2, 0(r3)
;increment instr. table
ldil r7, prog_jmp@lo
ldih r7, prog_jmp@hi
;load branch template
- ldw r7, 12(r7)
+ ldw r7, 8(r7) ; the branch word is now the third word of prog_jmp (was the fourth, offset 12)
;if current (r9) and base (r10) are equal, there is no entry
cmp r9, r10
;generate branch
sub r11, r6, r8
- lrs r11, r11, 2
+ ;lrs r11, r11, 2 (shift disabled by this change)
;clear the upper 16 bits
andx r11, 0xFFFF
;shift to the position of imm in br
addi r10, r10, 8
cmp r10, r9
reteq+
- brnq- vm_defer
+ br+ vm_defer
;case '*'
;ASCII 42
PROGINSTR
ldw r0, 44(r14)
PROGINSTR
- ldw r0, 48(r14)
- PROGINSTR
;advance the program address r2
- addi r2, r2, 52
+ addi r2, r2, 12 ; was 52 = 13 * 4; NOTE(review): r2 presumably counts instruction words instead of bytes now, cf. (prog_start/4) - confirm
br+ vm_loop
PROGINSTR
ldw r0, 4(r4)
PROGINSTR
- ldw r0, 8(r4)
- PROGINSTR
- ldw r0, 12(r4)
- PROGINSTR
;advance the program address r2
- addi r2, r2, 16
+ addi r2, r2, 2 ; was 16 = 4 * 4
br+ vm_loop
PROGINSTR
ldw r0, 8(r4)
PROGINSTR
- ldw r0, 12(r4)
- PROGINSTR
;advance the program address r2
- addi r2, r2, 16
+ addi r2, r2, 3 ; was 16 = 4 * 4
br+ vm_loop
vm_consts:
;program instruction (3)
ldw r0, 0(r15)
+ PROGINSTR
+ ldw r0, 4(r15)
;the word loaded from 4(r15) loads r6 with the number
;(NOTE(review): this comment used to say "the first instr." - assumes r15 points at prog_consts, whose order is now push r6 / ldis r6, CONST - confirm)
;store this 'dynamic' instruction
PROGINSTR
- ldw r0, 4(r15)
- PROGINSTR
;advance the program address r2
- addi r2, r2, 8
+ addi r2, r2, 2 ; was 8 = 2 * 4
br+ vm_loop
PROGINSTR
ldw r0, 12(r4)
PROGINSTR
- ldw r0, 16(r4)
- PROGINSTR
;advance the program address r2
- addi r2, r2, 20
+ addi r2, r2, 4 ; was 20 = 5 * 4
br+ vm_loop
;program instruction (3)
ldw r0, 0(r4)
PROGINSTR
- ldw r0, 4(r4)
- PROGINSTR
;advance the program address r2
- addi r2, r2, 8
+ addi r2, r2, 1 ; was 8 = 2 * 4
br+ vm_loop
ldil r4, prog_imm@lo
ldih r4, prog_imm@hi
+ ldw r0, 0(r4) ; emit word 0 of prog_imm (push r6) unchanged, ahead of the two load words
+ PROGINSTR
+
;save r6 to r7
mov r7, r6
;generate 1st instr: template word ORed with the low 16 bits of r6 shifted to bit 3
- ldw r0, 0(r4)
+ ldw r0, 4(r4)
andx r6, 0xFFFF
lls r6, r6, 3
or r0, r0, r6
PROGINSTR
;generate 2nd instr: template word ORed with r7 shifted right by 13 (andxh presumably keeps the upper 16 bits, which then land at bit 3 - TODO confirm)
- ldw r0, 4(r4)
+ ldw r0, 8(r4)
andxh r7, 0xFFFF
lrs r7, r7, 13
or r0, r0, r7
PROGINSTR
- ;now we program the instructions that will save the
- ;immediate onto the stack and increment the later
-
- ldw r0, 8(r4)
- PROGINSTR
-
;advance the program address r2
- addi r2, r2, 12
+ addi r2, r2, 3 ; was 12 = 3 * 4
;pc+4: advance the input address r1 by 4
addi r1, r1, 4
ldil r4, prog_jmp@lo
ldih r4, prog_jmp@hi
- ;program instruction (2)
- ;pop r6
- ldw r0, 0(r4)
- PROGINSTR
-
;compare to 0
;cmpi r6,0
- ldw r0, 4(r4)
+ ldw r0, 0(r4)
PROGINSTR
- ;breq+ vm_next
- ;is statically known
- ldw r0, 8(r4)
+ ;program instruction (2)
+ ;pop r6
+ ldw r0, 4(r4)
PROGINSTR
;we add the offset to this instruction
- addi r8, r2, 12
+ addi r8, r2, 2
;we now calculate the jump destination
sub r8, r0, r8
;we shift 2 bits out, because rel. br takes instr.
;count and not address amount ...
- lrs r8, r8, 2
+ ;lrs r8, r8, 2 (shift disabled by this change; NOTE(review): presumably because r2 already counts instructions - confirm)
;clear the upper 16 bits
andx r8, 0xFFFF
;shift to the position of imm in br
lls r8, r8, 7
;load template br
- ldw r0, 12(r4)
+ ldw r0, 8(r4)
or r0, r0, r8
PROGINSTR
;advance the program address r2
- addi r2, r2, 16
+ addi r2, r2, 3 ; was 16 = 4 * 4
br+ vm_loop
;increment defer table address
addi r9, r9, 8
;advance the program address r2
- addi r2, r2, 16
+ addi r2, r2, 3 ; was 16 = 4 * 4
br+ vm_loop
;case 'P'
PROGINSTR
;advance the program address r2
- addi r2, r2, 4
+ addi r2, r2, 1 ; was 4 = 1 * 4
br+ vm_loop
PROGINSTR
ldw r0, 8(r4)
PROGINSTR
- ldw r0, 12(r4)
- PROGINSTR
;advance the program address r2
- addi r2, r2, 16
+ addi r2, r2, 3 ; was 16 = 4 * 4
br+ vm_loop
;program instruction (3)
ldw r0, 0(r4)
PROGINSTR
- ldw r0, 4(r4)
- PROGINSTR
- ldw r0, 8(r4)
- PROGINSTR
;advance the program address r2
- addi r2, r2, 12
+ addi r2, r2, 1 ; was 12 = 3 * 4
br+ vm_loop
-prog_start:
.data
jumptable:
;256 entries, one per input byte value. Entry 0 (vm_eof) is followed by 1-41 (default). Entries are handler addresses divided by 4 (NOTE(review): presumably word addresses, cf. (prog_start/4) in main - confirm)
-.fill 1, vm_eof
-.fill 41, vm_default
+.fill 1, vm_eof/4
+.fill 41, vm_default/4
;42 '*'
-.fill 1, vm_mul
+.fill 1, vm_mul/4
;43 '+'
-.fill 1, vm_add
+.fill 1, vm_add/4
;44
-.fill 1, vm_default
+.fill 1, vm_default/4
;45 '-'
-.fill 1, vm_sub
+.fill 1, vm_sub/4
;46-47
-.fill 2, vm_default
+.fill 2, vm_default/4
;48-57 '0'-'9'
-.fill 10, vm_consts
+.fill 10, vm_consts/4
;58-59
-.fill 2, vm_default
+.fill 2, vm_default/4
;60 '<'
-.fill 1, vm_lessthan
+.fill 1, vm_lessthan/4
;61-67
-.fill 7, vm_default
+.fill 7, vm_default/4
;68 'D'
-.fill 1, vm_dup
+.fill 1, vm_dup/4
;69-72
-.fill 4, vm_default
+.fill 4, vm_default/4
;73 'I'
-.fill 1, vm_imm
+.fill 1, vm_imm/4
;74 'J'
-.fill 1, vm_jmp
+.fill 1, vm_jmp/4
;75-79
-.fill 5, vm_default
+.fill 5, vm_default/4
;80 'P'
-.fill 1, vm_pop
+.fill 1, vm_pop/4
;81-87
-.fill 7, vm_default
+.fill 7, vm_default/4
;88 'X'
-.fill 1, vm_xch
+.fill 1, vm_xch/4
;89-125
-.fill 37, vm_default
+.fill 37, vm_default/4
;126 '~'
-.fill 1, vm_not
+.fill 1, vm_not/4
;127-255
-.fill 129, vm_default
+.fill 129, vm_default/4
;we assume not more than 3 entries
defertable: