asm_vm_call_method_float:
asm_vm_call_method_double:
- save %sp, -144, %sp /* 16 reg-save + 2 */
+ save %sp,-144,%sp /* 16 reg-save + 2 */
- /* todo: copy fp registers */
+ /* %i2 not needed after _nocopy -> calculate in branch delay */
- brlez %i1, calljava_argsloaded
- dec %i1
- ldx [%i2 + offvmargdata], %o0
- brlez %i1, calljava_argsloaded
+ brlez %i1, calljava_nocopy
+
+ dec %i1 /* branch delay */
+ ldx [%i2 + offvmargdata],%o0
+ ldd [%i2 + offvmargdata],fa0
+ brlez %i1,calljava_nocopy
- dec %i1
- ldx [%i2 + (offvmargdata+sizevmarg*1)], %o1
- brlez %i1, calljava_argsloaded
+ dec %i1 /* branch delay */
+ ldx [%i2 + (offvmargdata+sizevmarg*1)],%o1
+ ldd [%i2 + (offvmargdata+sizevmarg*1)],fa1
+ brlez %i1,calljava_nocopy
dec %i1
- ldx [%i2 + (offvmargdata+sizevmarg*2)], %o2
- brlez %i1, calljava_argsloaded
+ ldx [%i2 + (offvmargdata+sizevmarg*2)],%o2
+ ldd [%i2 + (offvmargdata+sizevmarg*2)],fa2
+ brlez %i1,calljava_nocopy
dec %i1
- ldx [%i2 + (offvmargdata+sizevmarg*3)], %o3
- brlez %i1, calljava_argsloaded
+ ldx [%i2 + (offvmargdata+sizevmarg*3)],%o3
+ ldd [%i2 + (offvmargdata+sizevmarg*3)],fa3
+ brlez %i1,calljava_nocopy
dec %i1
- ldx [%i2 + (offvmargdata+sizevmarg*4)], %o4
+ ldx [%i2 + (offvmargdata+sizevmarg*4)],%o4
+ ldd [%i2 + (offvmargdata+sizevmarg*4)],fa4
- /* todo: use more out registers ? */
-
-calljava_argsloaded:
- /* todo: stack frame layout!! */
-
+calljava_argsloaded:
brlez %i1, calljava_nocopy
- sllx %i1, 3, %l0 /* remaining args * 8 */
- mov %sp, %l1 /* right above window save area */
- sub %sp, %l0, %sp /* allocate more stack space */
+ nop
+ sllx %i1,3,%l0 /* remaining args * 8 */
+ add %sp,16*8+bias,%l1 /* right above window save area */
+ sub %sp,%l0,%sp /* allocate more stack space */
calljava_copyloop:
- ldx [%i2 + (offvmargdata+sizevmarg*5)], %l0
- stx %l0, [%l1]
- inc sizevmarg, %i2 /* src++ */
- inc 8, %l1 /* dst++ */
- dec %i1 /* arg_count-- */
- bnz %xcc, calljava_copyloop
+ ldx [%i2 + (offvmargdata+sizevmarg*5)],%l0
+ stx %l0,[%l1]
+ inc sizevmarg,%i2 /* src++ */
+ subcc %i1,1,%i1 /* arg_count-- */
+ bnz %xcc, calljava_copyloop /* use cc from previous instr */
+ inc 8,%l1 /* dst++ (delay) */
+
calljava_nocopy:
/* set pv, like a java method does */
asm_call_jit_compiler:
- save %sp,-208,%sp /* regsave(16) + argslots(6) + 4 float args */
+ /* stacksave for regsave(16) + argslots(6) + float args */
+ save %sp,-((16+6+FLT_ARG_CNT)*8),%sp
SAVE_FLOAT_ARGUMENT_REGISTERS(22)
mov pv_callee,%g4
/* save bigger stack frame for float args and temps */
- save %sp,(FLT_ARG_CNT+FLT_TMP_CNT+CSTACK_CNT)*8,%sp
+ save %sp,-((FLT_ARG_CNT+FLT_TMP_CNT+CSTACK_CNT)*8),%sp
SAVE_FLOAT_ARGUMENT_REGISTERS(CSTACK_CNT)
SAVE_FLOAT_TEMPORARY_REGISTERS(CSTACK_CNT+FLT_ARG_CNT)
#define fa1 %f18
#define fa2 %f20
#define fa3 %f22
-#define ft7 %f24
-#define ft8 %f26
-#define ft9 %f28
-#define ft10 %f30
+#define fa4 %f24
+#define ft7 %f26
+#define ft8 %f28
+#define ft9 %f30
#define bias 2047
std fa0,[%sp + bias + ((0+(off))*8)] ; \
std fa1,[%sp + bias + ((1+(off))*8)] ; \
std fa2,[%sp + bias + ((2+(off))*8)] ; \
- std fa3,[%sp + bias + ((3+(off))*8)] ;
+ std fa3,[%sp + bias + ((3+(off))*8)] ; \
+ std fa4,[%sp + bias + ((4+(off))*8)] ;
#define RESTORE_FLOAT_ARGUMENT_REGISTERS(off) \
ldd [%sp + bias + ((0+(off))*8)],fa0 ; \
ldd [%sp + bias + ((1+(off))*8)],fa1 ; \
ldd [%sp + bias + ((2+(off))*8)],fa2 ; \
- ldd [%sp + bias + ((3+(off))*8)],fa3 ;
+ ldd [%sp + bias + ((3+(off))*8)],fa3 ; \
+ ldd [%sp + bias + ((4+(off))*8)],fa4 ;
+ /* Store the float temporary registers as doubles (std) into 8-byte slots */
+ /* at %sp + bias + (k+off)*8. The last store must NOT end in a backslash: */
+ /* a trailing continuation would splice the following line into the macro. */
#define SAVE_FLOAT_TEMPORARY_REGISTERS(off) \
std ft0,[%sp + bias + ((0+(off))*8)] ; \
std ft7,[%sp + bias + ((7+(off))*8)] ; \
std ft8,[%sp + bias + ((8+(off))*8)] ; \
- std ft9,[%sp + bias + ((9+(off))*8)] ; \
- std ft10,[%sp + bias + ((10+(off))*8)] ;
+ std ft9,[%sp + bias + ((9+(off))*8)] ;
+ /* Reload the float temporary registers as doubles (ldd) from 8-byte slots */
+ /* at %sp + bias + (k+off)*8. The last load must NOT end in a backslash: */
+ /* a trailing continuation would splice the following line into the macro. */
#define RESTORE_FLOAT_TEMPORARY_REGISTERS(off) \
ldd [%sp + bias + ((0+(off))*8)],ft0 ; \
ldd [%sp + bias + ((7+(off))*8)],ft7 ; \
ldd [%sp + bias + ((8+(off))*8)],ft8 ; \
- ldd [%sp + bias + ((9+(off))*8)],ft9 ; \
- ldd [%sp + bias + ((10+(off))*8)],ft10 ;
+ ldd [%sp + bias + ((9+(off))*8)],ft9 ;