Merged revisions 7501-7598 via svnmerge from
[cacao.git] / src / vm / jit / powerpc64 / asmpart.S
index 2daba784890b4164c4b66c87338ffc720e9d00aa..49961cd1b47a7a273025fd9ba331d2bda83677b2 100644 (file)
    Changes: Christian Thalinger
             Edwin Steiner
 
-   $Id: asmpart.S 5776 2006-10-13 17:06:39Z tbfg $
+   $Id: asmpart.S 7596 2007-03-28 21:05:53Z twisti $
 
 */
 
 
 #include "config.h"
 
+#define __ASSEMBLY__
+
 #include "md-abi.h"
 #include "md-asm.h"
 
 
 /* export functions ***********************************************************/
 
-#ifdef ENABLE_LIBJVM
-       .globl asm_vm_call_method
-       .globl asm_vm_call_method_int
-       .globl asm_vm_call_method_long
-       .globl asm_vm_call_method_float
-       .globl asm_vm_call_method_double
-#else
-       .globl .asm_vm_call_method
-       .globl .asm_vm_call_method_int
-       .globl .asm_vm_call_method_long
-       .globl .asm_vm_call_method_float
-       .globl .asm_vm_call_method_double
-#endif
        .globl asm_vm_call_method_exception_handler
+       .globl asm_vm_call_method_end
 
        .globl asm_call_jit_compiler
 
 
        .globl asm_patcher_wrapper
 
+#if defined(ENABLE_REPLACEMENT)
        .globl asm_replacement_out
        .globl .asm_replacement_in
+#endif
 
-       .globl .asm_cacheflush          /* no function descriptor needed, only called direct */
+       .globl asm_cacheflush
        .globl asm_criticalsections
-       .globl .asm_getclassvalues_atomic
+       .globl asm_getclassvalues_atomic
 
 
 /* asm_vm_call_method **********************************************************
 *         void *arg1, void *arg2, void *arg3, void *arg4);                     *
 *                                                                              *
 *******************************************************************************/
-
-       .align 2
-
-       .long   0                         /* catch type all                       */
-       .long   0                         /* exception handler pc                 */
-       .long   0                         /* end pc                               */
-       .long   0                         /* start pc                             */
-       .long   1                         /* extable size                         */
-       .long   0                         /* line number table start              */
-       .long   0                         /* line number table size               */
-       .long   0                         /* fltsave                              */
-       .long   0                         /* intsave                              */
-       .long   0                         /* isleaf                               */
-       .long   0                         /* IsSync                               */
-       .long   0                         /* frame size                           */
-       .long   0                         /* codeinfo pointer                     */
+       /* this is the method header, see src/vm/jit/methodheader.h */
+
+       .align  8
+
+       .quad   0                           /* catch type all                     */
+       .quad   0                           /* handler pc                         */
+       .quad   0                           /* end pc                             */
+       .quad   0                           /* start pc                           */
+       .long   1                           /* extable size                       */
+       .long   0                           /* ALIGNMENT PADDING                  */
+       .quad   0                           /* line number table  start           */
+       .quad   0                           /* line number table  size            */
+       .long   0                           /* ALIGNMENT PADDING                  */
+       .long   0                           /* fltsave                            */
+       .long   0                           /* intsave                            */
+       .long   0                           /* isleaf                             */
+       .long   0                           /* IsSync                             */
+       .long   0                           /* frame size                         */
+       .quad   0                           /* codeinfo pointer                   */
 
 #ifdef ENABLE_LIBJVM
+       
+       .globl asm_vm_call_method
+       .globl asm_vm_call_method_int
+       .globl asm_vm_call_method_long
+       .globl asm_vm_call_method_float
+       .globl asm_vm_call_method_double
        .section ".opd","aw"
        .align 3
 
 #else
        asm_vm_call_method:
        .globl asm_vm_call_method
+       asm_vm_call_method_int:
+       .globl asm_vm_call_method_int
+       asm_vm_call_method_long:
+       .globl asm_vm_call_method_long
+       asm_vm_call_method_float:
+       .globl asm_vm_call_method_float
+       asm_vm_call_method_double:
+       .globl asm_vm_call_method_double
 #endif
 
.asm_vm_call_method:
        li      t0,0                      /* initialize integer argument counter  */
        li      t1,0                      /* initialize float argument counter    */
        li      t6,0                      /* initialize integer register counter  */
-#if defined(__DARWIN__)
-       li      t7,0                      /* initialize stack slot counter        */
-#endif
-
-       mflr    r0                        /* save link register (PIC code)        */
-       bl      L_asm_vm_call_method_get_pc
-L_asm_vm_call_method_get_pc:
-       mflr    t3                        /* t3 contains the current pc           */
-       mtlr    r0
+       li      t3,8                      /* initialize PA counter (8 slots left) */
 
 L_register_copy:
        addi    itmp1,itmp1,sizevmarg     /* goto next argument block             */
@@ -201,6 +202,7 @@ L_register_copy:
        mr.     itmp2,itmp2
        beq     L_register_copy_done
 
+       addi    t3,t3,-1                  /* uses a PA slot                       */
        lwz     itmp3,offvmargtype+4(itmp1)
        andi.   r0,itmp3,0x0002           /* is this a float/double type?         */
        bne     L_register_handle_float
@@ -242,9 +244,6 @@ L_register_do_copy_longint:
        mtctr   itmp3
        addi    t0,t0,1                   /* integer argument counter             */
        addi    t6,t6,1                   /* integer argument register counter    */
-#if defined(__DARWIN__)
-       addi    t7,t7,1                   /* stack slot counter                   */
-#endif
        bctr
 
 L_register_handle_float:
@@ -285,14 +284,15 @@ L_register_do_copy_floatdouble:
        ld      itmp3,0(itmp3)            /* load function address                */
        mtctr   itmp3
        addi    t1,t1,1                   /* float argument counter               */
-#if defined(__DARWIN__)
-       addi    t7,t7,1                   /* stack slot counter                   */
-       addi    t6,t6,1                   /* skip 1 integer argument register     */
-#endif
+       mr.     t3,t3                     /* PA counter still >= 0 (inside PA)?   */
+       blt     L_float_not_uses_PA       /* negative: PA slots are used up       */
+       addi    t6,t6,1                   /* if so it uses an integer arg reg     */
+L_float_not_uses_PA:
        bctr
 
 
 L_register_copy_done:
+       subi    sp,sp,PA_SIZE             /* PA_SIZE are used by definition       */
                                          /* calculate remaining arguments        */
        sub     itmp3,t4,t0               /* - integer arguments in registers     */
        sub     itmp3,itmp3,t1            /* - float arguments in registers       */
@@ -304,29 +304,24 @@ L_register_copy_done:
 
        slwi    t4,itmp3,3                /* XXX use 8-bytes slots for now        */
        addi    t4,t4,LA_SIZE             /* add size of linkage area             */
-
-#if defined(__DARWIN__)
-       slwi    t5,t7,2                   /* add stack space for arguments        */
-       add     t4,t4,t5
-#endif
-
        sub     sp,sp,t4
 
        mr      t6,sp                     /* use t6 as temporary sp               */
        addi    t6,t6,LA_SIZE             /* skip linkage area                    */
-#if defined(__DARWIN__)
-       add     t6,t6,t5                  /* skip stack space for arguments       */
-#endif
 
        addi    itmp1,itmp1,-sizevmarg    /* initialize pointer (smaller code)    */
        addi    itmp2,itmp2,1             /* initialize argument count            */
+       li      t3,8                      /* initialize PA counter                */
+       addi    t6,t6,-8                  /* make code simpler                    */
        
 L_stack_copy_loop:
        addi    itmp1,itmp1,sizevmarg     /* goto next argument block             */
        addi    itmp2,itmp2,-1            /* argument count - 1                   */
        mr.     itmp2,itmp2
        beq     L_stack_copy_done
-       
+       addi    t6,t6,8                   /* increase stack */
+L_stack_not_uses_PA:
+
        lwz     itmp3,offvmargtype+4(itmp1)
        andi.   r0,itmp3,0x0002           /* is this a float/double type?         */
        bne     L_stack_handle_float
@@ -339,22 +334,13 @@ L_stack_handle_int:
        andi.   r0,itmp3,0x0001           /* is this a 2-word type?               */
        bne     L_stack_handle_long
 
-       lwz     itmp3,offvmargdata+4(itmp1) /* get integer argument               */
-       stw     itmp3,0(t6)               /* and store it on the stack            */
-       addi    t6,t6,4                   /* increase temporary sp by 1 slot      */
+       lwa     itmp3,offvmargdata+4(itmp1) /* get integer argument               */
+       std     itmp3,0(t6)               /* and store it on the stack            */
        b       L_stack_copy_loop
 
 L_stack_handle_long:
-#if !defined(__DARWIN__)
-       addi    t6,t6,4                   /* align stack to 8-bytes               */
-       rlwinm  t6,t6,0,30,28             /* clear lower 4-bits                   */
-#endif
-
-       lwz     itmp3,offvmargdata+0(itmp1) /* get long argument                  */
-       stw     itmp3,0(t6)               /* and store it on the stack            */
-       lwz     itmp3,offvmargdata+4(itmp1)
-       stw     itmp3,4(t6)
-       addi    t6,t6,8                   /* increase temporary sp by 2 slots     */
+       ld      itmp3,offvmargdata+0(itmp1) /* get long argument                  */
+       std     itmp3,0(t6)               /* and store it on the stack            */
        b       L_stack_copy_loop
                
 L_stack_handle_float:
@@ -366,19 +352,12 @@ L_stack_handle_float:
        bne     L_stack_handle_double
 
        lfs     ftmp3,offvmargdata(itmp1) /* get float argument                   */
-       stfs    ftmp3,0(t6)               /* and store it on the stack            */
-       addi    t6,t6,4                   /* increase temporary sp by 1 slot      */
+       stfd    ftmp3,0(t6)               /* and store it on the stack            */
        b       L_stack_copy_loop
 
 L_stack_handle_double:
-#if !defined(__DARWIN__)
-       addi    t6,t6,4                   /* align stack to 8-bytes               */
-       rlwinm  t6,t6,0,30,28             /* clear lower 4-bits                   */
-#endif
-
        lfd     ftmp3,offvmargdata(itmp1) /* get double argument                  */
        stfd    ftmp3,0(t6)               /* and store it on the stack            */
-       addi    t6,t6,8                   /* increase temporary sp by 2 slots     */
        b       L_stack_copy_loop
 
 L_stack_copy_done:
@@ -419,8 +398,8 @@ L_asm_vm_call_method_return:
        ld      pv,11*8(sp)               /* restore PV register                  */
 
        ld      itmp3,12*8(sp)
-       lfd     ftmp1,14*8(sp)            /* registers f14-f31 are callee saved   */
-       lfd     ftmp2,16*8(sp)
+       lfd     ftmp1,13*8(sp)            /* registers f14-f31 are callee saved   */
+       lfd     ftmp2,14*8(sp)
 
 #if defined(__DARWIN__)
        lwz     t1,18*4(r1)
@@ -438,7 +417,7 @@ L_asm_vm_call_method_return:
        lfd     ft4,34*4(r1)
        lfd     ft5,36*4(r1)
 #else
-       RESTORE_TEMPORARY_REGISTERS(18)   /* the offset has to be even            */
+       RESTORE_TEMPORARY_REGISTERS(15)   /* the offset has to be even            */
 #endif
 
        ld     r0,40*8+LA_LR_OFFSET(r1)
@@ -469,28 +448,28 @@ L_jumptable_int:
        .align  4
 
L_handle_a0:
-       lwz     a0,offvmargdata+4(itmp1)
+       lwa     a0,offvmargdata+4(itmp1)  /* lwa sign-extends the 32-bit int      */
        b       L_register_copy
L_handle_a1:
-       lwz     a1,offvmargdata+4(itmp1)
+       lwa     a1,offvmargdata+4(itmp1)
        b       L_register_copy
L_handle_a2:
-       lwz     a2,offvmargdata+4(itmp1)
+       lwa     a2,offvmargdata+4(itmp1)
        b       L_register_copy
L_handle_a3:
-       lwz     a3,offvmargdata+4(itmp1)
+       lwa     a3,offvmargdata+4(itmp1)
        b       L_register_copy
L_handle_a4:
-       lwz     a4,offvmargdata+4(itmp1)
+       lwa     a4,offvmargdata+4(itmp1)
        b       L_register_copy
L_handle_a5:
-       lwz     a5,offvmargdata+4(itmp1)
+       lwa     a5,offvmargdata+4(itmp1)
        b       L_register_copy
L_handle_a6:
-       lwz     a6,offvmargdata+4(itmp1)
+       lwa     a6,offvmargdata+4(itmp1)
        b       L_register_copy
L_handle_a7:
-       lwz     a7,offvmargdata+4(itmp1)
+       lwa     a7,offvmargdata+4(itmp1)
        b       L_register_copy
 
 
@@ -547,14 +526,11 @@ L_jumptable_float:
        .quad   L_handle_fa5
        .quad   L_handle_fa6
        .quad   L_handle_fa7
-
-#if defined(__DARWIN__)
        .quad   L_handle_fa8
        .quad   L_handle_fa9
        .quad   L_handle_fa10
        .quad   L_handle_fa11
        .quad   L_handle_fa12
-#endif
 
        .text
        .align  4
@@ -583,8 +559,6 @@ L_handle_fa6:
 L_handle_fa7:
        lfs     fa7,offvmargdata(itmp1)
        b       L_register_copy
-
-#if defined(__DARWIN__)
 L_handle_fa8:
        lfs     fa8,offvmargdata(itmp1)
        b       L_register_copy
@@ -600,8 +574,6 @@ L_handle_fa11:
 L_handle_fa12:
        lfs     fa12,offvmargdata(itmp1)
        b       L_register_copy
-#endif
-
 
        .data
        .align  8
@@ -615,14 +587,11 @@ L_jumptable_double:
        .quad   L_handle_fda5
        .quad   L_handle_fda6
        .quad   L_handle_fda7
-
-#if defined(__DARWIN__)
        .quad   L_handle_fda8
        .quad   L_handle_fda9
        .quad   L_handle_fda10
        .quad   L_handle_fda11
        .quad   L_handle_fda12
-#endif
 
        .text
        .align  4
@@ -651,8 +620,6 @@ L_handle_fda6:
 L_handle_fda7:
        lfd     fa7,offvmargdata(itmp1)
        b       L_register_copy
-
-#if defined(__DARWIN__)
 L_handle_fda8:
        lfd     fa8,offvmargdata(itmp1)
        b       L_register_copy
@@ -668,8 +635,9 @@ L_handle_fda11:
 L_handle_fda12:
        lfd     fa12,offvmargdata(itmp1)
        b       L_register_copy
-#endif
 
+asm_vm_call_method_end:
+       nop
 
 /* asm_call_jit_compiler *******************************************************
 
@@ -758,10 +726,10 @@ L_asm_call_jit_compiler:                /* required for PIC code              */
 
L_asm_call_jit_compiler_exception:
        mflr    r0
-       stw     r0,LA_LR_OFFSET(sp)
-       stwu    sp,-LA_SIZE_ALIGNED(sp)     /* preserve linkage area              */
+       std     r0,LA_LR_OFFSET(sp)         /* 64-bit store of the return address */
+       stdu    sp,-LA_SIZE_ALIGNED(sp)     /* preserve linkage area              */
        bl      exceptions_get_and_clear_exception
-       lwz     xpc,LA_SIZE_ALIGNED+LA_LR_OFFSET(sp)
+       ld      xpc,LA_SIZE_ALIGNED+LA_LR_OFFSET(sp) /* 64-bit reload, pairs with std */
        mtlr    xpc     
        addi    sp,sp,LA_SIZE_ALIGNED
 
@@ -895,7 +863,7 @@ ex_int1:
 #else
        addi    t3,t3,(ex_int2-ex_int1)@l
 #endif
-       slwi    t2,t2,3                     /* t2 = register count * 8            */
+       slwi    t2,t2,2                     /* t2 = register count * 4            */
        subf    t3,t2,t3                    /* t3 = IntSave - t2                  */
        mtctr   t3
        bctr
@@ -911,7 +879,7 @@ ex_int1:
        ld      s8,-1*8(t1)
 
ex_int2:
-       subf    t1,t2,t1                    /* t1 = t1 - register count * 8       */
+       subf    t1,t2,t1                    /* t1 = t1 - register count * 4       */
+       subf    t1,t2,t1                    /* again: saved registers are 8 bytes */
        lwz     t2,FltSave(pv)
        bl      ex_flt1
 ex_flt1:
@@ -921,7 +889,7 @@ ex_flt1:
 #else
        addi    t3,t3,(ex_flt2-ex_flt1)@l
 #endif
-       slwi    t2,t2,3                     /* t2 = register count * 8            */
+       slwi    t2,t2,2                     /* t2 = register count * 4            */
        subf    t3,t2,t3                    /* t3 = FltSave - t2                  */
        mtctr   t3
        bctr
@@ -1111,6 +1079,7 @@ L_asm_patcher_wrapper_exception:
        addi    sp,sp,LA_SIZE+PA_SIZE+ARG_CNT*8+TMP_CNT*8+4*8+8*8
        b       L_asm_handle_exception
 
+#if defined(ENABLE_REPLACEMENT)
 
 /* asm_replacement_out *********************************************************
 
@@ -1325,26 +1294,37 @@ asm_replacement_out:
        mtctr   itmp3
        bctr
 
-/*********************************************************************/
-/*
+#endif /* defined(ENABLE_REPLACEMENT) */
+
+/* asm_cacheflush **************************************************************
+       Copied from linux/arch/ppc64/kernel/vdso64/cacheflush.S.
+       Assumes a 128-byte cache line size.
+       All registers used by this routine may be clobbered.
+*******************************************************************************/
+
+       .section ".opd","aw"
+       .align 3
 asm_cacheflush:
-       .quad .asm_cacheflush,.TOC.@tocbase,0
-       .previous
-       .size asm_cacheflush,24
-       .type .asm_cacheflush,@function
-       .globl .asm_cacheflush
-*/
+               .quad   .asm_cacheflush,.TOC.@tocbase,0
+               .previous
+               .size asm_cacheflush, 24
+               .type .asm_cacheflush,@function
+               .globl .asm_cacheflush 
 .asm_cacheflush:
+       /* construct the AND mask */
+       li      r6,   0xffffffffffff8000
+       ori     r6,r6,0x000000000000ff80
+
        add     r4,r3,r4
-       rldimi  r3,r3,0,26
-       addi    r4,r4,31
-       rldimi  r4,r4,0,26
+       and.    r3,r3,r6
+       addi    r4,r4,127
+       and.    r4,r4,r6
        mr      r5,r3
 1:
        cmpld   r3,r4
        bge     0f
        dcbst   0,r3
-       addi    r3,r3,32
+       addi    r3,r3,128
        b       1b
 0:
        sync
@@ -1352,15 +1332,26 @@ asm_cacheflush:
        cmpld   r5,r4
        bge     0f
        icbi    0,r5
-       addi    r5,r5,32
+       addi    r5,r5,128
        b       1b
 0:
        sync
        isync
        blr
 
-
+/*
+               asm_getclassvalues_atomic 
+*/
+       .section ".opd","aw"
+       .align 3
+asm_getclassvalues_atomic:
+               .quad   .asm_getclassvalues_atomic,.TOC.@tocbase,0
+               .previous
+               .size asm_getclassvalues_atomic, 24
+               .type .asm_getclassvalues_atomic,@function
+               .globl .asm_getclassvalues_atomic
 .asm_getclassvalues_atomic:
+
 _crit_restart:
 _crit_begin:
        lwz     r6,offbaseval(r3)