* src/vm/jit/powerpc64/linux/md-abi.h: Added PA_SIZE and
[cacao.git] / src / vm / jit / powerpc64 / asmpart.S
index 03d939c5107ddac00d97b9b29c89922a6a0cd460..3cba6abcf87368637bb5e107d5db9402fad26b85 100644 (file)
@@ -31,7 +31,7 @@
    Changes: Christian Thalinger
             Edwin Steiner
 
-   $Id: asmpart.S 5162 2006-07-19 13:07:00Z tbfg $
+   $Id: asmpart.S 5261 2006-08-22 15:49:25Z tbfg $
 
 */
 
 #include "vm/jit/methodheader.h"
 #include "vm/jit/powerpc64/offsets.h"
 
+.section ".toc","aw"
+.section ".text"
 
-       .text
-
-       .align 2
+       .align 2        
 
 
 /* export functions ***********************************************************/
@@ -71,9 +71,9 @@
        .globl asm_patcher_wrapper
 
        .globl asm_replacement_out
-       .globl asm_replacement_in
+       .globl .asm_replacement_in
 
-       .globl asm_cacheflush
+       .globl .asm_cacheflush          /* no function descriptor needed, only called directly */
        .globl asm_criticalsections
        .globl asm_getclassvalues_atomic
 
        .long   0                         /* frame size                           */
        .long   0                         /* codeinfo pointer                     */
 
+.section ".opd","aw"
+.align 3
+
 asm_vm_call_method:
 asm_vm_call_method_int:
 asm_vm_call_method_long:
 asm_vm_call_method_float:
 asm_vm_call_method_double:
-       mflr    r0
-       stw     r0,LA_LR_OFFSET(r1)
-       stwu    r1,-40*4(r1)
+       .quad   .asm_vm_call_method,.TOC.@tocbase,0
+       .previous
+       .size asm_vm_call_method, 24
+       .type .asm_vm_call_method,@function
+       .globl .asm_vm_call_method
 
-       stw     s0,8*4(sp)                /* save used callee saved registers     */
-       stw     a0,9*4(sp)                /* save method pointer for compiler     */
+.asm_vm_call_method:
+       mflr    r0
+       std     r0,LA_LR_OFFSET(sp)
+       stdu    sp,-40*8(sp)
+       
+       std     s0,8*8(sp)                /* save used callee saved registers     */
+       std     a0,9*8(sp)                /* save method pointer for compiler     */
 
 #if defined(__DARWIN__)
-       stw     itmp1,10*4(sp)            /* register r11 is callee saved         */
+       std     itmp1,10*8(sp)            /* register r11 is callee saved         */
 #endif
-       stw     pv,11*4(sp)               /* save PV register                     */
+       std     pv,11*8(sp)               /* save PV register                     */
 
-       stw     itmp3,12*4(sp)            /* registers r14-r31 are callee saved   */
-       stfd    ftmp1,14*4(sp)            /* registers f14-f31 are callee saved   */
-       stfd    ftmp2,16*4(sp)
+       std     itmp3,12*8(sp)            /* registers r14-r31 are callee saved   */
+       stfd    ftmp1,13*8(sp)            /* registers f14-f31 are callee saved   */
+       stfd    ftmp2,14*8(sp)
 
 #if defined(__DARWIN__)
-       stw     t1,18*4(r1)
-       stw     t2,19*4(r1)
-       stw     t3,20*4(r1)
-       stw     t4,21*4(r1)
-       stw     t5,22*4(r1)
-       stw     t6,23*4(r1)
-       stw     t7,24*4(r1)
-
-       stfd    ft0,26*4(r1)
-       stfd    ft1,28*4(r1)
-       stfd    ft2,30*4(r1)
-       stfd    ft3,32*4(r1)
-       stfd    ft4,34*4(r1)
-       stfd    ft5,36*4(r1)
+       std     t1,15*8(r1)
+       std     t2,16*8(r1)
+       std     t3,17*8(r1)
+       std     t4,18*8(r1)
+       std     t5,19*8(r1)
+       std     t6,20*8(r1)
+       std     t7,21*8(r1)
+
+       stfd    ft0,22*8(r1)
+       stfd    ft1,23*8(r1)
+       stfd    ft2,24*8(r1)
+       stfd    ft3,25*8(r1)
+       stfd    ft4,26*8(r1)
+       stfd    ft5,27*8(r1)
 #else
-       SAVE_TEMPORARY_REGISTERS(18)      /* the offset has to be even            */
+       SAVE_TEMPORARY_REGISTERS(15)      /* the offset has to be even            */
 #endif
 
        mr      itmp2,a1                  /* arg count                            */
@@ -192,13 +202,16 @@ L_register_handle_int:
        addis   itmp3,t3,ha16(L_jumptable_int - L_asm_vm_call_method_get_pc)
        la      itmp3,lo16(L_jumptable_int - L_asm_vm_call_method_get_pc)(itmp3)
 #else
-       lis     itmp3,L_jumptable_int@ha
-       addi    itmp3,itmp3,L_jumptable_int@l
+       lis     itmp3,L_jumptable_int@highest           /* load 64bit address   */
+       ori     itmp3,itmp3,L_jumptable_int@higher
+       rldicr  itmp3,itmp3,32,31
+       oris    itmp3,itmp3,L_jumptable_int@h
+       ori     itmp3,itmp3,L_jumptable_int@l
 #endif
 
        slwi    t2,t6,2                   /* multiple of 4-bytes                  */
        add     itmp3,itmp3,t2            /* calculate address of jumptable       */
-       lwz     itmp3,0(itmp3)            /* load function address                */
+       ld     itmp3,0(itmp3)             /* load function address                */
        mtctr   itmp3
        addi    t0,t0,1                   /* integer argument counter             */
        addi    t6,t6,1                   /* integer argument register counter    */
@@ -374,27 +387,30 @@ L_stack_handle_double:
        b       L_stack_copy_loop
 
 L_stack_copy_done:
-       lwz     itmp1,9*4(s0)             /* pass method pointer via tmp1         */
+       ld      itmp1,9*8(s0)             /* pass method pointer via tmp1         */
 
 #if defined(__DARWIN__)
        addis   mptr,t3,ha16(L_asm_call_jit_compiler - L_asm_vm_call_method_get_pc)
        la      mptr,lo16(L_asm_call_jit_compiler - L_asm_vm_call_method_get_pc)(mptr)
 #else
-       lis     mptr,L_asm_call_jit_compiler@ha
-       addi    mptr,mptr,L_asm_call_jit_compiler@l
+       lis     mptr,L_asm_call_jit_compiler@highest    /* load 64bit address   */
+       ori     mptr,mptr,L_asm_call_jit_compiler@higher
+       rldicr  mptr,mptr,32,31
+       oris    mptr,mptr,L_asm_call_jit_compiler@h
+       ori     mptr,mptr,L_asm_call_jit_compiler@l
 #endif
-       stw     mptr,7*4(s0)
-       addi    mptr,s0,7*4
+       std     mptr,7*8(s0)
+       addi    mptr,s0,7*8
 
-       lwz     pv,0*4(mptr)
+       ld      pv,0*8(mptr)
        mtctr   pv
        bctrl
 1:
        mflr    itmp1
 #if defined(__DARWIN__)
-       addi    pv,itmp1,lo16(asm_vm_call_method - 1b)
+       addi    pv,itmp1,lo16(.asm_vm_call_method - 1b)
 #else
-       addi    pv,itmp1,(asm_vm_call_method - 1b)@l
+       addi    pv,itmp1,(.asm_vm_call_method - 1b)@l
 #endif
 
 L_asm_vm_call_method_return:
@@ -442,7 +458,7 @@ asm_vm_call_method_exception_handler:
 
 
        .data
-       .align  2
+       .align  4
 
 L_jumptable_int:
        .quad   L_handle_a0
@@ -455,7 +471,7 @@ L_jumptable_int:
        .quad   L_handle_a7
 
        .text
-       .align  2
+       .align  4
 
 L_handle_a0:
        lwz     a0,offvmargdata+4(itmp1)
@@ -692,74 +708,75 @@ L_handle_fda12:
 asm_call_jit_compiler:
 L_asm_call_jit_compiler:                /* required for PIC code              */
        mflr    r0
-       stw     r0,LA_LR_OFFSET(r1)         /* save return address                */
-       stwu    r1,-(LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8)(r1)
+       std     r0,LA_LR_OFFSET(sp)         /* save return address                */
+       stdu    r1,-(LA_SIZE+PA_SIZE+ARG_CNT*8)(sp)
 
 #if defined(__DARWIN__)
-       stw     a0,(LA_WORD_SIZE+5+0)*4(r1)
-       stw     a1,(LA_WORD_SIZE+5+1)*4(r1)
-       stw     a2,(LA_WORD_SIZE+5+2)*4(r1)
-       stw     a3,(LA_WORD_SIZE+5+3)*4(r1)
-       stw     a4,(LA_WORD_SIZE+5+4)*4(r1)
-       stw     a5,(LA_WORD_SIZE+5+5)*4(r1)
-       stw     a6,(LA_WORD_SIZE+5+6)*4(r1)
-       stw     a7,(LA_WORD_SIZE+5+7)*4(r1)
-
-       stfd    fa0,(LA_WORD_SIZE+5+8)*4(r1)
-       stfd    fa1,(LA_WORD_SIZE+5+10)*4(r1)
-       stfd    fa2,(LA_WORD_SIZE+5+12)*4(r1)
-       stfd    fa3,(LA_WORD_SIZE+5+14)*4(r1)
-       stfd    fa4,(LA_WORD_SIZE+5+16)*4(r1)
-       stfd    fa5,(LA_WORD_SIZE+5+18)*4(r1)
-       stfd    fa6,(LA_WORD_SIZE+5+20)*4(r1)
-       stfd    fa7,(LA_WORD_SIZE+5+22)*4(r1)
-       stfd    fa8,(LA_WORD_SIZE+5+24)*4(r1)
-       stfd    fa9,(LA_WORD_SIZE+5+26)*4(r1)
-       stfd    fa10,(LA_WORD_SIZE+5+28)*4(r1)
-       stfd    fa11,(LA_WORD_SIZE+5+30)*4(r1)
-       stfd    fa12,(LA_WORD_SIZE+5+32)*4(r1)
+       stw     a0,LA_SIZE+(5+0)*8(r1)
+       stw     a1,LA_SIZE+(5+1)*8(r1)
+       stw     a2,LA_SIZE+(5+2)*8(r1)
+       stw     a3,LA_SIZE+(5+3)*8(r1)
+       stw     a4,LA_SIZE+(5+4)*8(r1)
+       stw     a5,LA_SIZE+(5+5)*8(r1)
+       stw     a6,LA_SIZE+(5+6)*8(r1)
+       stw     a7,LA_SIZE+(5+7)*8(r1)
+
+       stfd    fa0,LA_SIZE+(5+8)*8(r1)
+       stfd    fa1,LA_SIZE+(5+10)*8(r1)
+       stfd    fa2,LA_SIZE+(5+12)*8(r1)
+       stfd    fa3,LA_SIZE+(5+14)*8(r1)
+       stfd    fa4,LA_SIZE+(5+16)*8(r1)
+       stfd    fa5,LA_SIZE+(5+18)*8(r1)
+       stfd    fa6,LA_SIZE+(5+20)*8(r1)
+       stfd    fa7,LA_SIZE+(5+22)*8(r1)
+       stfd    fa8,LA_SIZE+(5+24)*8(r1)
+       stfd    fa9,LA_SIZE+(5+26)*8(r1)
+       stfd    fa10,LA_SIZE+(5+28)*8(r1)
+       stfd    fa11,LA_SIZE+(5+30)*8(r1)
+       stfd    fa12,LA_SIZE+(5+32)*8(r1)
 #else
-       SAVE_ARGUMENT_REGISTERS(LA_WORD_SIZE+1)
+       SAVE_ARGUMENT_REGISTERS(LA_SIZE_IN_POINTERS+PA_SIZE_IN_POINTERS)
 #endif
 
        mr      a0,itmp1
        mr      a1,mptr
-       addi    a2,sp,(LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8)
-       lwz     a3,(LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8)+LA_LR_OFFSET(sp)
+       addi    a2,sp,(LA_SIZE + PA_SIZE+ ARG_CNT*8)
+       ld      a3,(LA_SIZE + PA_SIZE + ARG_CNT*8)+LA_LR_OFFSET(sp)
        bl      jit_asm_compile
+       ori     r0,r0,0                     /* nop needed after jump to function desc. */
        mr      pv,v0                       /* move address to pv register        */
 
 #if defined(__DARWIN__)
-       lwz     a0,(LA_WORD_SIZE+5+0)*4(r1)
-       lwz     a1,(LA_WORD_SIZE+5+1)*4(r1)
-       lwz     a2,(LA_WORD_SIZE+5+2)*4(r1)
-       lwz     a3,(LA_WORD_SIZE+5+3)*4(r1)
-       lwz     a4,(LA_WORD_SIZE+5+4)*4(r1)
-       lwz     a5,(LA_WORD_SIZE+5+5)*4(r1)
-       lwz     a6,(LA_WORD_SIZE+5+6)*4(r1)
-       lwz     a7,(LA_WORD_SIZE+5+7)*4(r1)
-
-       lfd     fa0,(LA_WORD_SIZE+5+8)*4(r1)
-       lfd     fa1,(LA_WORD_SIZE+5+10)*4(r1)
-       lfd     fa2,(LA_WORD_SIZE+5+12)*4(r1)
-       lfd     fa3,(LA_WORD_SIZE+5+14)*4(r1)
-       lfd     fa4,(LA_WORD_SIZE+5+16)*4(r1)
-       lfd     fa5,(LA_WORD_SIZE+5+18)*4(r1)
-       lfd     fa6,(LA_WORD_SIZE+5+20)*4(r1)
-       lfd     fa7,(LA_WORD_SIZE+5+22)*4(r1)
-       lfd     fa8,(LA_WORD_SIZE+5+24)*4(r1)
-       lfd     fa9,(LA_WORD_SIZE+5+26)*4(r1)
-       lfd     fa10,(LA_WORD_SIZE+5+28)*4(r1)
-       lfd     fa11,(LA_WORD_SIZE+5+30)*4(r1)
-       lfd     fa12,(LA_WORD_SIZE+5+32)*4(r1)
+       lwz     a0,LA_SIZE+(+5+0)*8(r1)
+       lwz     a1,LA_SIZE+(+5+1)*8(r1)
+       lwz     a2,LA_SIZE+(+5+2)*8(r1)
+       lwz     a3,LA_SIZE+(+5+3)*8(r1)
+       lwz     a4,LA_SIZE+(+5+4)*8(r1)
+       lwz     a5,LA_SIZE+(+5+5)*8(r1)
+       lwz     a6,LA_SIZE+(+5+6)*8(r1)
+       lwz     a7,LA_SIZE+(+5+7)*8(r1)
+
+       lfd     fa0,LA_SIZE+(+5+8)*8(r1)
+       lfd     fa1,LA_SIZE+(+5+10)*8(r1)
+       lfd     fa2,LA_SIZE+(+5+12)*8(r1)
+       lfd     fa3,LA_SIZE+(+5+14)*8(r1)
+       lfd     fa4,LA_SIZE+(+5+16)*8(r1)
+       lfd     fa5,LA_SIZE+(+5+18)*8(r1)
+       lfd     fa6,LA_SIZE+(+5+20)*8(r1)
+       lfd     fa7,LA_SIZE+(+5+22)*8(r1)
+       lfd     fa8,LA_SIZE+(+5+24)*8(r1)
+       lfd     fa9,LA_SIZE+(+5+26)*8(r1)
+       lfd     fa10,LA_SIZE+(+5+28)*8(r1)
+       lfd     fa11,LA_SIZE+(+5+30)*8(r1)
+       lfd     fa12,LA_SIZE+(+5+32)*8(r1)
 #else
-       RESTORE_ARGUMENT_REGISTERS(LA_WORD_SIZE+1)
+       RESTORE_ARGUMENT_REGISTERS(LA_SIZE_IN_POINTERS+PA_SIZE_IN_POINTERS)
 #endif
 
-       lwz     itmp1,(LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8)+LA_LR_OFFSET(r1)
-       mtlr    itmp1
+       ld     itmp1,(LA_SIZE + PA_SIZE + ARG_CNT*8)+LA_LR_OFFSET(sp)
+       mtlr   itmp1
 
-       addi    sp,sp,(LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8)
+       addi    sp,sp,(LA_SIZE + PA_SIZE + ARG_CNT*8)
 
        mr.     pv,pv                       /* test for exception                 */
        beq     L_asm_call_jit_compiler_exception
@@ -821,14 +838,14 @@ L_asm_handle_exception:                 /* required for PIC code              */
        li      a4,1                        /* set maybe-leaf flag                */
 
 L_asm_handle_exception_stack_loop:
-       addi    sp,sp,-(LA_WORD_SIZE+4+5)*4 /* allocate stack                     */
+       addi    sp,sp,-(LA_SIZE_IN_POINTERS+4+5)*8 /* allocate stack                     */
        stw     xptr,LA_SIZE+4*4(sp)        /* save exception pointer             */
        stw     xpc,LA_SIZE+5*4(sp)         /* save exception pc                  */
        stw     pv,LA_SIZE+6*4(sp)          /* save data segment pointer          */
        mflr    r0                          /* save return address                */
        stw     r0,LA_SIZE+5*4(sp)
        add     a3,a3,sp                    /* calculate Java sp into a3...       */
-       addi    a3,a3,(LA_WORD_SIZE+4+5)*4
+       addi    a3,a3,LA_SIZE+(4+5)*8
        stw     a4,LA_SIZE+8*4(sp)          /* save maybe-leaf flag               */
 
        mr      a0,xptr                     /* pass exception pointer             */
@@ -846,7 +863,7 @@ L_asm_handle_exception_stack_loop:
        lwz     r0,LA_SIZE+5*4(sp)          /* restore return address             */
        mtlr    r0
        lwz     a4,LA_SIZE+8*4(sp)          /* get maybe-leaf flag                */
-       addi    sp,sp,(LA_WORD_SIZE+4+5)*4  /* free stack frame                   */
+       addi    sp,sp,LA_SIZE+(4+5)*8  /* free stack frame                   */
 
        mr.     a4,a4
        beq     L_asm_handle_exception_no_leaf
@@ -864,12 +881,12 @@ L_asm_handle_exception_no_leaf:
        bctr
 
 L_asm_handle_exception_not_catched:
-       lwz     xptr,LA_SIZE+4*4(sp)        /* restore exception pointer          */
-       lwz     pv,LA_SIZE+6*4(sp)          /* restore data segment pointer       */
-       lwz     r0,LA_SIZE+5*4(sp)          /* restore return address             */
+       lwz     xptr,LA_SIZE+4*8(sp)        /* restore exception pointer          */
+       lwz     pv,LA_SIZE+6*8(sp)          /* restore data segment pointer       */
+       lwz     r0,LA_SIZE+5*8(sp)          /* restore return address             */
        mtlr    r0
-       lwz     a4,LA_SIZE+8*4(sp)          /* get maybe-leaf flag                */
-       addi    sp,sp,(LA_WORD_SIZE+4+5)*4  /* free stack frame                   */
+       lwz     a4,LA_SIZE+8*8(sp)          /* get maybe-leaf flag                */
+       addi    sp,sp,LA_SIZE+(4+5)*8  /* free stack frame                   */
 
        mr.     a4,a4
        beq     L_asm_handle_exception_no_leaf_stack
@@ -913,7 +930,7 @@ ex_int1:
        lwz     s6,-4*4(t0)
        lwz     s7,-3*4(t0)
        lwz     s8,-2*4(t0)
-       lwz     s9,-1*4(t0)
+       /*lwz     s9,-1*4(t0) XXX */
 
 ex_int2:
        subf    t0,t1,t0                    /* t0 = t0 - register count * 4       */
@@ -991,143 +1008,143 @@ asm_abstractmethoderror:
    XXX
 
    Stack layout:
-     20   return address into JIT code (patch position)
-     16   pointer to virtual java_objectheader
-     12   machine code (which is patched back later)
-      8   unresolved class/method/field reference
-      4   data segment displacement from load instructions
+     40   return address into JIT code (patch position)
+     32   pointer to virtual java_objectheader
+     24   machine code (which is patched back later)
+     16   unresolved class/method/field reference
+      8   data segment displacement from load instructions
       0   patcher function pointer to call (pv is saved here afterwards)
 
 *******************************************************************************/
 
 asm_patcher_wrapper:
        mflr    r0                    /* get Java return address (leaf)           */
-       stw     r0,6*4(sp)            /* store it in the stub stackframe          */
+       std     r0,6*8(sp)            /* store it in the stub stackframe          */
                                      /* keep stack 16-byte aligned (frame size must be a multiple of 16) */
-       stwu    sp,-(LA_SIZE+(5+58)*4)(sp)
+       stdu    sp,-(LA_SIZE+PA_SIZE+ARG_CNT*8+TMP_CNT*8+4*8)(sp)
 
 #if defined(__DARWIN__)
-       stw     a0,LA_SIZE+(5+0)*4(r1)      /* save argument registers            */
-       stw     a1,LA_SIZE+(5+1)*4(r1)      /* preserve linkage area (24 bytes)   */
-       stw     a2,LA_SIZE+(5+2)*4(r1)      /* and 4 bytes for 4 argument         */
-       stw     a3,LA_SIZE+(5+3)*4(r1)
-       stw     a4,LA_SIZE+(5+4)*4(r1)
-       stw     a5,LA_SIZE+(5+5)*4(r1)
-       stw     a6,LA_SIZE+(5+6)*4(r1)
-       stw     a7,LA_SIZE+(5+7)*4(r1)
-
-       stfd    fa0,LA_SIZE+(5+8)*4(sp)
-       stfd    fa1,LA_SIZE+(5+10)*4(sp)
-       stfd    fa2,LA_SIZE+(5+12)*4(sp)
-       stfd    fa3,LA_SIZE+(5+14)*4(sp)
-       stfd    fa4,LA_SIZE+(5+16)*4(sp)
-       stfd    fa5,LA_SIZE+(5+18)*4(sp)
-       stfd    fa6,LA_SIZE+(5+20)*4(sp)
-       stfd    fa7,LA_SIZE+(5+22)*4(sp)
-       stfd    fa8,LA_SIZE+(5+24)*4(sp)
-       stfd    fa9,LA_SIZE+(5+26)*4(sp)
-       stfd    fa10,LA_SIZE+(5+28)*4(sp)
-       stfd    fa11,LA_SIZE+(5+30)*4(sp)
-       stfd    fa12,LA_SIZE+(5+32)*4(sp)
-
-       stw     t0,(LA_WORD_SIZE+5+33)*4(r1)
-       stw     t1,(LA_WORD_SIZE+5+34)*4(r1)
-       stw     t2,(LA_WORD_SIZE+5+35)*4(r1)
-       stw     t3,(LA_WORD_SIZE+5+36)*4(r1)
-       stw     t4,(LA_WORD_SIZE+5+37)*4(r1)
-       stw     t5,(LA_WORD_SIZE+5+38)*4(r1)
-       stw     t6,(LA_WORD_SIZE+5+39)*4(r1)
-       stw     t7,(LA_WORD_SIZE+5+40)*4(r1)
-
-       stfd    ft0,(LA_WORD_SIZE+5+42)*4(r1)
-       stfd    ft1,(LA_WORD_SIZE+5+44)*4(r1)
-       stfd    ft2,(LA_WORD_SIZE+5+46)*4(r1)
-       stfd    ft3,(LA_WORD_SIZE+5+48)*4(r1)
-       stfd    ft4,(LA_WORD_SIZE+5+50)*4(r1)
-       stfd    ft5,(LA_WORD_SIZE+5+52)*4(r1)
+       stw     a0,LA_SIZE+(5+0)*8(r1)      /* save argument registers            */
+       stw     a1,LA_SIZE+(5+1)*8(r1)      /* preserve linkage area (24 bytes)   */
+       stw     a2,LA_SIZE+(5+2)*8(r1)      /* and 4 bytes for 4 argument         */
+       stw     a3,LA_SIZE+(5+3)*8(r1)
+       stw     a4,LA_SIZE+(5+4)*8(r1)
+       stw     a5,LA_SIZE+(5+5)*8(r1)
+       stw     a6,LA_SIZE+(5+6)*8(r1)
+       stw     a7,LA_SIZE+(5+7)*8(r1)
+
+       stfd    fa0,LA_SIZE+(5+8)*8(sp)
+       stfd    fa1,LA_SIZE+(5+10)*8(sp)
+       stfd    fa2,LA_SIZE+(5+12)*8(sp)
+       stfd    fa3,LA_SIZE+(5+14)*8(sp)
+       stfd    fa4,LA_SIZE+(5+16)*8(sp)
+       stfd    fa5,LA_SIZE+(5+18)*8(sp)
+       stfd    fa6,LA_SIZE+(5+20)*8(sp)
+       stfd    fa7,LA_SIZE+(5+22)*8(sp)
+       stfd    fa8,LA_SIZE+(5+24)*8(sp)
+       stfd    fa9,LA_SIZE+(5+26)*8(sp)
+       stfd    fa10,LA_SIZE+(5+28)*8(sp)
+       stfd    fa11,LA_SIZE+(5+30)*8(sp)
+       stfd    fa12,LA_SIZE+(5+32)*8(sp)       /* XXX */
+
+       stw     t0,LA_SIZE+(+5+33)*8(r1)
+       stw     t1,LA_SIZE+(+5+34)*8(r1)
+       stw     t2,LA_SIZE+(+5+35)*8(r1)
+       stw     t3,LA_SIZE+(+5+36)*8(r1)
+       stw     t4,LA_SIZE+(+5+37)*8(r1)
+       stw     t5,LA_SIZE+(+5+38)*8(r1)
+       stw     t6,LA_SIZE+(+5+39)*8(r1)
+       stw     t7,LA_SIZE+(+5+40)*8(r1)
+
+       stfd    ft0,LA_SIZE+(+5+42)*8(r1)
+       stfd    ft1,LA_SIZE+(+5+44)*8(r1)
+       stfd    ft2,LA_SIZE+(+5+46)*8(r1)
+       stfd    ft3,LA_SIZE+(+5+48)*8(r1)
+       stfd    ft4,LA_SIZE+(+5+50)*8(r1)
+       stfd    ft5,LA_SIZE+(+5+52)*8(r1)
 #else
-       SAVE_ARGUMENT_REGISTERS(LA_WORD_SIZE+1) /* save 8 int/8 float arguments   */
-       SAVE_TEMPORARY_REGISTERS(LA_WORD_SIZE+1+24)
+       SAVE_ARGUMENT_REGISTERS(LA_SIZE_IN_POINTERS+PA_SIZE_IN_POINTERS) /* save 8 int/8 float arguments   */
+       SAVE_TEMPORARY_REGISTERS(LA_SIZE_IN_POINTERS+PA_SIZE_IN_POINTERS+ARG_CNT)
 #endif
 
-       stw     itmp1,LA_SIZE+(5+54)*4(sp)
-       stw     itmp2,LA_SIZE+(5+55)*4(sp)
-       stw     pv,LA_SIZE+(5+56)*4(sp)
+       std     itmp1,LA_SIZE+PA_SIZE+(ARG_CNT+TMP_CNT)*8+1*8(sp)
+       std     itmp2,LA_SIZE+PA_SIZE+(ARG_CNT+TMP_CNT)*8+2*8(sp)
+       std     pv,LA_SIZE+PA_SIZE+(ARG_CNT+TMP_CNT)*8+3*8(sp)
 
-       addi    a0,sp,LA_SIZE+(5+58)*4      /* pass SP of patcher stub            */
+       addi    a0,sp,LA_SIZE+PA_SIZE+ARG_CNT*8+TMP_CNT*8+4*8      /* pass SP of patcher stub            */
        mr      a1,pv                       /* pass PV                            */
        mr      a2,r0                       /* pass RA (correct for leafs)        */
        bl      patcher_wrapper
-       stw     v0,LA_SIZE+(5+57)*4(sp)     /* save return value                  */
+       std     v0,LA_SIZE+PA_SIZE+(ARG_CNT+TMP_CNT)*8+4*8(sp)     /* save return value                  */
 
 #if defined(__DARWIN__)
-       lwz     a0,LA_SIZE+(5+0)*4(r1)
-       lwz     a1,LA_SIZE+(5+1)*4(r1)
-       lwz     a2,LA_SIZE+(5+2)*4(r1)
-       lwz     a3,LA_SIZE+(5+3)*4(r1)
-       lwz     a4,LA_SIZE+(5+4)*4(r1)
-       lwz     a5,LA_SIZE+(5+5)*4(r1)
-       lwz     a6,LA_SIZE+(5+6)*4(r1)
-       lwz     a7,LA_SIZE+(5+7)*4(r1)
-
-       lfd     fa0,LA_SIZE+(5+8)*4(sp)
-       lfd     fa1,LA_SIZE+(5+10)*4(sp)
-       lfd     fa2,LA_SIZE+(5+12)*4(sp)
-       lfd     fa3,LA_SIZE+(5+14)*4(sp)
-       lfd     fa4,LA_SIZE+(5+16)*4(sp)
-       lfd     fa5,LA_SIZE+(5+18)*4(sp)
-       lfd     fa6,LA_SIZE+(5+20)*4(sp)
-       lfd     fa7,LA_SIZE+(5+22)*4(sp)
-       lfd     fa8,LA_SIZE+(5+24)*4(sp)
-       lfd     fa9,LA_SIZE+(5+26)*4(sp)
-       lfd     fa10,LA_SIZE+(5+28)*4(sp)
-       lfd     fa11,LA_SIZE+(5+30)*4(sp)
-       lfd     fa12,LA_SIZE+(5+32)*4(sp)
-
-       lwz     t0,(LA_WORD_SIZE+5+33)*4(r1)
-       lwz     t1,(LA_WORD_SIZE+5+34)*4(r1)
-       lwz     t2,(LA_WORD_SIZE+5+35)*4(r1)
-       lwz     t3,(LA_WORD_SIZE+5+36)*4(r1)
-       lwz     t4,(LA_WORD_SIZE+5+37)*4(r1)
-       lwz     t5,(LA_WORD_SIZE+5+38)*4(r1)
-       lwz     t6,(LA_WORD_SIZE+5+39)*4(r1)
-       lwz     t7,(LA_WORD_SIZE+5+40)*4(r1)
-
-       lfd     ft0,(LA_WORD_SIZE+5+42)*4(r1)
-       lfd     ft1,(LA_WORD_SIZE+5+44)*4(r1)
-       lfd     ft2,(LA_WORD_SIZE+5+46)*4(r1)
-       lfd     ft3,(LA_WORD_SIZE+5+48)*4(r1)
-       lfd     ft4,(LA_WORD_SIZE+5+50)*4(r1)
-       lfd     ft5,(LA_WORD_SIZE+5+52)*4(r1)
+       lwz     a0,LA_SIZE+(5+0)*8(r1)
+       lwz     a1,LA_SIZE+(5+1)*8(r1)
+       lwz     a2,LA_SIZE+(5+2)*8(r1)
+       lwz     a3,LA_SIZE+(5+3)*8(r1)
+       lwz     a4,LA_SIZE+(5+4)*8(r1)
+       lwz     a5,LA_SIZE+(5+5)*8(r1)
+       lwz     a6,LA_SIZE+(5+6)*8(r1)
+       lwz     a7,LA_SIZE+(5+7)*8(r1)
+
+       lfd     fa0,LA_SIZE+(5+8)*8(sp)
+       lfd     fa1,LA_SIZE+(5+10)*8(sp)
+       lfd     fa2,LA_SIZE+(5+12)*8(sp)
+       lfd     fa3,LA_SIZE+(5+14)*8(sp)
+       lfd     fa4,LA_SIZE+(5+16)*8(sp)
+       lfd     fa5,LA_SIZE+(5+18)*8(sp)
+       lfd     fa6,LA_SIZE+(5+20)*8(sp)
+       lfd     fa7,LA_SIZE+(5+22)*8(sp)
+       lfd     fa8,LA_SIZE+(5+24)*8(sp)
+       lfd     fa9,LA_SIZE+(5+26)*8(sp)
+       lfd     fa10,LA_SIZE+(5+28)*8(sp)
+       lfd     fa11,LA_SIZE+(5+30)*8(sp)
+       lfd     fa12,LA_SIZE+(5+32)*8(sp)
+
+       lwz     t0,LA_SIZE+(+5+33)*8(r1)
+       lwz     t1,LA_SIZE+(+5+34)*8(r1)
+       lwz     t2,LA_SIZE+(+5+35)*8(r1)
+       lwz     t3,LA_SIZE+(+5+36)*8(r1)
+       lwz     t4,LA_SIZE+(+5+37)*8(r1)
+       lwz     t5,LA_SIZE+(+5+38)*8(r1)
+       lwz     t6,LA_SIZE+(+5+39)*8(r1)
+       lwz     t7,LA_SIZE+(+5+40)*8(r1)
+
+       lfd     ft0,LA_SIZE+(+5+42)*8(r1)
+       lfd     ft1,LA_SIZE+(+5+44)*8(r1)
+       lfd     ft2,LA_SIZE+(+5+46)*8(r1)
+       lfd     ft3,LA_SIZE+(+5+48)*8(r1)
+       lfd     ft4,LA_SIZE+(+5+50)*8(r1)
+       lfd     ft5,LA_SIZE+(+5+52)*8(r1)
 #else
-       RESTORE_ARGUMENT_REGISTERS(LA_WORD_SIZE+1) /* restore 8 int/8 float args  */
-       RESTORE_TEMPORARY_REGISTERS(LA_WORD_SIZE+1+24)
+       RESTORE_ARGUMENT_REGISTERS(LA_SIZE_IN_POINTERS+PA_SIZE_IN_POINTERS) /* restore 8 int/8 float args  */
+       RESTORE_TEMPORARY_REGISTERS(LA_SIZE_IN_POINTERS+PA_SIZE_IN_POINTERS+ARG_CNT)
 #endif
 
-       lwz     itmp1,LA_SIZE+(5+54)*4(sp)
-       lwz     itmp2,LA_SIZE+(5+55)*4(sp)
-       lwz     pv,LA_SIZE+(5+56)*4(sp)
-       lwz     itmp3,LA_SIZE+(5+57)*4(sp)  /* restore return value into temp reg.*/
+       ld     itmp1,LA_SIZE+PA_SIZE+(ARG_CNT+TMP_CNT)*8+1*8(sp)
+       ld     itmp2,LA_SIZE+PA_SIZE+(ARG_CNT+TMP_CNT)*8+2*8(sp)
+       ld     pv,LA_SIZE+(ARG_CNT+TMP_CNT)*8+3*8(sp)
+       ld     itmp3,LA_SIZE+(ARG_CNT+TMP_CNT)*8+4*8(sp)
 
-       lwz     r0,(6+LA_WORD_SIZE+5+58)*4(sp) /* restore RA                      */
+       ld      r0,LA_SIZE+PA_SIZE+ARG_CNT*8+TMP_CNT*8+4*8+6*8(sp) /* restore RA                      */
        mtlr    r0
 
        mr.     itmp3,itmp3           /* check for an exception                   */
        bne     L_asm_patcher_wrapper_exception
 
                                      /* get return address (into JIT code)       */
-       lwz     itmp3,(5+LA_WORD_SIZE+5+58)*4(sp)
+       lwz     itmp3,LA_SIZE+PA_SIZE+ARG_CNT*8+TMP_CNT*8+4*8+5*8(sp)
 
                                      /* remove stack frame + patcher stub stack  */
-       addi    sp,sp,(8+LA_WORD_SIZE+5+58)*4
+       addi    sp,sp,LA_SIZE+PA_SIZE+ARG_CNT*8+TMP_CNT*8+4*8+8*8
 
        mtctr   itmp3
        bctr                          /* jump to new patched code                 */
 
 L_asm_patcher_wrapper_exception:
        mr      xptr,itmp3                  /* get exception                      */
-       lwz     xpc,(5+LA_WORD_SIZE+5+58)*4(sp)
-       addi    sp,sp,(8+LA_WORD_SIZE+5+58)*4
+       lwz     xpc,LA_SIZE+(5+5+58)*8(sp)
+       addi    sp,sp,LA_SIZE+(8+5+58)*8
        b       L_asm_handle_exception
 
 
@@ -1253,7 +1270,7 @@ asm_replacement_out:
 
 *******************************************************************************/
 
-asm_replacement_in:
+.asm_replacement_in:
        /* a0 == executionstate *es */
 
        /* set new sp and pv */
@@ -1345,15 +1362,22 @@ asm_replacement_in:
        bctr
 
 /*********************************************************************/
-
+/*
 asm_cacheflush:
+       .quad .asm_cacheflush,.TOC.@tocbase,0
+       .previous
+       .size asm_cacheflush,24
+       .type .asm_cacheflush,@function
+       .globl .asm_cacheflush
+*/
+.asm_cacheflush:
        add     r4,r3,r4
-       rlwinm  r3,r3,0,0,26
+       rldimi  r3,r3,0,26
        addi    r4,r4,31
-       rlwinm  r4,r4,0,0,26
-       mr      r6,r3
+       rldimi  r4,r4,0,26
+       mr      r5,r3
 1:
-       cmplw   r3,r4
+       cmpld   r3,r4
        bge     0f
        dcbst   0,r3
        addi    r3,r3,32
@@ -1361,7 +1385,7 @@ asm_cacheflush:
 0:
        sync
 1:
-       cmplw   r5,r4
+       cmpld   r5,r4
        bge     0f
        icbi    0,r5
        addi    r5,r5,32
@@ -1373,6 +1397,7 @@ asm_cacheflush:
 
 
 asm_getclassvalues_atomic:
+.globl .asm_getclassvalues_atomic
 _crit_restart:
 _crit_begin:
        lwz     r6,offbaseval(r3)