* Removed all Id tags.
[cacao.git] / src / vm / jit / i386 / asmpart.S
index 01b8fd8465fd5926472a7721b6b363f10636cb1b..179fbd038c0022f442e095b5d74c6d12cdd2aa02 100644 (file)
@@ -1,6 +1,6 @@
 /* src/vm/jit/i386/asmpart.S - Java-C interface functions for i386
 
-   Copyright (C) 1996-2005, 2006 R. Grafl, A. Krall, C. Kruegel,
+   Copyright (C) 1996-2005, 2006, 2007 R. Grafl, A. Krall, C. Kruegel,
    C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
    E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
    J. Wenninger, Institut f. Computersprachen - TU Wien
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
    02110-1301, USA.
 
-   Contact: cacao@cacaojvm.org
-
-   Authors: Andreas Krall
-            Reinhard Grafl
-            Christian Thalinger
-
-   Changes: Joseph Wenninger
-            Edwin Steiner
-
-   $Id: asmpart.S 4606 2006-03-15 04:43:25Z edwin $
-
 */
 
 
 #include "config.h"
 
-#include "vm/jit/abi.h"
+#include "md-asm.h"
+
+#include "vm/jit/i386/arch.h"
 #include "vm/jit/i386/md-abi.h"
-#include "vm/jit/i386/md-asm.h"
-#include "vm/jit/i386/offsets.h"
 
+#include "vm/jit/abi-asm.h"
 #include "vm/jit/methodheader.h"
 
 
        .text
 
 
-/* exported functions and variables *******************************************/
+/* export functions ***********************************************************/
 
        .globl asm_md_init
 
        .globl asm_vm_call_method_long
        .globl asm_vm_call_method_float
        .globl asm_vm_call_method_double
+       .globl asm_vm_call_method_exception_handler
+       .globl asm_vm_call_method_end
 
        .globl asm_call_jit_compiler
        .globl asm_handle_nat_exception
        .globl asm_handle_exception
 
-       .globl asm_wrapper_patcher
+       .globl asm_abstractmethoderror
+
+       .globl asm_patcher_wrapper
+
+#if defined(ENABLE_REPLACEMENT)
        .globl asm_replacement_out
+       .globl asm_replacement_in
+#endif
 
        .globl asm_builtin_f2i
        .globl asm_builtin_f2l
        .globl asm_builtin_d2i
        .globl asm_builtin_d2l
 
-       .globl asm_perform_threadswitch
-       .globl asm_initialize_thread_stack
-       .globl asm_switchstackandcall
-       .globl asm_criticalsections
-       .globl asm_getclassvalues_atomic
+       .globl asm_compare_and_swap
+       .globl asm_memory_barrier
+
+       .globl asm_get_cycle_count
 
 
 /* asm_md_init *****************************************************************
@@ -122,9 +119,9 @@ asm_md_init:
        .align  8
 
        .long   0                           /* catch type all                     */
-       .long   calljava_xhandler2          /* handler pc                         */
-       .long   calljava_xhandler2          /* end pc                             */
-       .long   L_asm_vm_call_method        /* start pc                           */
+       .long   0                           /* handler pc                         */
+       .long   0                           /* end pc                             */
+       .long   0                           /* start pc                           */
        .long   1                           /* extable size                       */
        .long   0                           /* line number table start            */
        .long   0                           /* line number table size             */
@@ -133,90 +130,73 @@ asm_md_init:
        .long   0                           /* isleaf                             */
        .long   0                           /* IsSync                             */
        .long   0                           /* frame size                         */
-       .long   0                           /* method pointer (pointer to name)   */
+       .long   0                           /* codeinfo pointer                   */
 
 asm_vm_call_method:
 asm_vm_call_method_int:
 asm_vm_call_method_long:
 asm_vm_call_method_float:
 asm_vm_call_method_double:
-L_asm_vm_call_method:                   /* required for PIC code              */
-       push    %ebp
-       mov     %esp,%ebp                   /* save stackptr                      */
-
-       push    %ebx                        /* save registers                     */
-       push    %esi
-       push    %edi
-
-       mov     4*4(%ebp),%eax              /* pointer to arg block (4(push)+4(return)+4+4)*/
-       mov     3*4(%ebp),%ecx              /* arg count            (4(push)+4(return)+4 */
-
-       xor     %esi,%esi                 /* clear stackframe size (MUST be       */
-                                         /* before args check, may be zero!!!)   */
-       test    %ecx,%ecx                 /* maybe we have no args                */
-       jle     calljava_copydone
-
-       mov     %ecx,%edx                 /* calculate stack size                 */
-       mov     %eax,%edi                 /* save pointer to arg block            */
-
-calljava_calcstacksize:
-       mov     offvmargtype(%eax),%ebx
-       test    $1,%ebx                   /* two word type?                       */
-       jz      calljava_onewordtype
-       add     $4,%esi                   /* add 1 slot to stackframe size        */
-
-calljava_onewordtype:
-       add     $4,%esi                   /* add 1 slot to stackframe size        */
-       sub     $1,%edx
-       test    %edx,%edx                 /* any args left?                       */
-       jz      calljava_setstack
-       add     $sizevmarg,%eax             /* goto next argument block           */
-       jmp     calljava_calcstacksize
-               
-calljava_setstack:                             
-       mov     %edi,%eax                 /* restore pointer to arg block         */
-       sub     %esi,%esp                 /* create stackframe for arguments      */
-       mov     %esp,%edi                 /* move stackpointer into temp variable */
-
-calljava_copyloop:
-       mov     offvmargdata(%eax),%edx     /* copy 4 Byte of Argument            */
-       mov     %edx,(%edi)
-       add     $4,%edi                     /* increase sp to next argument       */
-       mov     offvmargtype(%eax),%ebx     /* type -> ebx                        */
-       test    $1,%ebx                     /* two word type?                     */
-       jz      calljava_copynext
-
-       mov     offvmargdata+4(%eax),%edx   /* copy upper 4 byte of 2 word type   */
-       mov     %edx,(%edi)                     
-       add     $4,%edi                     /* increase sp to next argument       */
-
-calljava_copynext:             
-       sub     $1,%ecx                     /* are there any args left?           */
-       test    %ecx,%ecx
-       jle     calljava_copydone
-
-       add     $sizevmarg,%eax             /* goto next argument block           */
-       jmp     calljava_copyloop
-
-calljava_copydone:
-       mov     2*4(%ebp),itmp1             /* move function pointer to itmp1     */
-
-       lea     L_asm_call_jit_compiler,itmp3
+       push    bp
+       mov     sp,bp                       /* save stackptr                      */
+       sub     $(4*4),sp                   /* create stackframe                  */
+       and     $0xfffffff0,sp              /* align stack to 16 bytes            */
+
+       mov     t0,0*4(sp)                  /* save registers                     */
+       mov     s1,1*4(sp)
+       mov     s2,2*4(sp)
+
+       mov     sp,s1                       /* save stack pointer                 */
+
+       mov     3*4(bp),t0                  /* address of data structure          */
+       mov     4*4(bp),itmp1               /* number of stack arguments          */
+
+       cmp     $0,itmp1
+       je      L_asm_vm_call_method_stack_copy_done
+
+       mov     itmp1,itmp2
+       add     $1,itmp2                    /* keep stack 16-byte aligned         */
+       and     $0xfffffffe,itmp2
+       shl     $3,itmp2                    /* calculate stack size               */
+       sub     itmp2,sp                    /* create stack frame                 */
+       mov     sp,itmp2                    /* temporary stack pointer            */
+
+L_asm_vm_call_method_stack_copy_loop:
+       mov     0(t0),itmp3                 /* load argument                      */
+       mov     itmp3,0(itmp2)              /* store argument on stack            */
+       mov     4(t0),itmp3
+       mov     itmp3,4(itmp2)
+
+       sub     $1,itmp1                    /* subtract 1 argument                */
+       add     $8,t0                       /* set address of next argument       */
+       add     $8,itmp2                    /* increase SP                        */
+
+       cmp     $0,itmp1
+       jg      L_asm_vm_call_method_stack_copy_loop
+
+L_asm_vm_call_method_stack_copy_done:
+       lea     (2*4-256)(bp),mptr          /* We subtract 256 to force the next  */
+                                           /* move instruction to have a 32-bit  */
+                                           /* offset.                            */
+
+       mov     (0*4+256)(mptr),itmp3       /* method call as in Java             */
        call    *itmp3                      /* call JIT compiler                  */
 
 L_asm_vm_call_method_return:
-       add     %esi,%esp                   /* remove arg stack frame             */
-       pop     %edi                        /* restore registers                  */
-       pop     %esi
-       pop     %ebx
+       mov     s1,sp                       /* restore stackpointer               */
+
+       mov     0*4(sp),t0                  /* restore registers                  */
+       mov     1*4(sp),s1
+       mov     2*4(sp),s2
+
        leave
        ret
 
-calljava_xhandler2:
+asm_vm_call_method_exception_handler:
        push    xptr                        /* pass exception pointer             */
        call    builtin_throw_exception
        add     $4,sp
-       xor     v0,v0                       /* return NULL                        */
+asm_vm_call_method_end:
        jmp     L_asm_vm_call_method_return
 
 
@@ -249,79 +229,27 @@ calljava_xhandler2:
 
 asm_call_jit_compiler:
 L_asm_call_jit_compiler:                /* required for PIC code              */
-       sub     $((4+2)*4+sizestackframeinfo),sp /* create stack frame            */
-       mov     itmp1,(4+0)*4(sp)           /* save method pointer                */
-                       
-       mov     (4+2)*4+sizestackframeinfo(sp),itmp3 /* get return address        */
-       mov     -1(itmp3),itmp1b            /* get function code                  */
-       cmp     $0xd1,itmp1b                /* called with `call *REG_ITMP2'?     */
-       jne             L_not_static_special
-
-       sub     $6,itmp3                    /* calculate address of immediate     */
-       jmp             L_call_jit_compile
-               
-L_not_static_special:
-       cmp     $0xd0,itmp1b                /* called with `call *REG_ITMP1'      */
-       jne             L_not_virtual_interface
-       
-       sub     $6,itmp3                    /* calculate address of offset        */
-       mov     (itmp3),itmp3               /* get offset                         */
-       add     itmp2,itmp3                 /* add base address to get method adr */
-       jmp             L_call_jit_compile
-
-L_not_virtual_interface:
-       xor     itmp3,itmp3                 /* a call from asm_calljavafunction   */
-               
-L_call_jit_compile:
-       mov     itmp3,(4+1)*4(sp)           /* save address for method pointer    */
-
-       mov     sp,itmp1                    /* create stackframe info             */
-       add     $((4+2)*4),itmp1
-       mov     itmp1,0*4(sp)               /* stackframeinfo pointer             */
-       movl    $0,1*4(sp)                  /* if pv is NULL, use findmethod      */
-       mov     sp,itmp2
-       add     $((1+4+2)*4+sizestackframeinfo),itmp2 /* pass java sp             */
+       sub     $(4*4),sp                   /* keep stack 16-byte aligned         */
+
+       mov     itmp1,0*4(sp)               /* pass methodinfo pointer            */
+       mov     mptr,1*4(sp)                /* pass method pointer                */
+       mov     sp,itmp2                    /* pass java sp                       */
+       add     $((1+4)*4),itmp2
        mov     itmp2,2*4(sp)
-       mov     ((0+4+2)*4+sizestackframeinfo)(sp),itmp3 /* pass java ra          */
+       mov     4*4(sp),itmp3               /* pass java ra                       */
        mov     itmp3,3*4(sp)
-       call    stacktrace_create_inline_stackframeinfo
-
-       mov     (4+0)*4(sp),itmp1           /* pass method pointer                */
-       mov     itmp1,0*4(sp)
-       call    jit_compile
-       mov     v0,(4+0)*4(sp)              /* save return value                  */
-
-       mov     sp,itmp1                    /* remove stackframe info             */
-       add     $((4+2)*4),itmp1
-       mov     itmp1,0*4(sp)               /* stackframeinfo pointer             */
-       call    stacktrace_remove_stackframeinfo
-
-       mov     (4+0)*4(sp),v0              /* restore return value               */
-       mov     (4+1)*4(sp),itmp3           /* restore address for method pointer */
+       call    jit_asm_compile
 
-       add     $((4+2)*4+sizestackframeinfo),sp /* remove stack frame            */
+       add     $(4*4),sp                   /* remove stack frame                 */
 
        test    v0,v0                       /* check for exception                */
        je      L_asm_call_jit_compiler_exception
 
-       test    itmp3,itmp3                 /* was this a JIT call?               */
-       je              L_call_method
-       
-       mov     v0,(itmp3)                  /* save the new method pointer        */
-
-L_call_method:
        jmp             *v0                         /* ...and now call the new method     */
 
 L_asm_call_jit_compiler_exception:
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
-       call    builtin_asm_get_exceptionptrptr
-       mov     v0,itmp2                    /* v0 == itmp1                        */
-#else
-       lea     _exceptionptr,itmp2
-#endif
-       mov     (itmp2),xptr                /* get the exception pointer          */
-       movl    $0,(itmp2)                  /* clear the exception pointer        */
-
+       call    exceptions_get_and_clear_exception
+                                           /* v0 == xptr                         */
        pop     xpc                         /* get return address                 */
        sub     $2,xpc                      /* faulting address is ra - 2         */
        jmp     L_asm_handle_exception
@@ -342,25 +270,25 @@ asm_handle_nat_exception:
                
 asm_handle_exception:
 L_asm_handle_exception:                 /* required for PIC code              */
-       sub     $((ARG_CNT+TMP_CNT)*4),sp   /* create maybe-leaf stackframe       */
+       sub     $((ARG_CNT+TMP_CNT+3)*4),sp /* keep stack 16-byte aligned         */
 
        SAVE_ARGUMENT_REGISTERS(0)          /* we save arg and temp registers in  */
        SAVE_TEMPORARY_REGISTERS(ARG_CNT)   /* case this is a leaf method         */
 
-       mov     $((ARG_CNT+TMP_CNT)*4),itmp3/* prepare a3 for handle_exception    */
+       mov     $((ARG_CNT+TMP_CNT+3)*4),itmp3 /* prepare a3 for handle_exception */
        mov     $1,t0                       /* set maybe-leaf flag                */
 
 L_asm_handle_exception_stack_loop:
-       sub     $(10*4),sp                  /* create stackframe                  */
+       sub     $(12*4),sp                  /* keep stack 16-byte aligned         */
        mov     xptr,4*4(sp)                /* save exception pointer             */
        mov     xpc,5*4(sp)                 /* save exception pc                  */
        add     sp,itmp3                    /* calculate Java sp into a3...       */
-       add     $(10*4),itmp3
+       add     $(12*4),itmp3
        mov     itmp3,7*4(sp)               /* ...and save it                     */
        mov     t0,8*4(sp)                  /* save maybe-leaf flag               */
 
        mov     xpc,0*4(sp)                 /* pass exception pc                  */
-       call    codegen_findmethod
+       call    codegen_get_pv_from_pc
        mov     v0,6*4(sp)                  /* save data segment pointer          */
 
        mov     4*4(sp),itmp3               /* pass exception pointer             */
@@ -378,7 +306,7 @@ L_asm_handle_exception_stack_loop:
        mov     v0,xpc                      /* move handlerpc into xpc            */
        mov     4*4(sp),xptr                /* restore exception pointer          */
        mov     8*4(sp),t0                  /* get maybe-leaf flag                */
-       add     $(10*4),sp                  /* free stackframe                    */
+       add     $(12*4),sp                  /* free stackframe                    */
 
        test    t0,t0                       /* test for maybe-leaf flag           */
        jz      L_asm_handle_exception_no_leaf
@@ -386,7 +314,7 @@ L_asm_handle_exception_stack_loop:
        RESTORE_ARGUMENT_REGISTERS(0)       /* if this is a leaf method, we have  */
        RESTORE_TEMPORARY_REGISTERS(ARG_CNT)/* to restore arg and temp registers  */
 
-       add     $((ARG_CNT+TMP_CNT)*4),sp   /* remove maybe-leaf stackframe       */
+       add     $((ARG_CNT+TMP_CNT+3)*4),sp /* remove maybe-leaf stackframe       */
 
 L_asm_handle_exception_no_leaf:
        jmp     *xpc                        /* jump to exception handler          */
@@ -395,12 +323,12 @@ L_asm_handle_exception_not_catched:
        mov     4*4(sp),xptr                /* restore exception pointer          */
        mov     6*4(sp),itmp3               /* restore data segment pointer       */
        mov     8*4(sp),t0                  /* get maybe-leaf flag                */
-       add     $(10*4),sp                  /* free stackframe                    */
+       add     $(12*4),sp                  /* free stackframe                    */
 
        test    t0,t0
        jz      L_asm_handle_exception_no_leaf_stack
 
-       add     $((ARG_CNT+TMP_CNT)*4),sp   /* remove maybe-leaf stackframe       */
+       add     $((ARG_CNT+TMP_CNT+3)*4),sp /* remove maybe-leaf stackframe       */
        xor     t0,t0                       /* clear the maybe-leaf flag          */
 
 L_asm_handle_exception_no_leaf_stack:
@@ -418,11 +346,11 @@ L_asm_handle_exception_no_leaf_stack:
        cmp     $2,itmp1
        je      int2
 
-       mov     -3*4(itmp2),s0
+       mov     -3*8(itmp2),s0
 int2:  
-       mov     -2*4(itmp2),s1
+       mov     -2*8(itmp2),s1
 int1:  
-       mov     -1*4(itmp2),s2
+       mov     -1*8(itmp2),s2
 
        shl     $2,itmp1                    /* multiply by 4 bytes                */
        sub     itmp1,itmp2
@@ -466,12 +394,36 @@ noflt:
        jmp     L_asm_handle_exception_stack_loop
                
 
-/* asm_wrapper_patcher *********************************************************
+/* asm_abstractmethoderror *****************************************************
+
+   Creates and throws an AbstractMethodError.
+
+*******************************************************************************/
+
+asm_abstractmethoderror:
+       sub     $(3*4),sp                   /* keep stack 16-byte aligned         */
+       mov     sp,itmp1                    /* pass java sp                       */
+       add     $((1+3)*4),itmp1            /* skip frame and return address      */
+       mov     itmp1,0*4(sp)               /* arg0: java sp                      */
+       mov     3*4(sp),itmp2               /* pass exception address             */
+       sub     $2,itmp2                    /* exception address is ra - 2        */
+       mov     itmp2,1*4(sp)               /* arg1: exception address            */
+       call    exceptions_asm_new_abstractmethoderror
+                                           /* exception pointer is return value  */
+       add     $(3*4),sp                   /* remove stack frame                 */
+
+       pop     xpc                         /* get exception address              */
+       sub     $2,xpc                      /* exception address is ra - 2        */
+       jmp     L_asm_handle_exception
+
+
+/* asm_patcher_wrapper *********************************************************
 
    XXX
 
    Stack layout:
-     20   return address
+     24   return address
+     20   REG_ITMP3
      16   pointer to virtual java_objectheader
      12   last byte of machine code (xmcode)
       8   machine code (which is patched back later)
@@ -480,87 +432,144 @@ noflt:
 
 *******************************************************************************/
 
-asm_wrapper_patcher:
-       sub     $((2+4)*4+sizestackframeinfo),sp /* create stack frame            */
+asm_patcher_wrapper:
+       sub     $((1+4+4)*4),sp             /* keep stack 16-byte aligned         */
 
        mov     itmp1,(0+4)*4(sp)           /* save itmp1 and itmp2               */
-       mov     itmp2,(1+4)*4(sp)           /* may be used by some instructions   */
-
-       mov     sp,itmp1                    /* create stackframe info             */
-       add     $((2+4)*4),itmp1
-       mov     itmp1,0*4(sp)               /* stackframeinfo pointer             */
-       movl    $0,1*4(sp)                  /* if pv is NULL, use findmethod      */
-       mov     sp,itmp2
-       add     $((6+2+4)*4+sizestackframeinfo),itmp2
-       mov     itmp2,2*4(sp)               /* pass Java sp                       */
-       mov     ((5+2+4)*4+sizestackframeinfo)(sp),itmp3
-       mov     itmp3,3*4(sp)               /* pass ra to java function           */
-       call    stacktrace_create_inline_stackframeinfo
-
-       mov     sp,itmp1                    /* pass stack pointer                 */
-       add     $((1+2+4)*4+sizestackframeinfo),itmp1  /* skip function pointer   */
-       mov     itmp1,0*4(sp)
-       mov     (0+2+4)*4+sizestackframeinfo(sp),itmp1 /* get function pointer    */
-       call    *itmp1                      /* call the patcher function          */
-       mov     v0,1*4(sp)                  /* save return value                  */
+       mov     itmp2,(1+4)*4(sp)
 
-       mov     sp,itmp1                    /* remove stackframe info             */
-       add     $((2+4)*4),itmp1
-       mov     itmp1,0*4(sp)               /* stackframeinfo pointer             */
-       call    stacktrace_remove_stackframeinfo
+       mov     sp,itmp1                    /* pass SP of patcher stub            */
+       add     $((1+4+4)*4),itmp1
+       mov     itmp1,0*4(sp)
+       movl    $0,1*4(sp)                  /* pass PV (if NULL, use findmethod)  */
+       movl    $0,2*4(sp)                  /* pass RA (it's on the stack)        */
+       call    patcher_wrapper
+       mov     v0,itmp3                    /* save return value                  */
 
        mov     (0+4)*4(sp),itmp1           /* restore itmp1 and itmp2            */
-       mov     (1+4)*4(sp),itmp2           /* may be used by some instructions   */
-       mov     1*4(sp),itmp3               /* restore return value               */
+       mov     (1+4)*4(sp),itmp2
 
-       add     $((5+2+4)*4+sizestackframeinfo),sp /* remove stack frame, keep ra */
        test    itmp3,itmp3                 /* exception thrown?                  */
-       jz      L_asm_wrapper_patcher_exception
-       ret                                 /* call new patched code              */
-
-L_asm_wrapper_patcher_exception:
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
-       call    builtin_asm_get_exceptionptrptr
-       mov     v0,itmp2
-#else
-       lea     _exceptionptr,itmp2
-#endif
-       mov     (itmp2),xptr                /* get the exception pointer          */
-       movl    $0,(itmp2)                  /* clear the exception pointer        */
+       jne     L_asm_patcher_wrapper_exception
+
+       mov     (5+1+4+4)*4(sp),itmp3       /* restore itmp3                      */
+       add     $((6+1+4+4)*4),sp           /* remove stack frame, keep RA        */
 
+       ret                                 /* jump to new patched code           */
+
+L_asm_patcher_wrapper_exception:
+       add     $((6+1+4+4)*4),sp           /* remove stack frame, keep RA        */
+       mov     itmp3,xptr                  /* get exception                      */
        pop     xpc                         /* get and remove return address      */
        jmp     L_asm_handle_exception
 
+#if defined(ENABLE_REPLACEMENT)
 
 /* asm_replacement_out *********************************************************
 
    This code is jumped to from the replacement-out stubs that are executed
-   when a thread reaches a activated replacement point.
+   when a thread reaches an activated replacement point.
 
    The purpose of asm_replacement_out is to read out the parts of the
    execution state that cannot be accessed from C code, store this state,
-   and call the C function replace_me.
+   and then call the C function replace_me.
 
    Stack layout:
+      4                 start of stack inside method to replace
       0   rplpoint *    info on the replacement point that was reached
 
 *******************************************************************************/
 
-asm_replacement_out:
-       sub     $(sizeexecutionstate),sp    /* create stack frame                 */
-
-       mov     itmp1,(0*8+offes_regs)(sp)
-       movl    $0,(4+0*8+offes_regs)(sp)
-       mov     itmp2,(1*8+offes_regs)(sp)
-       movl    $0,(4+1*8+offes_regs)(sp)
-       mov     itmp3,(2*8+offes_regs)(sp)
-       movl    $0,(4+2*8+offes_regs)(sp)
+/* some room to accommodate changes of the stack frame size during replacement */
+       /* XXX we should find a cleaner solution here */
+#define REPLACEMENT_ROOM  512
 
-       mov     sizeexecutionstate(sp),itmp1   /* rplpoint *                         */
-    push    sp /* XXX */                /* arg1: execution state              */
+asm_replacement_out:
+    /* create stack frame */
+       sub     $(sizeexecutionstate + REPLACEMENT_ROOM),sp
+
+       /* save registers in execution state */
+       mov     %eax,(EAX*4+offes_intregs)(sp)
+       mov     %ebx,(EBX*4+offes_intregs)(sp)
+       mov     %ecx,(ECX*4+offes_intregs)(sp)
+       mov     %edx,(EDX*4+offes_intregs)(sp)
+       mov     %esi,(ESI*4+offes_intregs)(sp)
+       mov     %edi,(EDI*4+offes_intregs)(sp)
+       mov     %ebp,(EBP*4+offes_intregs)(sp)
+       movl    $0  ,(ESP*4+offes_intregs)(sp) /* not used */
+
+       /* calculate sp of method */
+       mov     sp,itmp1
+       add     $(sizeexecutionstate + REPLACEMENT_ROOM + 4),itmp1
+       mov     itmp1,(offes_sp)(sp)
+
+       /* pv must be looked up via AVL tree */
+       movl    $0,(offes_pv)(sp)
+
+       /* call replace_me */
+       mov     -4(itmp1),itmp1             /* rplpoint *                         */
+    push    sp                          /* arg1: execution state              */
     push    itmp1                       /* arg0: replacement point            */
     call    replace_me                  /* call C function replace_me         */
-    call    abort                       /* NEVER REACHED                      */
+
+
+/* asm_replacement_in **********************************************************
+
+   This code writes the given execution state and jumps to the replacement
+   code.
+
+   This function never returns!
+
+   C prototype:
+      void asm_replacement_in(executionstate *es, replace_safestack_t *st);
+
+*******************************************************************************/
+
+asm_replacement_in:
+       /* get arguments (sp still points at the return address here) */
+       mov     8(sp),%esi                  /* replace_safestack_t *st            */
+       mov     4(sp),%ebp                  /* executionstate *es == safe stack   */
+
+       /* switch to the safe stack and build a stack frame */
+       mov     %ebp,sp
+       sub             $(1*4),sp           /* room for one outgoing argument     */
+
+       /* call replace_build_execution_state(st) */
+       mov             %esi,(0*4)(sp)      /* arg0: st                           */
+       call    replace_build_execution_state
+
+       /* set new sp, read from es (still addressed via %ebp) */
+       mov     (offes_sp)(%ebp),sp
+
+       /* push address of new code, it is consumed by the final ret */
+       push    (offes_pc)(%ebp)
+
+       /* allocate an executionstate_t on the stack */
+       sub             $(sizeexecutionstate),sp
+
+       /* call replace_free_safestack(st, address of allocated executionstate_t) */
+       push    sp   /* tmpes */
+       push    %esi /* st    */
+       call    replace_free_safestack
+       add     $(2*4),sp                   /* remove the two arguments           */
+
+       /* copy registers from the execution state allocated above (tmpes) */
+       mov     (EAX*4+offes_intregs)(sp),%eax
+       mov     (EBX*4+offes_intregs)(sp),%ebx
+       mov     (ECX*4+offes_intregs)(sp),%ecx
+       mov     (EDX*4+offes_intregs)(sp),%edx
+       mov     (ESI*4+offes_intregs)(sp),%esi
+       mov     (EDI*4+offes_intregs)(sp),%edi
+       mov     (EBP*4+offes_intregs)(sp),%ebp
+
+       /* pop the execution state off the stack */
+       add             $(sizeexecutionstate),sp
+
+       /* jump to new code, hold your thumbs! ;) */
+       ret                                 /* return to the address pushed above */
+
+#endif /* defined(ENABLE_REPLACEMENT) */
+
 
 /************************ function asm_builtin_x2x *****************************
 *                                                                              *
@@ -569,184 +578,83 @@ asm_replacement_out:
 *******************************************************************************/
 
 asm_builtin_f2i:
-       sub     $4,%esp
+       sub     $(3*4),%esp             /* reserve 3 stack words; the fsts below stores the float in the lowest one. NOTE(review): 3 words + return address = 16 bytes, presumably for stack alignment at the call -- confirm */
        fsts    (%esp)
        call    builtin_f2i
-       add     $4,%esp
+       add     $(3*4),%esp             /* release the 3 words reserved above */
        ret
 
 asm_builtin_d2i:
-       sub     $8,%esp
+       sub     $(3*4),%esp             /* reserve 3 stack words; the fstl below stores the double in the lowest two. NOTE(review): 3 words + return address = 16 bytes, presumably for stack alignment at the call -- confirm */
        fstl    (%esp)
        call    builtin_d2i
-       add     $8,%esp
+       add     $(3*4),%esp             /* release the 3 words reserved above */
        ret
 
 asm_builtin_f2l:
-       sub     $4,%esp
+       sub     $(3*4),%esp             /* reserve 3 stack words; the fsts below stores the float in the lowest one. NOTE(review): 3 words + return address = 16 bytes, presumably for stack alignment at the call -- confirm */
        fsts    (%esp)
        call    builtin_f2l
-       add     $4,%esp
+       add     $(3*4),%esp             /* release the 3 words reserved above */
        ret
 
 asm_builtin_d2l:
-       sub     $8,%esp
+       sub     $(3*4),%esp             /* reserve 3 stack words; the fstl below stores the double in the lowest two. NOTE(review): 3 words + return address = 16 bytes, presumably for stack alignment at the call -- confirm */
        fstl    (%esp)
        call    builtin_d2l
-       add     $8,%esp
+       add     $(3*4),%esp             /* release the 3 words reserved above */
        ret
 
 
-/******************* function asm_initialize_thread_stack **********************
-*                                                                              *
-* initialized a thread stack                                                   *
-* (to)->restorePoint = asm_initialize_thread_stack((u1*)(func), (to)->stackEnd)*
-*                                                                              *
-*******************************************************************************/
+/* asm_compare_and_swap ********************************************************
 
-asm_initialize_thread_stack:
-               mov             8(%esp),%eax            /* (to)->stackEnd                     */
-               sub             $36,%eax                /* 4 bytes * 8 regs + 4 bytes func    */
-                               
-               xor             %edx,%edx
-               mov             %edx,0(%eax)
-               mov             %edx,4(%eax)
-               mov             %edx,8(%eax)
-               mov             %edx,12(%eax)
-               mov             %edx,16(%eax)
-               mov             %edx,20(%eax)
-               mov     %edx,24(%eax)
-               mov     %edx,28(%eax)
-                               
-               mov     4(%esp),%edx            /* save (u1*) (func)                  */
-               mov     %edx,32(%eax)
-
-               ret                             /* return restorepoint in %eax        */
-
-
-/******************* function asm_perform_threadswitch *************************
-*                                                                              *
-*   void asm_perform_threadswitch (u1 **from, u1 **to, u1 **stackTop);         *
-*                                                                              *
-*   performs a threadswitch                                                    *
-*                                                                              *
-*******************************************************************************/
+   Does an atomic compare and swap.  Required for the lock
+   implementation.
 
-asm_perform_threadswitch:
-       sub     $36,%esp
-          
-       mov     %eax,0(%esp)
-       mov     %ecx,4(%esp)
-       mov     %edx,8(%esp)
-       mov     %ebx,12(%esp)
-       mov     %esp,16(%esp)
-       mov     %ebp,20(%esp)
-       mov     %esi,24(%esp)
-       mov     %edi,28(%esp)
-          
-       mov     36(%esp),%eax         /* save current return address              */
-       mov     %eax,32(%esp)
-          
-       mov     40(%esp),%eax         /* first argument **from                    */
-       mov     %esp,0(%eax)
-          
-       mov     48(%esp),%eax         /* third argument **stackTop                */
-       mov     %esp,0(%eax)
-          
-       mov     44(%esp),%eax         /* second argument **to                     */
-       mov     0(%eax),%esp          /* load new stack pointer                   */
-          
-       mov     0(%esp),%eax
-       mov     4(%esp),%ecx
-       mov     8(%esp),%edx
-       mov     12(%esp),%ebx
-                                     /* skip stack pointer                       */
-       mov     20(%esp),%ebp
-       mov     24(%esp),%esi
-       mov     28(%esp),%edi
-          
-       add     $32,%esp              /* leave return address on stack            */
-       ret
-               
+   Atomically do the following: check whether the location still contains
+   `oldval`. If so, replace it with `newval`. In either case, return the
+   value the location held before the operation.
 
-/********************* function asm_switchstackandcall *************************
-*                                                                              *
-*  int asm_switchstackandcall (void *stack, void *func, void **stacktopsave,   *
-*                                     void *p);                                       *
-*                                                                              *
-*   Switches to a new stack, calls a function and switches back.               *
-*       a0      new stack pointer                                              *
-*       a1      function pointer                                               *
-*              a2              pointer to variable where stack top should be stored           *
-*       a3      pointer to user data, is passed to the function                *
-*                                                                              *
-*******************************************************************************/
+   RETURN VALUE:
+       the old value at *p
 
-asm_switchstackandcall:
-       mov     4(%esp),%edx          /* first argument *stack                    */
-       sub     $8,%edx               /* allocate new stack                       */
+   long asm_compare_and_swap(volatile long *p, long oldval, long newval);
 
-       mov     (%esp),%eax           /* save return address on new stack         */
-       mov     %eax,(%edx)
+*******************************************************************************/
+
+asm_compare_and_swap:
+       mov     (2*4)(sp),%eax          /* %eax = oldval, the implicit comparand  */
+       mov     (3*4)(sp),%edx          /* %edx = newval                          */
+       mov     (1*4)(sp),%ecx          /* %ecx = p                               */
+       lock; cmpxchgl %edx,(%ecx)      /* if *p equals %eax store %edx to *p,    */
+                                       /* otherwise load *p into %eax            */
+       ret                             /* %eax = value that was found at *p      */
 
-       mov     %esp,4(%edx)          /* save old stack pointer on new stack      */
 
-       mov     12(%esp),%eax         /* third argument **stacktopsave            */
-       mov     %esp,(%eax)           /* save old stack pointer to variable       */
+/* asm_memory_barrier **********************************************************
 
-       mov     8(%esp),%eax          /* load function pointer                    */
-       mov     16(%esp),%ecx         /* fourth argument *p                       */
-       
-       mov     %edx,%esp             /* switch to new stack                      */
+   A memory barrier for the Java Memory Model.
 
-       sub     $4,%esp
-       mov     %ecx,0(%esp)          /* pass pointer                             */
-       call    *%eax                 /* and call function                        */
-       add     $4,%esp
+*******************************************************************************/
 
-       mov     (%esp),%edx           /* load return address                      */
-       mov     4(%esp),%esp          /* switch to old stack                      */
-       mov     %edx,(%esp)
+asm_memory_barrier:
+       /* A locked read-modify-write that adds 0 to the word at the top of
+          the stack: memory contents are unchanged, only the ordering effect
+          of the lock prefix is wanted.  The explicit `l` suffix fixes the
+          operand size at 32 bits; with an immediate source and a memory
+          destination the assembler cannot infer it. */
+       lock; addl $0,0(sp)
        ret
 
                
-asm_getclassvalues_atomic:
-_crit_restart2:
-       mov     4(%esp),%ecx        /* super */
-       mov     8(%esp),%edx        /* sub */
-_crit_begin2:
-       mov     offbaseval(%ecx),%eax
-       mov     offdiffval(%ecx),%ecx
-       mov     offbaseval(%edx),%edx
-_crit_end2:
-       push    %ebx
-       mov     16(%esp),%ebx      /* out */
-       mov     %eax,offcast_super_baseval(%ebx)
-       mov     %ecx,offcast_super_diffval(%ebx)
-       mov     %edx,offcast_sub_baseval(%ebx)
-       pop     %ebx
-       ret
+/* asm_get_cycle_count *********************************************************
 
-       .data
+   Get the current time-stamp counter from the CPU.
 
-asm_criticalsections:
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
-#if 0
-       .long   _crit_begin1
-       .long   _crit_end1
-       .long   _crit_restart1
-#endif
-       .long   _crit_begin2
-       .long   _crit_end2
-       .long   _crit_restart2
-#endif
-       .long 0
+*******************************************************************************/
+
+asm_get_cycle_count:
+       rdtsc                           /* %edx:%eax = 64-bit time-stamp counter  */
+       ret                             /* NOTE(review): presumably returned to C as a 64-bit value in %edx:%eax -- confirm against the prototype */
 
 
-/* Disable exec-stacks, required for Gentoo ***********************************/
+/* disable exec-stacks ********************************************************/
 
-#if defined(__GCC__) && defined(__ELF__)
-       .section .note.GNU-stack,"",@progbits
+#if defined(__linux__) && defined(__ELF__)
+       .section .note.GNU-stack,"",%progbits
 #endif