* Removed all Id tags.
[cacao.git] / src / vm / jit / replace.c
index c8b0337d4b6541fc021b4664fd47effeaa738f77..03969f4a6125b2fcbb3ce0235002ae29d050b437 100644 (file)
@@ -1,6 +1,6 @@
-/* vm/jit/replace.c - on-stack replacement of methods
+/* src/vm/jit/replace.c - on-stack replacement of methods
 
-   Copyright (C) 1996-2005, 2006 R. Grafl, A. Krall, C. Kruegel,
+   Copyright (C) 1996-2005, 2006, 2007 R. Grafl, A. Krall, C. Kruegel,
    C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
    E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
    J. Wenninger, Institut f. Computersprachen - TU Wien
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
    02110-1301, USA.
 
-   Contact: cacao@cacaojvm.org
-
-   Authors: Edwin Steiner
-
-   Changes:
-
-   $Id$
-
 */
 
 #include "config.h"
 #include "arch.h"
 
 #include "mm/memory.h"
+
+#include "threads/threads-common.h"
+
 #include "toolbox/logging.h"
-#include "vm/options.h"
+
 #include "vm/stringlocal.h"
+
 #include "vm/jit/abi.h"
-#include "vm/jit/jit.h"
-#include "vm/jit/replace.h"
 #include "vm/jit/asmpart.h"
 #include "vm/jit/disass.h"
-#include "vm/jit/show.h"
+#include "vm/jit/jit.h"
+#include "vm/jit/md.h"
 #include "vm/jit/methodheader.h"
+#include "vm/jit/replace.h"
+#include "vm/jit/show.h"
+#include "vm/jit/stack.h"
+
+#include "vmcore/options.h"
+#include "vmcore/classcache.h"
+
 
-#include "native/include/java_lang_String.h"
+#define REPLACE_PATCH_DYNAMIC_CALL
+/*#define REPLACE_PATCH_ALL*/
+
+#if defined(ENABLE_VMLOG)
+#include <vmlog_cacao.h>
+#endif
+
+/*** architecture-dependent configuration *************************************/
+
+/* first unset the macros (default) */
+#undef REPLACE_RA_BETWEEN_FRAMES
+#undef REPLACE_RA_TOP_OF_FRAME
+#undef REPLACE_RA_LINKAGE_AREA
+#undef REPLACE_LEAFMETHODS_RA_REGISTER
+#undef REPLACE_REG_RA
+
+/* i386, x86_64 and m68k */
+#if defined(__I386__) || defined(__X86_64__) || defined(__M68K__)
+#define REPLACE_RA_BETWEEN_FRAMES
+/* alpha */
+#elif defined(__ALPHA__)
+#define REPLACE_RA_TOP_OF_FRAME
+#define REPLACE_LEAFMETHODS_RA_REGISTER
+#define REPLACE_REG_RA REG_RA
+/* powerpc */
+#elif defined(__POWERPC__)
+#define REPLACE_RA_LINKAGE_AREA
+#define REPLACE_LEAFMETHODS_RA_REGISTER
+#define REPLACE_REG_RA REG_ITMP3 /* the execution state has the LR in itmp3 */
+/* s390 */
+#elif defined(__S390__)
+#define REPLACE_RA_TOP_OF_FRAME
+#define REPLACE_REG_RA REG_ITMP3
+#endif
 
 
 /*** configuration of native stack slot size **********************************/
@@ -72,20 +107,81 @@ typedef u8 stackslot_t;
 
 /*** debugging ****************************************************************/
 
-/*#define REPLACE_VERBOSE*/
+#if !defined(NDEBUG)
+static void java_value_print(s4 type, replace_val_t value);
+static void replace_stackframeinfo_println(stackframeinfo *sfi);
+#endif
 
-#if !defined(NDEBUG) && defined(REPLACE_VERBOSE)
-#define DOLOG(code) do{ if (1) { code; } } while(0)
+#if !defined(NDEBUG)
+#define DOLOG(code)        do{ if (opt_TraceReplacement > 1) { code; } } while(0)
+#define DOLOG_SHORT(code)  do{ if (opt_TraceReplacement > 0) { code; } } while(0)
 #else
 #define DOLOG(code)
+#define DOLOG_SHORT(code)
 #endif
 
 
+/*** statistics ***************************************************************/
+
+#define REPLACE_STATISTICS
+
+#if defined(REPLACE_STATISTICS)
+
+static int stat_replacements = 0;
+static int stat_frames = 0;
+static int stat_recompile = 0;
+static int stat_staticpatch = 0;
+static int stat_unroll_inline = 0;
+static int stat_unroll_call = 0;
+static int stat_dist_frames[20] = { 0 };
+static int stat_dist_locals[20] = { 0 };
+static int stat_dist_locals_adr[10] = { 0 };
+static int stat_dist_locals_prim[10] = { 0 };
+static int stat_dist_locals_ret[10] = { 0 };
+static int stat_dist_locals_void[10] = { 0 };
+static int stat_dist_stack[10] = { 0 };
+static int stat_dist_stack_adr[10] = { 0 };
+static int stat_dist_stack_prim[10] = { 0 };
+static int stat_dist_stack_ret[10] = { 0 };
+static int stat_methods = 0;
+static int stat_rploints = 0;
+static int stat_regallocs = 0;
+static int stat_dist_method_rplpoints[20] = { 0 };
+
+#define REPLACE_COUNT(cnt)  (cnt)++
+#define REPLACE_COUNT_IF(cnt, cond)  do{ if(cond) (cnt)++; } while(0)
+#define REPLACE_COUNT_INC(cnt, inc)  ((cnt) += (inc))
+
+#define REPLACE_COUNT_DIST(array, val)                               \
+    do {                                                             \
+        int limit = (sizeof(array) / sizeof(int)) - 1;               \
+        if ((val) < (limit)) (array)[val]++;                         \
+        else (array)[limit]++;                                       \
+    } while (0)
+
+static void replace_statistics_source_frame(sourceframe_t *frame);
+
+#else
+
+#define REPLACE_COUNT(cnt)
+#define REPLACE_COUNT_IF(cnt, cond)
+#define REPLACE_COUNT_INC(cnt, inc)
+#define REPLACE_COUNT_DIST(array, val)
+
+#endif /* defined(REPLACE_STATISTICS) */
+
+
 /*** constants used internally ************************************************/
 
 #define TOP_IS_NORMAL    0
 #define TOP_IS_ON_STACK  1
 #define TOP_IS_IN_ITMP1  2
+#define TOP_IS_VOID      3
+
+
+/******************************************************************************/
+/* PART I: Creating / freeing replacement points                              */
+/******************************************************************************/
 
 
 /* replace_create_replacement_point ********************************************
@@ -97,6 +193,7 @@ typedef u8 stackslot_t;
           iinfo............inlining info for the current position
           rp...............pre-allocated (uninitialized) rplpoint
           type.............RPLPOINT_TYPE constant
+          iptr.............current instruction
           *pra.............current rplalloc pointer
           javalocals.......the javalocals at the current point
           stackvars........the stack variables at the current point
@@ -126,20 +223,17 @@ static void replace_create_replacement_point(jitdata *jd,
 
        ra = *pra;
 
-       /* there will be a replacement point at the start of this block */
+       REPLACE_COUNT(stat_rploints);
 
        rp->method = (iinfo) ? iinfo->method : jd->m;
        rp->pc = NULL;        /* set by codegen */
-       rp->outcode = NULL;   /* set by codegen */
        rp->callsize = 0;     /* set by codegen */
-       rp->target = NULL;
        rp->regalloc = ra;
        rp->flags = 0;
        rp->type = type;
        rp->id = iptr->flags.bits >> INS_FLAG_ID_SHIFT;
 
        /* XXX unify these two fields */
-       rp->code = jd->code;
        rp->parent = (iinfo) ? iinfo->rp : NULL;
 
        /* store local allocation info of javalocals */
@@ -151,17 +245,17 @@ static void replace_create_replacement_point(jitdata *jd,
                                continue;
 
                        ra->index = i;
-                       if (index < UNUSED) {
-                               ra->regoff = (UNUSED - index) - 1;
-                               ra->type = TYPE_RET;
-                               ra->flags = 0;
-                       }
-                       else {
+                       if (index >= 0) {
                                v = VAR(index);
                                ra->flags = v->flags & (INMEMORY);
                                ra->regoff = v->vv.regoff;
                                ra->type = v->type;
                        }
+                       else {
+                               ra->regoff = RETADDR_FROM_JAVALOCAL(index);
+                               ra->type = TYPE_RET;
+                               ra->flags = 0;
+                       }
                        ra++;
                }
        }
@@ -172,8 +266,14 @@ static void replace_create_replacement_point(jitdata *jd,
                v = VAR(stackvars[i]);
                ra->flags = v->flags & (INMEMORY);
                ra->index = (i < paramcount) ? RPLALLOC_PARAM : RPLALLOC_STACK;
-               ra->regoff = v->vv.regoff;
                ra->type  = v->type;
+               /* XXX how to handle locals on the stack containing returnAddresses? */
+               if (v->type == TYPE_RET) {
+                       assert(stackvars[i] >= jd->localcount);
+                       ra->regoff = v->vv.retaddr->nr;
+               }
+               else
+                       ra->regoff = v->vv.regoff;
                ra++;
        }
 
@@ -185,6 +285,59 @@ static void replace_create_replacement_point(jitdata *jd,
 }
 
 
+/* replace_create_inline_start_replacement_point *******************************
+
+   Create an INLINE_START replacement point.
+
+   IN:
+       jd...............current jitdata
+          rp...............pre-allocated (uninitialized) rplpoint
+          iptr.............current instruction
+          *pra.............current rplalloc pointer
+          javalocals.......the javalocals at the current point
+
+   OUT:
+       *pra.............points to the next free rplalloc
+
+   RETURN VALUE:
+       the insinfo_inline * for the following inlined body
+
+*******************************************************************************/
+
+static insinfo_inline * replace_create_inline_start_replacement_point(
+                                                                                        jitdata *jd,
+                                                                                        rplpoint *rp,
+                                                                                        instruction *iptr,
+                                                                                        rplalloc **pra,
+                                                                                        s4 *javalocals)
+{
+       insinfo_inline *calleeinfo;
+       rplalloc       *ra;
+
+       calleeinfo = iptr->sx.s23.s3.inlineinfo; /* inline info attached to iptr */
+
+       calleeinfo->rp = rp; /* remember this rplpoint in the inline info */
+
+       replace_create_replacement_point(jd, calleeinfo->parent, rp, /* parent's inline info is passed as context */
+                       RPLPOINT_TYPE_INLINE, iptr, pra,
+                       javalocals,
+                       calleeinfo->stackvars, calleeinfo->stackvarscount,
+                       calleeinfo->paramcount);
+
+       if (calleeinfo->synclocal != UNUSED) { /* append one extra allocation for the synclocal variable */
+               ra = (*pra)++; /* take the next free rplalloc and advance the caller's pointer */
+               ra->index  = RPLALLOC_SYNC;
+               ra->regoff = jd->var[calleeinfo->synclocal].vv.regoff;
+               ra->flags  = jd->var[calleeinfo->synclocal].flags & INMEMORY;
+               ra->type   = TYPE_ADR;
+
+               rp->regalloccount++; /* account for the extra RPLALLOC_SYNC entry */
+       }
+
+       return calleeinfo;
+}
+
+
 /* replace_create_replacement_points *******************************************
  
    Create the replacement points for the given code.
@@ -206,6 +359,27 @@ static void replace_create_replacement_point(jitdata *jd,
    
 *******************************************************************************/
 
+#define CLEAR_javalocals(array, method)                              \
+    do {                                                             \
+        for (i=0; i<(method)->maxlocals; ++i)                        \
+            (array)[i] = UNUSED;                                     \
+    } while (0)
+
+#define COPY_OR_CLEAR_javalocals(dest, array, method)                \
+    do {                                                             \
+        if ((array) != NULL)                                         \
+            MCOPY((dest), (array), s4, (method)->maxlocals);         \
+        else                                                         \
+            CLEAR_javalocals((dest), (method));                      \
+    } while (0)
+
+#define COUNT_javalocals(array, method, counter)                     \
+    do {                                                             \
+        for (i=0; i<(method)->maxlocals; ++i)                        \
+            if ((array)[i] != UNUSED)                                \
+                               (counter)++;                                         \
+    } while (0)
+
 bool replace_create_replacement_points(jitdata *jd)
 {
        codeinfo        *code;
@@ -222,11 +396,16 @@ bool replace_create_replacement_points(jitdata *jd)
        instruction     *iptr;
        instruction     *iend;
        s4              *javalocals;
+       s4              *jl;
        methoddesc      *md;
-       s4               j;
        insinfo_inline  *iinfo;
-       insinfo_inline  *calleeinfo;
        s4               startcount;
+       s4               firstcount;
+#if defined(REPLACE_PATCH_DYNAMIC_CALL)
+       bool             needentry;
+#endif
+
+       REPLACE_COUNT(stat_methods);
 
        /* get required compiler data */
 
@@ -243,10 +422,27 @@ bool replace_create_replacement_points(jitdata *jd)
        assert(code->regalloccount == 0);
        assert(code->globalcount == 0);
 
-       /* iterate over the basic block list to find replacement points */
-
        m = code->m;
 
+       /* set codeinfo flags */
+
+       if (jd->isleafmethod)
+               CODE_SETFLAG_LEAFMETHOD(code);
+
+       /* in instance methods, we may need a rplpoint at the method entry */
+
+#if defined(REPLACE_PATCH_DYNAMIC_CALL)
+       if (!(m->flags & ACC_STATIC)) {
+               jd->basicblocks[0].bitflags |= BBFLAG_REPLACEMENT;
+               needentry = true;
+       }
+       else {
+               needentry = false;
+       }
+#endif /* defined(REPLACE_PATCH_DYNAMIC_CALL) */
+
+       /* iterate over the basic block list to find replacement points */
+
        count = 0;
        alloccount = 0;
 
@@ -266,17 +462,14 @@ bool replace_create_replacement_points(jitdata *jd)
 
                /* initialize javalocals at the start of this block */
 
-               if (bptr->javalocals)
-                       MCOPY(javalocals, bptr->javalocals, s4, m->maxlocals);
-               else
-                       for (i=0; i<m->maxlocals; ++i)
-                               javalocals[i] = UNUSED;
+               COPY_OR_CLEAR_javalocals(javalocals, bptr->javalocals, m);
 
                /* iterate over the instructions */
 
                iptr = bptr->iinstr;
                iend = iptr + bptr->icount;
                startcount = count;
+               firstcount = count;
 
                for (; iptr != iend; ++iptr) {
                        switch (iptr->opc) {
@@ -286,9 +479,7 @@ bool replace_create_replacement_points(jitdata *jd)
                                case ICMD_INVOKEINTERFACE:
                                        INSTRUCTION_GET_METHODDESC(iptr, md);
                                        count++;
-                                       for (i=0; i<m->maxlocals; ++i)
-                                               if (javalocals[i] != UNUSED)
-                                                       alloccount++;
+                                       COUNT_javalocals(javalocals, m, alloccount);
                                        alloccount += iptr->s1.argcount;
                                        if (iinfo)
                                                alloccount -= iinfo->throughcount;
@@ -299,19 +490,7 @@ bool replace_create_replacement_points(jitdata *jd)
                                case ICMD_FSTORE:
                                case ICMD_DSTORE:
                                case ICMD_ASTORE:
-                                       /* XXX share code with stack.c */
-                                       j = iptr->dst.varindex;
-                                       i = iptr->sx.s23.s3.javaindex;
-                                       if (i != UNUSED) {
-                                               if (iptr->flags.bits & INS_FLAG_RETADDR)
-                                                       javalocals[i] = iptr->sx.s23.s2.retaddrnr;
-                                               else
-                                                       javalocals[i] = j;
-                                               if (iptr->flags.bits & INS_FLAG_KILL_PREV)
-                                                       javalocals[i-1] = UNUSED;
-                                               if (iptr->flags.bits & INS_FLAG_KILL_NEXT)
-                                                       javalocals[i+1] = UNUSED;
-                                       }
+                                       stack_javalocals_store(iptr, javalocals);
                                        break;
 
                                case ICMD_IRETURN:
@@ -329,19 +508,32 @@ bool replace_create_replacement_points(jitdata *jd)
                                        iinfo = iptr->sx.s23.s3.inlineinfo;
 
                                        count++;
-                                       for (i=0; i<m->maxlocals; ++i)
-                                               if (javalocals[i] != UNUSED)
-                                                       alloccount++;
+                                       COUNT_javalocals(javalocals, m, alloccount);
                                        alloccount += iinfo->stackvarscount;
                                        if (iinfo->synclocal != UNUSED)
                                                alloccount++;
 
                                        m = iinfo->method;
-                                       if (iinfo->javalocals_start)
-                                               MCOPY(javalocals, iinfo->javalocals_start, s4, m->maxlocals);
+                                       /* javalocals may be set at next block start, or now */
+                                       COPY_OR_CLEAR_javalocals(javalocals, iinfo->javalocals_start, m);
+                                       break;
+
+                               case ICMD_INLINE_BODY:
+                                       assert(iinfo == iptr->sx.s23.s3.inlineinfo);
+
+                                       jl = iinfo->javalocals_start;
+                                       if (jl == NULL) {
+                                               /* get the javalocals from the following block start */
+                                               assert(bptr->next);
+                                               jl = bptr->next->javalocals;
+                                       }
+                                       count++;
+                                       COUNT_javalocals(jl, m, alloccount);
                                        break;
 
                                case ICMD_INLINE_END:
+                                       assert(iinfo == iptr->sx.s23.s3.inlineinfo ||
+                                                  iinfo == iptr->sx.s23.s3.inlineinfo->parent);
                                        iinfo = iptr->sx.s23.s3.inlineinfo;
                                        m = iinfo->outer;
                                        if (iinfo->javalocals_end)
@@ -349,6 +541,9 @@ bool replace_create_replacement_points(jitdata *jd)
                                        iinfo = iinfo->parent;
                                        break;
                        }
+
+                       if (iptr == bptr->iinstr)
+                               firstcount = count;
                } /* end instruction loop */
 
                /* create replacement points at targets of backward branches */
@@ -356,17 +551,22 @@ bool replace_create_replacement_points(jitdata *jd)
                /* replacement point inside the block.                       */
 
                if (bptr->bitflags & BBFLAG_REPLACEMENT) {
-                       if (count > startcount) {
-                               /* we don't need it */
+#if defined(REPLACE_PATCH_DYNAMIC_CALL)
+                       int test = (needentry && bptr == jd->basicblocks) ? firstcount : count;
+#else
+                       int test = count;
+#endif
+                       if (test > startcount) {
+                               /* we don't need an extra rplpoint */
                                bptr->bitflags &= ~BBFLAG_REPLACEMENT;
                        }
                        else {
                                count++;
                                alloccount += bptr->indepth;
+                               if (bptr->inlineinfo)
+                                       alloccount -= bptr->inlineinfo->throughcount;
 
-                               for (i=0; i<m->maxlocals; ++i)
-                                       if (bptr->javalocals[i] != UNUSED)
-                                               alloccount++;
+                               COUNT_javalocals(bptr->javalocals, bptr->method, alloccount);
                        }
                }
 
@@ -402,19 +602,19 @@ bool replace_create_replacement_points(jitdata *jd)
 
                /* initialize javalocals at the start of this block */
 
-               if (bptr->javalocals)
-                       MCOPY(javalocals, bptr->javalocals, s4, m->maxlocals);
-               else
-                       for (i=0; i<m->maxlocals; ++i)
-                               javalocals[i] = UNUSED;
+               COPY_OR_CLEAR_javalocals(javalocals, bptr->javalocals, m);
 
                /* create replacement points at targets of backward branches */
 
                if (bptr->bitflags & BBFLAG_REPLACEMENT) {
 
+                       i = (iinfo) ? iinfo->throughcount : 0;
                        replace_create_replacement_point(jd, iinfo, rp++,
                                        bptr->type, bptr->iinstr, &ra,
-                                       bptr->javalocals, bptr->invars, bptr->indepth, 0);
+                                       bptr->javalocals, bptr->invars + i, bptr->indepth - i, 0);
+
+                       if (JITDATA_HAS_FLAG_COUNTDOWN(jd))
+                               rp[-1].flags |= RPLPOINT_FLAG_COUNTDOWN;
                }
 
                /* iterate over the instructions */
@@ -443,19 +643,7 @@ bool replace_create_replacement_points(jitdata *jd)
                                case ICMD_FSTORE:
                                case ICMD_DSTORE:
                                case ICMD_ASTORE:
-                                       /* XXX share code with stack.c */
-                                       j = iptr->dst.varindex;
-                                       i = iptr->sx.s23.s3.javaindex;
-                                       if (i != UNUSED) {
-                                               if (iptr->flags.bits & INS_FLAG_RETADDR)
-                                                       javalocals[i] = iptr->sx.s23.s2.retaddrnr;
-                                               else
-                                                       javalocals[i] = j;
-                                               if (iptr->flags.bits & INS_FLAG_KILL_PREV)
-                                                       javalocals[i-1] = UNUSED;
-                                               if (iptr->flags.bits & INS_FLAG_KILL_NEXT)
-                                                       javalocals[i+1] = UNUSED;
-                                       }
+                                       stack_javalocals_store(iptr, javalocals);
                                        break;
 
                                case ICMD_IRETURN:
@@ -475,31 +663,32 @@ bool replace_create_replacement_points(jitdata *jd)
                                        break;
 
                                case ICMD_INLINE_START:
-                                       calleeinfo = iptr->sx.s23.s3.inlineinfo;
+                                       iinfo = replace_create_inline_start_replacement_point(
+                                                               jd, rp++, iptr, &ra, javalocals);
+                                       m = iinfo->method;
+                                       /* javalocals may be set at next block start, or now */
+                                       COPY_OR_CLEAR_javalocals(javalocals, iinfo->javalocals_start, m);
+                                       break;
 
-                                       calleeinfo->rp = rp;
-                                       replace_create_replacement_point(jd, iinfo, rp++,
-                                                       RPLPOINT_TYPE_INLINE, iptr, &ra,
-                                                       javalocals,
-                                                       calleeinfo->stackvars, calleeinfo->stackvarscount,
-                                                       calleeinfo->paramcount);
-
-                                       if (calleeinfo->synclocal != UNUSED) {
-                                               ra->index = RPLALLOC_SYNC;
-                                               ra->regoff = jd->var[calleeinfo->synclocal].vv.regoff;
-                                               ra->flags  = jd->var[calleeinfo->synclocal].flags & INMEMORY;
-                                               ra->type = TYPE_ADR;
-                                               ra++;
-                                               rp[-1].regalloccount++;
-                                       }
+                               case ICMD_INLINE_BODY:
+                                       assert(iinfo == iptr->sx.s23.s3.inlineinfo);
 
-                                       iinfo = calleeinfo;
-                                       m = iinfo->method;
-                                       if (iinfo->javalocals_start)
-                                               MCOPY(javalocals, iinfo->javalocals_start, s4, m->maxlocals);
+                                       jl = iinfo->javalocals_start;
+                                       if (jl == NULL) {
+                                               /* get the javalocals from the following block start */
+                                               assert(bptr->next);
+                                               jl = bptr->next->javalocals;
+                                       }
+                                       /* create a non-trappable rplpoint */
+                                       replace_create_replacement_point(jd, iinfo, rp++,
+                                                       RPLPOINT_TYPE_BODY, iptr, &ra,
+                                                       jl, NULL, 0, 0);
+                                       rp[-1].flags |= RPLPOINT_FLAG_NOTRAP;
                                        break;
 
                                case ICMD_INLINE_END:
+                                       assert(iinfo == iptr->sx.s23.s3.inlineinfo ||
+                                                  iinfo == iptr->sx.s23.s3.inlineinfo->parent);
                                        iinfo = iptr->sx.s23.s3.inlineinfo;
                                        m = iinfo->outer;
                                        if (iinfo->javalocals_end)
@@ -522,9 +711,15 @@ bool replace_create_replacement_points(jitdata *jd)
        code->globalcount   = 0;
        code->savedintcount = INT_SAV_CNT - rd->savintreguse;
        code->savedfltcount = FLT_SAV_CNT - rd->savfltreguse;
+#if defined(HAS_ADDRESS_REGISTER_FILE)
+       code->savedadrcount = ADR_SAV_CNT - rd->savadrreguse;
+#endif
        code->memuse        = rd->memuse;
        code->stackframesize = jd->cd->stackframesize;
 
+       REPLACE_COUNT_DIST(stat_dist_method_rplpoints, count);
+       REPLACE_COUNT_INC(stat_regallocs, alloccount);
+
        /* everything alright */
 
        return true;
@@ -558,66 +753,171 @@ void replace_free_replacement_points(codeinfo *code)
 }
 
 
-/* replace_activate_replacement_point ******************************************
+/******************************************************************************/
+/* PART II: Activating / deactivating replacement points                      */
+/******************************************************************************/
+
+
+/* replace_activate_replacement_points *****************************************
  
-   Activate a replacement point. When this function returns, the
-   replacement point is "armed", that is each thread reaching this point
-   will be replace to `target`.
+   Activate the replacement points of the given compilation unit. When this
+   function returns, the replacement points are "armed", so each thread
+   reaching one of the points will enter the replacement mechanism.
    
    IN:
-       rp...............replacement point to activate
-          target...........target of replacement
+       code.............codeinfo of which replacement points should be
+                                               activated
+          mappable.........if true, only mappable replacement points are
+                                               activated
   
 *******************************************************************************/
 
-void replace_activate_replacement_point(rplpoint *rp,rplpoint *target)
+void replace_activate_replacement_points(codeinfo *code, bool mappable)
 {
-       assert(rp->target == NULL);
+       rplpoint *rp;
+       s4        i;
+       s4        count;    /* number of points that will actually be patched */
+       s4        index;    /* running ordinal among the trappable (non-NOTRAP) points */
+       u1       *savedmcode;
+
+       assert(code->savedmcode == NULL); /* must not already hold saved machine code */
+
+       /* count trappable replacement points */
+
+       count = 0;
+       index = 0;
+       i = code->rplpointcount;
+       rp = code->rplpoints;
+       for (; i--; rp++) {
+               if (rp->flags & RPLPOINT_FLAG_NOTRAP)
+                       continue;
+
+               index++; /* counts every trappable point, including RETURN points skipped below */
+
+               if (mappable && (rp->type == RPLPOINT_TYPE_RETURN))
+                       continue;
+
+               count++;
+       }
+
+       /* allocate buffer for saved machine code */
+
+       savedmcode = MNEW(u1, count * REPLACEMENT_PATCH_SIZE);
+       code->savedmcode = savedmcode;
+       savedmcode += count * REPLACEMENT_PATCH_SIZE; /* start at the end; the buffer is filled backwards */
+
+       /* activate trappable replacement points */
+       /* (in reverse order to handle overlapping points within basic blocks) */
 
-       DOLOG( printf("activate replacement point:\n");
-                  replace_replacement_point_println(rp, 1); fflush(stdout); );
+       i = code->rplpointcount;
+       rp = code->rplpoints + i;
+       while (rp--, i--) { /* NOTE(review): the final rp-- steps before rplpoints[0] */
+               assert(!(rp->flags & RPLPOINT_FLAG_ACTIVE));
+
+               if (rp->flags & RPLPOINT_FLAG_NOTRAP)
+                       continue;
+
+               index--; /* now the 0-based ordinal of rp among the trappable points */
 
-       rp->target = target;
+               if (mappable && (rp->type == RPLPOINT_TYPE_RETURN))
+                       continue;
+
+               DOLOG( printf("activate replacement point:\n");
+                          replace_replacement_point_println(rp, 1); fflush(stdout); );
 
-#if (defined(__I386__) || defined(__X86_64__) || defined(__ALPHA__) || defined(__POWERPC__) || defined(__MIPS__)) && defined(ENABLE_JIT)
-       md_patch_replacement_point(rp);
+               savedmcode -= REPLACEMENT_PATCH_SIZE; /* step back to the slot for this point */
+
+#if (defined(__I386__) || defined(__X86_64__) || defined(__ALPHA__) || defined(__POWERPC__) || defined(__MIPS__) || defined(__S390__)) && defined(ENABLE_JIT)
+               md_patch_replacement_point(code, index, rp, savedmcode);
 #endif
+               rp->flags |= RPLPOINT_FLAG_ACTIVE;
+       }
+
+       assert(savedmcode == code->savedmcode); /* exactly `count` slots were stepped over */
 }
 
 
-/* replace_deactivate_replacement_point ****************************************
+/* replace_deactivate_replacement_points ***************************************
  
-   Deactivate a replacement point. When this function returns, the
-   replacement point is "un-armed", that is a each thread reaching this point
-   will just continue normally.
+   Deactivate the replacement points in the given compilation unit.
+   When this function returns, the replacement points will be "un-armed",
+   that is, each thread reaching a point will just continue normally.
    
    IN:
-       rp...............replacement point to deactivate
+       code.............the compilation unit
   
 *******************************************************************************/
 
-void replace_deactivate_replacement_point(rplpoint *rp)
+void replace_deactivate_replacement_points(codeinfo *code)
 {
-       assert(rp->target);
+       rplpoint *rp;
+       s4        i;
+       s4        count;    /* number of points found with RPLPOINT_FLAG_ACTIVE set */
+       u1       *savedmcode;
+
+       if (code->savedmcode == NULL) { /* no saved machine code buffer is present */
+               /* disarm countdown points by patching the branches */
+
+               i = code->rplpointcount;
+               rp = code->rplpoints;
+               for (; i--; rp++) { /* currently has no effect: the patch below is under #if 0 */
+                       if ((rp->flags & (RPLPOINT_FLAG_ACTIVE | RPLPOINT_FLAG_COUNTDOWN))
+                                       == RPLPOINT_FLAG_COUNTDOWN) /* COUNTDOWN set and ACTIVE clear */
+                       {
+#if 0
+                               *(s4*) (rp->pc + 9) = 0; /* XXX machine dependent! */
+#endif
+                       }
+               }
+               return;
+       }
 
-       DOLOG( printf("deactivate replacement point:\n");
-                  replace_replacement_point_println(rp, 1); fflush(stdout); );
+       assert(code->savedmcode != NULL); /* always true here: the NULL case returned above */
+       savedmcode = code->savedmcode;
 
-       rp->target = NULL;
+       /* de-activate each trappable replacement point */
 
-#if (defined(__I386__) || defined(__X86_64__) || defined(__ALPHA__) || defined(__POWERPC__) || defined(__MIPS__)) && defined(ENABLE_JIT)
-       md_patch_replacement_point(rp);
+       i = code->rplpointcount;
+       rp = code->rplpoints;
+       count = 0;
+       for (; i--; rp++) {
+               if (!(rp->flags & RPLPOINT_FLAG_ACTIVE))
+                       continue;
+
+               count++;
+
+               DOLOG( printf("deactivate replacement point:\n");
+                          replace_replacement_point_println(rp, 1); fflush(stdout); );
+
+#if (defined(__I386__) || defined(__X86_64__) || defined(__ALPHA__) || defined(__POWERPC__) || defined(__MIPS__) || defined(__S390__)) && defined(ENABLE_JIT)
+               md_patch_replacement_point(code, -1, rp, savedmcode); /* index -1: presumably means "restore" -- confirm in md code */
 #endif
+
+               rp->flags &= ~RPLPOINT_FLAG_ACTIVE;
+
+               savedmcode += REPLACEMENT_PATCH_SIZE; /* slots are consumed in forward order */
+       }
+
+       assert(savedmcode == code->savedmcode + count * REPLACEMENT_PATCH_SIZE);
+
+       /* free saved machine code */
+
+       MFREE(code->savedmcode, u1, count * REPLACEMENT_PATCH_SIZE); /* size is derived from the ACTIVE count */
+       code->savedmcode = NULL;
 }
 
 
+/******************************************************************************/
+/* PART III: The replacement mechanism                                        */
+/******************************************************************************/
+
+
 /* replace_read_value **********************************************************
 
    Read a value with the given allocation from the execution state.
    
    IN:
           es...............execution state
-          sp...............stack pointer of the execution state (XXX eliminate?)
           ra...............allocation
           javaval..........where to put the value
 
@@ -627,19 +927,18 @@ void replace_deactivate_replacement_point(rplpoint *rp)
 *******************************************************************************/
 
 static void replace_read_value(executionstate_t *es,
-                                                          stackslot_t *sp,
                                                           rplalloc *ra,
-                                                          u8 *javaval)
+                                                          replace_val_t *javaval)
 {
        if (ra->flags & INMEMORY) {
                /* XXX HAS_4BYTE_STACKSLOT may not be the right discriminant here */
 #ifdef HAS_4BYTE_STACKSLOT
                if (IS_2_WORD_TYPE(ra->type)) {
-                       *javaval = *(u8*)(sp + ra->regoff);
+                       javaval->l = *(u8*)(es->sp + ra->regoff);
                }
                else {
 #endif
-                       *javaval = sp[ra->regoff];
+                       javaval->p = *(ptrint*)(es->sp + ra->regoff);
 #ifdef HAS_4BYTE_STACKSLOT
                }
 #endif
@@ -647,10 +946,25 @@ static void replace_read_value(executionstate_t *es,
        else {
                /* allocated register */
                if (IS_FLT_DBL_TYPE(ra->type)) {
-                       *javaval = es->fltregs[ra->regoff];
+                       javaval->d = es->fltregs[ra->regoff];
+
+                       if (ra->type == TYPE_FLT)
+                               javaval->f = javaval->d;
                }
+#if defined(HAS_ADDRESS_REGISTER_FILE)
+               else if (IS_ADR_TYPE(ra->type)) {
+                       javaval->p = es->adrregs[ra->regoff];
+               }
+#endif
                else {
-                       *javaval = es->intregs[ra->regoff];
+#if defined(SUPPORT_COMBINE_INTEGER_REGISTERS)
+                       if (ra->type == TYPE_LNG) {
+                               javaval->words.lo = es->intregs[GET_LOW_REG(ra->regoff)];
+                               javaval->words.hi = es->intregs[GET_HIGH_REG(ra->regoff)];
+                       }
+                       else
+#endif /* defined(SUPPORT_COMBINE_INTEGER_REGISTERS) */
+                               javaval->p = es->intregs[ra->regoff];
                }
        }
 }
@@ -662,59 +976,116 @@ static void replace_read_value(executionstate_t *es,
    
    IN:
           es...............execution state
-          sp...............stack pointer of the execution state (XXX eliminate?)
           ra...............allocation
           *javaval.........the value
 
 *******************************************************************************/
 
 static void replace_write_value(executionstate_t *es,
-                                                           stackslot_t *sp,
                                                            rplalloc *ra,
-                                                           u8 *javaval)
+                                                           replace_val_t *javaval)
 {
        if (ra->flags & INMEMORY) {
                /* XXX HAS_4BYTE_STACKSLOT may not be the right discriminant here */
 #ifdef HAS_4BYTE_STACKSLOT
                if (IS_2_WORD_TYPE(ra->type)) {
-                       *(u8*)(sp + ra->regoff) = *javaval;
+                       *(u8*)(es->sp + ra->regoff) = javaval->l;
                }
                else {
 #endif
-                       sp[ra->regoff] = *javaval;
+                       *(ptrint*)(es->sp + ra->regoff) = javaval->p;
 #ifdef HAS_4BYTE_STACKSLOT
                }
 #endif
        }
        else {
                /* allocated register */
-               if (IS_FLT_DBL_TYPE(ra->type)) {
-                       es->fltregs[ra->regoff] = *javaval;
-               }
-               else {
-                       es->intregs[ra->regoff] = *javaval;
+               switch (ra->type) {
+                       case TYPE_FLT:
+                               es->fltregs[ra->regoff] = (double) javaval->f;
+                               break;
+                       case TYPE_DBL:
+                               es->fltregs[ra->regoff] = javaval->d;
+                               break;
+#if defined(SUPPORT_COMBINE_INTEGER_REGISTERS)
+                       case TYPE_LNG:
+                               es->intregs[GET_LOW_REG(ra->regoff)] = javaval->words.lo;
+                               es->intregs[GET_HIGH_REG(ra->regoff)] = javaval->words.hi;
+                               break;
+#endif
+#if defined(HAS_ADDRESS_REGISTER_FILE)
+                       case TYPE_ADR:
+                               es->adrregs[ra->regoff] = javaval->p;
+#endif
+                       default:
+                               es->intregs[ra->regoff] = javaval->p;
                }
        }
 }
 
 
-/* replace_read_executionstate *************************************************
+/* replace_new_sourceframe *****************************************************
 
-   Read the given executions state and translate it to a source frame.
+   Allocate a new source frame and insert it at the front of the frame list.
    
+   IN:
+          ss...............the source state
+
+   OUT:
+          ss->frames.......set to new frame (the new head of the frame list).
+
+   RETURN VALUE:
+       returns the new frame
+
+*******************************************************************************/
+
+static sourceframe_t *replace_new_sourceframe(sourcestate_t *ss)
+{
+       sourceframe_t *newhead;
+
+       /* allocate the frame and clear all of its fields */
+
+       newhead = DNEW(sourceframe_t);
+       MZERO(newhead, sourceframe_t, 1);
+
+       /* the previous list head becomes the successor of the new frame */
+
+       newhead->down = ss->frames;
+       ss->frames    = newhead;
+
+       return newhead;
+}
+
+
+/* replace_read_executionstate *************************************************
+
+   Read a source frame from the given execution state.
+   The new source frame is pushed to the front of the frame list of the
+   source state.
+
    IN:
        rp...............replacement point at which `es` was taken
           es...............execution state
-          ss...............where to put the source state
+          ss...............the source state to add the source frame to
+          topframe.........true, if the first (top-most) source frame on the
+                           stack is to be read
 
    OUT:
-       *ss..............the source state derived from the execution state
+       *ss..............the source state with the newly created source frame
+                           added
   
 *******************************************************************************/
 
+/* Lookup table indexed by the type of a replacement point. The entry is   */
+/* the type that replace_read_executionstate stores in frame->type.        */
+
+static s4 replace_normalize_type_map[] = {
+/* RPLPOINT_TYPE_STD    |--> */ RPLPOINT_TYPE_STD,
+/* RPLPOINT_TYPE_EXH    |--> */ RPLPOINT_TYPE_STD,
+/* RPLPOINT_TYPE_SBR    |--> */ RPLPOINT_TYPE_STD,
+/* RPLPOINT_TYPE_CALL   |--> */ RPLPOINT_TYPE_CALL,
+/* RPLPOINT_TYPE_INLINE |--> */ RPLPOINT_TYPE_CALL,
+/* RPLPOINT_TYPE_RETURN |--> */ RPLPOINT_TYPE_RETURN,
+/* RPLPOINT_TYPE_BODY   |--> */ RPLPOINT_TYPE_STD
+};
+
+
 static void replace_read_executionstate(rplpoint *rp,
                                                                                executionstate_t *es,
-                                                                               sourcestate_t *ss,
+                                                                               sourcestate_t *ss,
                                                                                bool topframe)
 {
        methodinfo    *m;
@@ -727,7 +1098,7 @@ static void replace_read_executionstate(rplpoint *rp,
        stackslot_t   *sp;
        stackslot_t   *basesp;
 
-       code = rp->code;
+       code = code_find_codeinfo_for_pc(rp->pc);
        m = rp->method;
        topslot = TOP_IS_NORMAL;
 
@@ -735,63 +1106,47 @@ static void replace_read_executionstate(rplpoint *rp,
 
        sp = (stackslot_t *) es->sp;
 
-       /* on some architectures the returnAddress is passed on the stack by JSR */
-
-#if defined(__I386__) || defined(__X86_64__)
-       if (rp->type == BBTYPE_SBR) {
-               sp++;
-               topslot = TOP_IS_ON_STACK; /* XXX */
-       }
-#endif
-
        /* in some cases the top stack slot is passed in REG_ITMP1 */
 
-       if (  (rp->type == BBTYPE_EXH)
-#if defined(__ALPHA__) || defined(__POWERPC__) || defined(__MIPS__)
-          || (rp->type == BBTYPE_SBR) /* XXX */
-#endif
-          )
-       {
+       if (rp->type == BBTYPE_EXH) {
                topslot = TOP_IS_IN_ITMP1;
        }
 
        /* calculate base stack pointer */
 
-       basesp = sp + code_get_stack_frame_size(code);
+       basesp = sp + code->stackframesize;
 
        /* create the source frame */
 
-       frame = DNEW(sourceframe_t);
-       frame->up = ss->frames;
+       frame = replace_new_sourceframe(ss);
        frame->method = rp->method;
        frame->id = rp->id;
-       frame->syncslotcount = 0;
-       frame->syncslots = NULL;
-#if !defined(NDEBUG)
-       frame->debug_rp = rp;
-#endif
-
-       ss->frames = frame;
+       assert(rp->type >= 0 && rp->type < sizeof(replace_normalize_type_map)/sizeof(s4));
+       frame->type = replace_normalize_type_map[rp->type];
+       frame->fromrp = rp;
+       frame->fromcode = code;
 
        /* read local variables */
 
        count = m->maxlocals;
        frame->javalocalcount = count;
-       frame->javalocals = DMNEW(u8, count);
+       frame->javalocals = DMNEW(replace_val_t, count);
        frame->javalocaltype = DMNEW(u1, count);
 
-#if !defined(NDEBUG)
        /* mark values as undefined */
        for (i=0; i<count; ++i) {
-               frame->javalocals[i] = (u8) 0x00dead0000dead00ULL;
+#if !defined(NDEBUG)
+               frame->javalocals[i].l = (u8) 0x00dead0000dead00ULL;
+#endif
                frame->javalocaltype[i] = TYPE_VOID;
        }
 
        /* some entries in the intregs array are not meaningful */
        /*es->intregs[REG_ITMP3] = (u8) 0x11dead1111dead11ULL;*/
-       es->intregs[REG_SP   ] = (u8) 0x11dead1111dead11ULL;
+#if !defined(NDEBUG)
+       es->intregs[REG_SP   ] = (ptrint) 0x11dead1111dead11ULL;
 #ifdef REG_PV
-       es->intregs[REG_PV   ] = (u8) 0x11dead1111dead11ULL;
+       es->intregs[REG_PV   ] = (ptrint) 0x11dead1111dead11ULL;
 #endif
 #endif /* !defined(NDEBUG) */
 
@@ -804,23 +1159,53 @@ static void replace_read_executionstate(rplpoint *rp,
                assert(i < m->maxlocals);
                frame->javalocaltype[i] = ra->type;
                if (ra->type == TYPE_RET)
-                       frame->javalocals[i] = ra->regoff;
+                       frame->javalocals[i].i = ra->regoff;
                else
-                       replace_read_value(es, sp, ra, frame->javalocals + i);
+                       replace_read_value(es, ra, frame->javalocals + i);
                ra++;
                count--;
        }
 
+       /* read instance, if this is the first rplpoint */
+
+#if defined(REPLACE_PATCH_DYNAMIC_CALL)
+       if (topframe && !(rp->method->flags & ACC_STATIC) && rp == code->rplpoints) {
+#if 1
+               /* we are at the start of the method body, so if local 0 is set, */
+               /* it is the instance.                                           */
+               if (frame->javalocaltype[0] == TYPE_ADR)
+                       frame->instance = frame->javalocals[0];
+#else
+               rplalloc instra;
+               methoddesc *md;
+
+               md = rp->method->parseddesc;
+               assert(md->params);
+               assert(md->paramcount >= 1);
+               instra.type = TYPE_ADR;
+               instra.regoff = md->params[0].regoff;
+               if (md->params[0].inmemory) {
+                       instra.flags = INMEMORY;
+                       instra.regoff += (1 + code->stackframesize) * SIZE_OF_STACKSLOT;
+               }
+               else {
+                       instra.flags = 0;
+               }
+               replace_read_value(es, &instra, &(frame->instance));
+#endif
+       }
+#endif /* defined(REPLACE_PATCH_DYNAMIC_CALL) */
+
        /* read stack slots */
 
        frame->javastackdepth = count;
-       frame->javastack = DMNEW(u8, count);
+       frame->javastack = DMNEW(replace_val_t, count);
        frame->javastacktype = DMNEW(u1, count);
 
 #if !defined(NDEBUG)
        /* mark values as undefined */
        for (i=0; i<count; ++i) {
-               frame->javastack[i] = (u8) 0x00dead0000dead00ULL;
+               frame->javastack[i].l = (u8) 0x00dead0000dead00ULL;
                frame->javastacktype[i] = TYPE_VOID;
        }
 #endif /* !defined(NDEBUG) */
@@ -833,7 +1218,8 @@ static void replace_read_executionstate(rplpoint *rp,
                assert(count);
 
                assert(ra->index == RPLALLOC_STACK);
-               frame->javastack[i] = sp[-1];
+               assert(ra->type == TYPE_ADR);
+               frame->javastack[i].p = sp[-1];
                frame->javastacktype[i] = TYPE_ADR; /* XXX RET */
                count--;
                i++;
@@ -843,12 +1229,23 @@ static void replace_read_executionstate(rplpoint *rp,
                assert(count);
 
                assert(ra->index == RPLALLOC_STACK);
-               frame->javastack[i] = es->intregs[REG_ITMP1];
+               assert(ra->type == TYPE_ADR);
+               frame->javastack[i].p = es->intregs[REG_ITMP1];
                frame->javastacktype[i] = TYPE_ADR; /* XXX RET */
                count--;
                i++;
                ra++;
        }
+       else if (topslot == TOP_IS_VOID) {
+               assert(count);
+
+               assert(ra->index == RPLALLOC_STACK);
+               frame->javastack[i].l = 0;
+               frame->javastacktype[i] = TYPE_VOID;
+               count--;
+               i++;
+               ra++;
+       }
 
        /* read remaining stack slots */
 
@@ -859,14 +1256,14 @@ static void replace_read_executionstate(rplpoint *rp,
                        /* only read synchronization slots when traversing an inline point */
 
                        if (!topframe) {
-                               sourceframe_t *calleeframe = frame->up;
+                               sourceframe_t *calleeframe = frame->down;
                                assert(calleeframe);
                                assert(calleeframe->syncslotcount == 0);
                                assert(calleeframe->syncslots == NULL);
 
                                calleeframe->syncslotcount = 1;
-                               calleeframe->syncslots = DMNEW(u8, 1);
-                               replace_read_value(es,sp,ra,calleeframe->syncslots);
+                               calleeframe->syncslots = DMNEW(replace_val_t, 1);
+                               replace_read_value(es,ra,calleeframe->syncslots);
                        }
 
                        frame->javastackdepth--;
@@ -881,7 +1278,10 @@ static void replace_read_executionstate(rplpoint *rp,
                        frame->javastackdepth--;
                }
                else {
-                       replace_read_value(es,sp,ra,frame->javastack + i);
+                       if (ra->type == TYPE_RET)
+                               frame->javastack[i].i = ra->regoff;
+                       else
+                               replace_read_value(es,ra,frame->javastack + i);
                        frame->javastacktype[i] = ra->type;
                        i++;
                }
@@ -891,13 +1291,16 @@ static void replace_read_executionstate(rplpoint *rp,
 
 /* replace_write_executionstate ************************************************
 
-   Translate the given source state into an execution state.
-   
+   Pop a source frame from the front of the frame list of the given source state
+   and write its values into the execution state.
+
    IN:
        rp...............replacement point for which execution state should be
-                           creates
-          es...............where to put the execution state
+                           created
+          es...............the execution state to modify
           ss...............the given source state
+          topframe.........true, if this is the last (top-most) source frame to be
+                           translated
 
    OUT:
        *es..............the execution state derived from the source state
@@ -919,7 +1322,7 @@ static void replace_write_executionstate(rplpoint *rp,
        stackslot_t    *sp;
        stackslot_t    *basesp;
 
-       code = rp->code;
+       code = code_find_codeinfo_for_pc(rp->pc);
        m = rp->method;
        topslot = TOP_IS_NORMAL;
 
@@ -927,30 +1330,17 @@ static void replace_write_executionstate(rplpoint *rp,
 
        frame = ss->frames;
        assert(frame);
-       ss->frames = frame->up;
+       ss->frames = frame->down;
 
        /* calculate stack pointer */
 
        sp = (stackslot_t *) es->sp;
 
-       basesp = sp + code_get_stack_frame_size(code);
-
-       /* on some architectures the returnAddress is passed on the stack by JSR */
-
-#if defined(__I386__) || defined(__X86_64__)
-       if (rp->type == BBTYPE_SBR) {
-               topslot = TOP_IS_ON_STACK; /* XXX */
-       }
-#endif
+       basesp = sp + code->stackframesize;
 
        /* in some cases the top stack slot is passed in REG_ITMP1 */
 
-       if (  (rp->type == BBTYPE_EXH)
-#if defined(__ALPHA__) || defined(__POWERPC__) || defined(__MIPS__)
-          || (rp->type == BBTYPE_SBR) /* XXX */
-#endif
-          )
-       {
+       if (rp->type == BBTYPE_EXH) {
                topslot = TOP_IS_IN_ITMP1;
        }
 
@@ -967,7 +1357,7 @@ static void replace_write_executionstate(rplpoint *rp,
                        /* XXX assert that it matches this rplpoint */
                }
                else
-                       replace_write_value(es, sp, ra, frame->javalocals + i);
+                       replace_write_value(es, ra, frame->javalocals + i);
                count--;
                ra++;
        }
@@ -984,7 +1374,7 @@ static void replace_write_executionstate(rplpoint *rp,
                assert(ra->index == RPLALLOC_STACK);
                assert(i < frame->javastackdepth);
                assert(frame->javastacktype[i] == TYPE_ADR);
-               sp[-1] = frame->javastack[i];
+               sp[-1] = frame->javastack[i].p;
                count--;
                i++;
                ra++;
@@ -995,7 +1385,17 @@ static void replace_write_executionstate(rplpoint *rp,
                assert(ra->index == RPLALLOC_STACK);
                assert(i < frame->javastackdepth);
                assert(frame->javastacktype[i] == TYPE_ADR);
-               es->intregs[REG_ITMP1] = frame->javastack[i];
+               es->intregs[REG_ITMP1] = frame->javastack[i].p;
+               count--;
+               i++;
+               ra++;
+       }
+       else if (topslot == TOP_IS_VOID) {
+               assert(count);
+
+               assert(ra->index == RPLALLOC_STACK);
+               assert(i < frame->javastackdepth);
+               assert(frame->javastacktype[i] == TYPE_VOID);
                count--;
                i++;
                ra++;
@@ -1010,10 +1410,11 @@ static void replace_write_executionstate(rplpoint *rp,
                        /* only write synchronization slots when traversing an inline point */
 
                        if (!topframe) {
-                               assert(frame->syncslotcount == 1); /* XXX need to understand more cases */
-                               assert(frame->syncslots != NULL);
+                               assert(frame->down);
+                               assert(frame->down->syncslotcount == 1); /* XXX need to understand more cases */
+                               assert(frame->down->syncslots != NULL);
 
-                               replace_write_value(es,sp,ra,frame->syncslots);
+                               replace_write_value(es,ra,frame->down->syncslots);
                        }
                        continue;
                }
@@ -1028,7 +1429,12 @@ static void replace_write_executionstate(rplpoint *rp,
                else {
                        assert(i < frame->javastackdepth);
                        assert(ra->type == frame->javastacktype[i]);
-                       replace_write_value(es,sp,ra,frame->javastack + i);
+                       if (ra->type == TYPE_RET) {
+                               /* XXX assert that it matches this rplpoint */
+                       }
+                       else {
+                               replace_write_value(es,ra,frame->javastack + i);
+                       }
                        i++;
                }
        }
@@ -1055,8 +1461,8 @@ static void replace_write_executionstate(rplpoint *rp,
   
 *******************************************************************************/
 
-bool replace_pop_activation_record(executionstate_t *es,
-                                                                  sourceframe_t *frame)
+u1* replace_pop_activation_record(executionstate_t *es,
+                                                                 sourceframe_t *frame)
 {
        u1 *ra;
        u1 *pv;
@@ -1072,43 +1478,50 @@ bool replace_pop_activation_record(executionstate_t *es,
 
        /* read the return address */
 
-       ra = md_stacktrace_get_returnaddress(es->sp,
-                       SIZE_OF_STACKSLOT * es->code->stackframesize);
+#if defined(REPLACE_LEAFMETHODS_RA_REGISTER)
+       if (CODE_IS_LEAFMETHOD(es->code))
+               ra = (u1*) (ptrint) es->intregs[REPLACE_REG_RA];
+       else
+#endif
+               ra = md_stacktrace_get_returnaddress(es->sp,
+                               SIZE_OF_STACKSLOT * es->code->stackframesize);
 
        DOLOG( printf("return address: %p\n", (void*)ra); );
 
-       /* find the new codeinfo */
+       assert(ra);
 
-       pv = md_codegen_get_pv_from_pc(ra);
+       /* calculate the base of the stack frame */
 
-       DOLOG( printf("PV = %p\n", (void*) pv); );
+       sp = (stackslot_t *) es->sp;
+       basesp = sp + es->code->stackframesize;
 
-       if (pv == NULL)
-               return false;
-
-       code = *(codeinfo **)(pv + CodeinfoPointer);
-
-       DOLOG( printf("CODE = %p\n", (void*) code); );
-
-       if (code == NULL)
-               return false;
-
-       /* calculate the base of the stack frame */
-
-       sp = (stackslot_t *) es->sp;
-       basesp = sp + es->code->stackframesize;
-
-       /* read slots used for synchronization */
+       /* read slots used for synchronization */
 
        assert(frame->syncslotcount == 0);
        assert(frame->syncslots == NULL);
        count = code_get_sync_slot_count(es->code);
        frame->syncslotcount = count;
-       frame->syncslots = DMNEW(u8, count);
+       frame->syncslots = DMNEW(replace_val_t, count);
        for (i=0; i<count; ++i) {
-               frame->syncslots[i] = sp[es->code->memuse + i];
+               frame->syncslots[i].p = sp[es->code->memuse + i]; /* XXX */
        }
 
+       /* restore return address, if part of frame */
+
+#if defined(REPLACE_RA_TOP_OF_FRAME)
+#if defined(REPLACE_LEAFMETHODS_RA_REGISTER)
+       if (!CODE_IS_LEAFMETHOD(es->code))
+#endif
+               es->intregs[REPLACE_REG_RA] = *--basesp;
+#endif /* REPLACE_RA_TOP_OF_FRAME */
+
+#if defined(REPLACE_RA_LINKAGE_AREA)
+#if defined(REPLACE_LEAFMETHODS_RA_REGISTER)
+       if (!CODE_IS_LEAFMETHOD(es->code))
+#endif
+               es->intregs[REPLACE_REG_RA] = basesp[LA_LR_OFFSET / sizeof(stackslot_t)];
+#endif /* REPLACE_RA_LINKAGE_AREA */
+
        /* restore saved int registers */
 
        reg = INT_REG_CNT;
@@ -1126,207 +1539,1247 @@ bool replace_pop_activation_record(executionstate_t *es,
                while (nregdescfloat[--reg] != REG_SAV)
                        ;
                basesp -= STACK_SLOTS_PER_FLOAT;
-               es->fltregs[reg] = *(u8*)basesp;
+               es->fltregs[reg] = *(double*)basesp;
+       }
+
+#if defined(HAS_ADDRESS_REGISTER_FILE)
+       /* restore saved adr registers */
+
+       reg = ADR_REG_CNT;
+       for (i=0; i<es->code->savedadrcount; ++i) {
+               while (nregdescadr[--reg] != REG_SAV)
+                       ;
+               es->adrregs[reg] = *--basesp;
        }
+#endif
+
+       /* adjust the stackpointer */
+
+       es->sp += SIZE_OF_STACKSLOT * es->code->stackframesize;
+
+#if defined(REPLACE_RA_BETWEEN_FRAMES)
+       es->sp += SIZE_OF_STACKSLOT; /* skip return address */
+#endif
 
        /* Set the new pc. Subtract one so we do not hit the replacement point */
        /* of the instruction following the call, if there is one.             */
 
        es->pc = ra - 1;
 
-       /* adjust the stackpointer */
+       /* find the new codeinfo */
+
+       pv = md_codegen_get_pv_from_pc(ra);
+
+       DOLOG( printf("PV = %p\n", (void*) pv); );
+
+       if (pv == NULL) /* XXX can this really happen? */
+               return NULL;
+
+       code = *(codeinfo **)(pv + CodeinfoPointer);
+
+       DOLOG( printf("CODE = %p\n", (void*) code); );
+
+       /* store the new pv and codeinfo; NULL is returned below if code is */
+       /* NULL, i.e. if we reached native code                             */
+
+       es->pv = pv;
+       es->code = code;
+
+       /* in debugging mode clobber non-saved registers */
+
+#if !defined(NDEBUG)
+       /* for debugging */
+       for (i=0; i<INT_REG_CNT; ++i)
+               if ((nregdescint[i] != REG_SAV)
+#if defined(REG_RA)
+                               && (i != REPLACE_REG_RA)
+#endif
+                       )
+                       es->intregs[i] = (ptrint) 0x33dead3333dead33ULL;
+       for (i=0; i<FLT_REG_CNT; ++i)
+               if (nregdescfloat[i] != REG_SAV)
+                       *(u8*)&(es->fltregs[i]) = 0x33dead3333dead33ULL;
+# if defined(HAS_ADDRESS_REGISTER_FILE)
+       for (i=0; i<ADR_REG_CNT; ++i)
+               if (nregdescadr[i] != REG_SAV)
+                       es->adrregs[i] = (ptrint) 0x33dead3333dead33ULL;
+# endif
+#endif /* !defined(NDEBUG) */
+
+       return (code) ? ra : NULL;
+}
+
+
+/* replace_patch_method_pointer ************************************************
+
+   Patch a method pointer (may be in code, data segment, vftbl, or interface
+   table).
+
+   IN:
+          mpp..............address of the method pointer to patch
+          entrypoint.......the new entrypoint of the method
+          kind.............kind of call to patch, used only for debugging
+
+*******************************************************************************/
+
+static void replace_patch_method_pointer(methodptr *mpp,
+                                                                                methodptr entrypoint,
+                                                                                const char *kind)
+{
+       DOLOG( printf("patch method pointer from: %p to %p\n",
+                                 (void*) *mpp, (void*)entrypoint); );
+
+#if !defined(NDEBUG)
+       {
+               /* look up the codeinfo behind the old and the new entrypoint */
+
+               codeinfo *oldcode = *(codeinfo **)((u1*)(*mpp) + CodeinfoPointer);
+               codeinfo *newcode = *(codeinfo **)((u1*)(entrypoint) + CodeinfoPointer);
+
+               DOLOG_SHORT( printf("\tpatch %s %p ", kind, (void*) oldcode);
+                                        method_println(oldcode->m);
+                                        printf("\t      with      %p ", (void*) newcode);
+                                        method_println(newcode->m); );
+
+               /* both entrypoints have to belong to the same method */
+
+               assert(oldcode->m == newcode->m);
+       }
+#endif
+
+       /* store the new entrypoint into the patched location */
+
+       *mpp = entrypoint;
+}
+
+
+/* replace_patch_class *********************************************************
+
+   Patch a method in the given class.
+
+   IN:
+          vftbl............vftbl of the class
+          m................the method to patch
+          oldentrypoint....the old entrypoint to replace
+          entrypoint.......the new entrypoint
+
+*******************************************************************************/
+
+void replace_patch_class(vftbl_t *vftbl,
+                                                methodinfo *m,
+                                                u1 *oldentrypoint,
+                                                u1 *entrypoint)
+{
+       s4                 idx;
+       methodptr         *cur;
+       methodptr         *end;
+
+       /* first patch the slot of the method in the virtual function table */
+
+       replace_patch_method_pointer(vftbl->table + m->vftblindex,
+                                                                entrypoint,
+                                                                "virtual  ");
+
+       /* then scan every interface table and patch each slot that still */
+       /* holds the old entrypoint                                       */
+
+       assert(oldentrypoint);
+
+       for (idx = 0; idx < vftbl->interfacetablelength; idx++) {
+               cur = vftbl->interfacetable[-idx];
+               end = cur + vftbl->interfacevftbllength[idx];
+
+               while (cur != end) {
+                       if (*cur == oldentrypoint)
+                               replace_patch_method_pointer(cur, entrypoint, "interface");
+                       cur++;
+               }
+       }
+}
+
+
+/* replace_patch_class_hierarchy ***********************************************
+
+   Patch a method in all loaded classes.
+
+   IN:
+          m................the method to patch
+          oldentrypoint....the old entrypoint to replace
+          entrypoint.......the new entrypoint
+
+*******************************************************************************/
+
+/* Arguments for replace_patch_callback; replace_patch_class_hierarchy  */
+/* fills one of these and passes its address as a void pointer.         */
+
+struct replace_patch_data_t {
+       methodinfo *m;                  /* the method to patch            */
+       u1         *oldentrypoint;      /* the old entrypoint to replace  */
+       u1         *entrypoint;         /* the new entrypoint             */
+};
+
+/* Read the codeinfo pointer stored at offset CodeinfoPointer relative  */
+/* to the given entrypoint.                                             */
+
+#define CODEINFO_OF_CODE(entrypoint) \
+       (*(codeinfo **)((u1*)(entrypoint) + CodeinfoPointer))
+
+/* The `m` field of the codeinfo found for the given entrypoint.        */
+
+#define METHOD_OF_CODE(entrypoint) \
+       (CODEINFO_OF_CODE(entrypoint)->m)
+
+void replace_patch_callback(classinfo *c, struct replace_patch_data_t *pd)
+{
+       vftbl_t   *vftbl;
+       methodptr  current;
+
+       vftbl = c->vftbl;
+
+       /* nothing to do for classes without a vftbl */
+
+       if (vftbl == NULL)
+               return;
+
+       /* the vftbl must be long enough to contain the slot of the method */
+
+       if (vftbl->vftbllength <= pd->m->vftblindex)
+               return;
+
+       /* skip slots that hold the abstract method error stub */
+
+       current = vftbl->table[pd->m->vftblindex];
+
+       if (current == &asm_abstractmethoderror)
+               return;
+
+       /* only patch if the slot currently holds code of this very method */
+
+       if (METHOD_OF_CODE(current) != pd->m)
+               return;
+
+       replace_patch_class(vftbl, pd->m, pd->oldentrypoint, pd->entrypoint);
+}
+
+void replace_patch_class_hierarchy(methodinfo *m,
+                                                                  u1 *oldentrypoint,
+                                                                  u1 *entrypoint)
+{
+       struct replace_patch_data_t args;
+
+       DOLOG_SHORT( printf("patching class hierarchy: ");
+                            method_println(m); );
+
+       /* bundle the arguments for the per-class callback */
+
+       args.entrypoint    = entrypoint;
+       args.oldentrypoint = oldentrypoint;
+       args.m             = m;
+
+       /* let the class cache call replace_patch_callback with the bundle */
+
+       classcache_foreach_loaded_class(
+                       (classcache_foreach_functionptr_t) &replace_patch_callback,
+                       (void*) &args);
+}
+
+
+/* replace_patch_future_calls **************************************************
+
+   Analyse a call site and depending on the kind of call patch the call, the
+   virtual function table, or the interface table.
+
+   IN:
+          ra...............return address pointing after the call site
+          callerframe......source frame of the caller
+          calleeframe......source frame of the callee, must have been mapped
+
+*******************************************************************************/
+
+void replace_patch_future_calls(u1 *ra,
+                                                               sourceframe_t *callerframe,
+                                                               sourceframe_t *calleeframe)
+{
+       u1             *patchpos;
+       methodptr       entrypoint;
+       methodptr       oldentrypoint;
+       bool            atentry;
+       stackframeinfo  sfi;
+       codeinfo       *calleecode;
+       methodinfo     *calleem;
+       java_object_t  *obj;
+       vftbl_t        *vftbl;
+
+       assert(ra);
+       assert(callerframe->down == calleeframe);
+
+       /* the code that shall be entered by future calls, and its method */
+
+       calleecode = calleeframe->tocode;
+       assert(calleecode);
+
+       calleem = calleeframe->method;
+       assert(calleem == calleecode->m);
+
+       entrypoint = (methodptr) calleecode->entrypoint;
+
+       /* check if we are at a method entry rplpoint at the innermost frame */
+
+       atentry = (calleeframe->down == NULL)
+                       && !(calleem->flags & ACC_STATIC)
+                       && (calleeframe->fromrp->id == 0); /* XXX */
+
+       /* ask for the position to patch; a NULL result means the call was */
+       /* not statically bound                                            */
+
+       sfi.pv = callerframe->fromcode->entrypoint;
+       patchpos = md_get_method_patch_address(ra, &sfi, NULL);
+
+       if (patchpos != NULL) {
+               /* the call was statically bound: patch the call site itself */
+
+               replace_patch_method_pointer((methodptr *) patchpos, entrypoint, "static   ");
+               return;
+       }
+
+       /* the call was dispatched dynamically; such calls can only be */
+       /* patched if we are at the entry point                        */
+
+       if (!atentry)
+               return;
+
+       assert((calleem->flags & ACC_STATIC) == 0);
+
+       oldentrypoint = calleeframe->fromcode->entrypoint;
+
+       /* without a known instance, fall back to patching all loaded classes */
+
+       obj = calleeframe->instance.a;
+
+       if (obj == NULL) {
+               DOLOG_SHORT( printf("WARNING: object instance unknown!\n"); );
+               replace_patch_class_hierarchy(calleem, oldentrypoint, entrypoint);
+               return;
+       }
+
+       /* patch the tables of the class of the instance */
+
+       vftbl = obj->vftbl;
+
+       assert(vftbl->class->vftbl == vftbl);
+
+       DOLOG_SHORT( printf("\tclass: "); class_println(vftbl->class); );
+
+       replace_patch_class(vftbl, calleem, oldentrypoint, entrypoint);
+}
+
+
+/* replace_push_activation_record **********************************************
+
+   Push a stack frame onto the execution state.
+   
+   *** This function imitates the effects of a call and the ***
+   *** method prolog of the callee.                         ***
+
+   The steps are, in order: compute the return address (after the call
+   site of rpcall, or es->pc + 1 if there is no rpcall), store it where the
+   architecture configuration macros (REPLACE_RA_*, REPLACE_REG_RA) say,
+   switch es->code and es->pc to the callee, lower es->sp by the callee's
+   stackframesize, store the callee's saved int/flt (and, if present, adr)
+   registers downwards from the old stack pointer, write the
+   synchronization slots, set es->pv, and finally call
+   replace_patch_future_calls if the call site qualifies.
+
+   IN:
+          es...............execution state
+          rpcall...........the replacement point at the call site
+          callerframe......source frame of the caller, or NULL for creating the
+                           first frame
+          calleeframe......source frame of the callee, must have been mapped
+
+   OUT:
+       *es..............the execution state after pushing the stack frame
+  
+*******************************************************************************/
+
+void replace_push_activation_record(executionstate_t *es,
+                                                                       rplpoint *rpcall,
+                                                                       sourceframe_t *callerframe,
+                                                                       sourceframe_t *calleeframe)
+{
+       s4           reg;
+       s4           i;
+       s4           count;
+       stackslot_t *basesp;
+       stackslot_t *sp;
+       u1          *ra;
+       codeinfo    *calleecode;
+
+       assert(es);
+       assert(!rpcall || callerframe);
+    assert(!rpcall || rpcall->type == RPLPOINT_TYPE_CALL);
+       assert(!rpcall || rpcall == callerframe->torp);
+       assert(calleeframe);
+       assert(!callerframe || calleeframe == callerframe->down);
+
+       /* the compilation unit we are entering */
+
+       calleecode = calleeframe->tocode;
+       assert(calleecode);
+
+       /* calculate the return address */
+
+       if (rpcall)
+               ra = rpcall->pc + rpcall->callsize;
+       else
+               ra = es->pc + 1 /* XXX this is ugly */;
+
+       /* write the return address */
+
+#if defined(REPLACE_RA_BETWEEN_FRAMES)
+       es->sp -= SIZE_OF_STACKSLOT;
+
+       *((stackslot_t *)es->sp) = (stackslot_t) ra;
+#endif /* REPLACE_RA_BETWEEN_FRAMES */
+
+#if defined(REPLACE_REG_RA)
+       es->intregs[REPLACE_REG_RA] = (ptrint) ra;
+#endif
+
+       /* we move into a new code unit */
+
+       es->code = calleecode;
+
+       /* set the new pc XXX not needed? */
+
+       es->pc = calleecode->entrypoint;
+
+       /* build the stackframe: `basesp` keeps the old stack pointer (top of
+          the new frame), `sp` and es->sp become its bottom */
+
+       DOLOG( printf("building stackframe of %d words at %p\n",
+                                 calleecode->stackframesize, (void*)es->sp); );
+
+       sp = (stackslot_t *) es->sp;
+       basesp = sp;
+
+       sp -= calleecode->stackframesize;
+       es->sp = (u1*) sp;
+
+       /* in debug mode, invalidate stack frame first (currently disabled by
+          the `&& 0` in the condition below) */
+
+       /* XXX may not invalidate linkage area used by native code! */
+#if !defined(NDEBUG) && 0
+       for (i=0; i<(basesp - sp); ++i) {
+               sp[i] = 0xdeaddeadU;
+       }
+#endif
+
+       /* save the return address register */
+
+#if defined(REPLACE_RA_TOP_OF_FRAME)
+#if defined(REPLACE_LEAFMETHODS_RA_REGISTER)
+       if (!CODE_IS_LEAFMETHOD(calleecode))
+#endif
+               *--basesp = (ptrint) ra;
+#endif /* REPLACE_RA_TOP_OF_FRAME */
+
+#if defined(REPLACE_RA_LINKAGE_AREA)
+#if defined(REPLACE_LEAFMETHODS_RA_REGISTER)
+       if (!CODE_IS_LEAFMETHOD(calleecode))
+#endif
+               basesp[LA_LR_OFFSET / sizeof(stackslot_t)] = (ptrint) ra;
+#endif /* REPLACE_RA_LINKAGE_AREA */
+
+       /* save int registers: walk the register descriptors downwards from
+          INT_REG_CNT and store the first `savedintcount` REG_SAV registers */
+
+       reg = INT_REG_CNT;
+       for (i=0; i<calleecode->savedintcount; ++i) {
+               while (nregdescint[--reg] != REG_SAV)
+                       ;
+               *--basesp = es->intregs[reg];
+
+               /* XXX may not clobber saved regs used by native code! */
+#if !defined(NDEBUG) && 0
+               es->intregs[reg] = (ptrint) 0x44dead4444dead44ULL;
+#endif
+       }
+
+       /* save flt registers (each takes STACK_SLOTS_PER_FLOAT slots) */
+
+       /* XXX align? */
+       reg = FLT_REG_CNT;
+       for (i=0; i<calleecode->savedfltcount; ++i) {
+               while (nregdescfloat[--reg] != REG_SAV)
+                       ;
+               basesp -= STACK_SLOTS_PER_FLOAT;
+               *(double*)basesp = es->fltregs[reg];
+
+               /* XXX may not clobber saved regs used by native code! */
+#if !defined(NDEBUG) && 0
+               *(u8*)&(es->fltregs[reg]) = 0x44dead4444dead44ULL;
+#endif
+       }
+
+#if defined(HAS_ADDRESS_REGISTER_FILE)
+       /* save adr registers */
+
+       reg = ADR_REG_CNT;
+       for (i=0; i<calleecode->savedadrcount; ++i) {
+               while (nregdescadr[--reg] != REG_SAV)
+                       ;
+               *--basesp = es->adrregs[reg];
+
+               /* XXX may not clobber saved regs used by native code! */
+#if !defined(NDEBUG) && 0
+               es->adrregs[reg] = (ptrint) 0x44dead4444dead44ULL;
+#endif
+       }
+#endif
+
+       /* write slots used for synchronization; they are placed at slot index
+          `memuse` and above, counted from the new stack pointer */
+
+       count = code_get_sync_slot_count(calleecode);
+       assert(count == calleeframe->syncslotcount);
+       for (i=0; i<count; ++i) {
+               sp[calleecode->memuse + i] = calleeframe->syncslots[i].p;
+       }
+
+       /* set the PV */
+
+       es->pv = calleecode->entrypoint;
+
+       /* redirect future invocations: by default only for the call site the
+          caller frame was originally at; with REPLACE_PATCH_ALL for every
+          call site whose type equals that of the caller's source rplpoint */
+
+       if (callerframe && rpcall) {
+#if defined(REPLACE_PATCH_ALL)
+               if (rpcall->type == callerframe->fromrp->type)
+#else
+               if (rpcall == callerframe->fromrp)
+#endif
+                       replace_patch_future_calls(ra, callerframe, calleeframe);
+       }
+}
+
+
+/* replace_find_replacement_point **********************************************
+
+   Find the replacement point in the given code corresponding to the
+   position given in the source frame.
+   
+   IN:
+          code.............the codeinfo in which to search the rplpoint
+          frame............the source frame defining the position to look for
+          parent...........parent replacement point to match
+
+   RETURN VALUE:
+       the replacement point
+  
+*******************************************************************************/
+
+rplpoint * replace_find_replacement_point(codeinfo *code,
+                                                                                 sourceframe_t *frame,
+                                                                                 rplpoint *parent)
+{
+       methodinfo *m;
+       rplpoint *rp;
+       s4        i;
+       s4        j;
+       s4        stacki;
+       rplalloc *ra;
+
+       assert(code);
+       assert(frame);
+
+       DOLOG( printf("searching replacement point for:\n");
+                  replace_source_frame_println(frame); );
+
+       m = frame->method;
+
+       DOLOG( printf("code = %p\n", (void*)code); );
+
+       rp = code->rplpoints;
+       i = code->rplpointcount;
+       while (i--) {
+               if (rp->id == frame->id && rp->method == frame->method
+                               && rp->parent == parent
+                               && replace_normalize_type_map[rp->type] == frame->type)
+               {
+                       /* check if returnAddresses match */
+                       /* XXX optimize: only do this if JSRs in method */
+                       DOLOG( printf("checking match for:");
+                                  replace_replacement_point_println(rp, 1); fflush(stdout); );
+                       ra = rp->regalloc;
+                       stacki = 0;
+                       for (j = rp->regalloccount; j--; ++ra) {
+                               if (ra->type == TYPE_RET) {
+                                       if (ra->index == RPLALLOC_STACK) {
+                                               assert(stacki < frame->javastackdepth);
+                                               if (frame->javastack[stacki].i != ra->regoff)
+                                                       goto no_match;
+                                               stacki++;
+                                       }
+                                       else {
+                                               assert(ra->index >= 0 && ra->index < frame->javalocalcount);
+                                               if (frame->javalocals[ra->index].i != ra->regoff)
+                                                       goto no_match;
+                                       }
+                               }
+                       }
+
+                       /* found */
+                       return rp;
+               }
+no_match:
+               rp++;
+       }
+
+#if !defined(NDEBUG)
+       printf("candidate replacement points were:\n");
+       rp = code->rplpoints;
+       i = code->rplpointcount;
+       for (; i--; ++rp) {
+               replace_replacement_point_println(rp, 1);
+       }
+#endif
+
+       vm_abort("no matching replacement point found");
+       return NULL; /* NOT REACHED */
+}
+
+
+/* replace_find_replacement_point_for_pc ***************************************
+
+   Scan all replacement points of a compilation unit in table order and
+   return the last one whose PC is not greater than the given PC.
+
+   IN:
+       code.............compilation unit the PC is in
+       pc...............the machine code PC
+
+   RETURN VALUE:
+       the last replacement point in the table with rp->pc <= pc, or
+       NULL if there is no such replacement point
+
+*******************************************************************************/
+
+rplpoint *replace_find_replacement_point_for_pc(codeinfo *code, u1 *pc)
+{
+       rplpoint *nearest;
+       rplpoint *cand;
+       s4        left;
+
+       DOLOG( printf("searching for rp in %p ", (void*)code);
+                  method_println(code->m); );
+
+       nearest = NULL;
+       cand    = code->rplpoints;
+       left    = code->rplpointcount;
+
+       /* later table entries override earlier ones */
+
+       while (left > 0) {
+               DOLOG( replace_replacement_point_println(cand, 2); );
+
+               if (!(pc < cand->pc))
+                       nearest = cand;
+
+               cand++;
+               left--;
+       }
+
+       return nearest;
+}
+
+
+/* replace_pop_native_frame ****************************************************
+
+   Unroll a native frame in the execution state and create a source frame
+   for it.
+
+   The new source frame records the stackframeinfo, the current PC, the
+   distance between sfi->sp and the current stack pointer, and the current
+   values of all registers described as REG_SAV (in ascending register
+   index order). Afterwards pv, pc, sp and code of the execution state are
+   set from the stackframeinfo.
+
+   IN:
+          es...............current execution state
+          ss...............the current source state
+          sfi..............stackframeinfo for the native frame
+
+   OUT:
+       es...............execution state after unrolling the native frame
+          ss...............gets the added native source frame
+
+*******************************************************************************/
+
+static void replace_pop_native_frame(executionstate_t *es,
+                                                                        sourcestate_t *ss,
+                                                                        stackframeinfo *sfi)
+{
+       sourceframe_t *frame;
+       codeinfo      *code;
+       s4             i,j;
+
+       assert(sfi);
+
+       frame = replace_new_sourceframe(ss);
+
+       frame->sfi = sfi;
+
+       /* remember pc and size of native frame */
+
+       frame->nativepc = es->pc;
+       frame->nativeframesize = sfi->sp - es->sp;
+       assert(frame->nativeframesize >= 0);
+
+       /* remember values of saved registers; `j` packs them densely into
+          the nativesav* arrays */
+
+       j = 0;
+       for (i=0; i<INT_REG_CNT; ++i) {
+               if (nregdescint[i] == REG_SAV)
+                       frame->nativesavint[j++] = es->intregs[i];
+       }
+
+       j = 0;
+       for (i=0; i<FLT_REG_CNT; ++i) {
+               if (nregdescfloat[i] == REG_SAV)
+                       frame->nativesavflt[j++] = es->fltregs[i];
+       }
+
+#if defined(HAS_ADDRESS_REGISTER_FILE)
+       j = 0;
+       for (i=0; i<ADR_REG_CNT; ++i) {
+               if (nregdescadr[i] == REG_SAV)
+                       frame->nativesavadr[j++] = es->adrregs[i];
+       }
+#endif
+
+       /* restore saved registers (disabled: the whole section below is
+          compiled out) */
+
+#if 0
+       /* XXX we don't have them, yet, in the sfi, so clear them */
+
+       for (i=0; i<INT_REG_CNT; ++i) {
+               if (nregdescint[i] == REG_SAV)
+                       es->intregs[i] = 0;
+       }
+
+       for (i=0; i<FLT_REG_CNT; ++i) {
+               if (nregdescfloat[i] == REG_SAV)
+                       es->fltregs[i] = 0.0;
+       }
+
+# if defined(HAS_ADDRESS_REGISTER_FILE)
+       for (i=0; i<ADR_REG_CNT; ++i) {
+               if (nregdescadr[i] == REG_SAV)
+                       es->adrregs[i] = 0;
+       }
+# endif
+#endif
+
+       /* restore pv, pc, and sp; the pc is set one byte before sfi->xpc if
+          that is set, else one byte before sfi->ra */
+
+       if (sfi->pv == NULL) {
+               /* frame of a native function call */
+               es->pv = md_codegen_get_pv_from_pc(sfi->ra);
+       }
+       else {
+               es->pv = sfi->pv;
+       }
+       es->pc = ((sfi->xpc) ? sfi->xpc : sfi->ra) - 1;
+       es->sp = sfi->sp;
+
+       /* find the new codeinfo; it is read from the location at offset
+          CodeinfoPointer relative to the PV */
+
+       DOLOG( printf("PV = %p\n", (void*) es->pv); );
+
+       assert(es->pv != NULL);
+
+       code = *(codeinfo **)(es->pv + CodeinfoPointer);
+
+       DOLOG( printf("CODE = %p\n", (void*) code); );
+
+       es->code = code;
+}
+
+
+/* replace_push_native_frame ***************************************************
+
+   Rebuild a native frame onto the execution state and remove its source frame.
+
+   Note: The native frame is "rebuilt" by setting fields like PC and stack
+         pointer in the execution state accordingly. Values in the
+                stackframeinfo may be modified, but the actual stack frame of the
+                native code is not touched.
+
+   The values of the REG_SAV registers, the PC and the frame size that were
+   recorded in the source frame are written back to the execution state.
+   The function asserts that es->sp still equals the stack pointer stored
+   in the frame's stackframeinfo.
+
+   IN:
+          es...............current execution state
+          ss...............the current source state
+
+   OUT:
+       es...............execution state after re-rolling the native frame
+          ss...............the native source frame is removed
+
+*******************************************************************************/
+
+static void replace_push_native_frame(executionstate_t *es, sourcestate_t *ss)
+{
+       sourceframe_t *frame;
+       s4             i,j;
+
+       assert(es);
+       assert(ss);
+
+       DOLOG( printf("pushing native frame\n"); );
+
+       /* remove the frame from the source state */
+
+       frame = ss->frames;
+       assert(frame);
+       assert(REPLACE_IS_NATIVE_FRAME(frame));
+
+       ss->frames = frame->down;
+
+       /* assert that the native frame has not moved */
+
+       assert(es->sp == frame->sfi->sp);
+
+       /* restore saved registers; `j` walks the densely packed nativesav*
+          arrays in ascending register index order */
+
+       j = 0;
+       for (i=0; i<INT_REG_CNT; ++i) {
+               if (nregdescint[i] == REG_SAV)
+                       es->intregs[i] = frame->nativesavint[j++];
+       }
+
+       j = 0;
+       for (i=0; i<FLT_REG_CNT; ++i) {
+               if (nregdescfloat[i] == REG_SAV)
+                       es->fltregs[i] = frame->nativesavflt[j++];
+       }
+
+#if defined(HAS_ADDRESS_REGISTER_FILE)
+       j = 0;
+       for (i=0; i<ADR_REG_CNT; ++i) {
+               if (nregdescadr[i] == REG_SAV)
+                       es->adrregs[i] = frame->nativesavadr[j++];
+       }
+#endif
+
+       /* skip the native frame on the machine stack */
+
+       es->sp -= frame->nativeframesize;
+
+       /* set the pc the next frame must return to */
+
+       es->pc = frame->nativepc;
+}
+
+
+/* replace_recover_source_state ************************************************
+
+   Recover the source state from the given replacement point and execution
+   state.
+
+   IN:
+       rp...............replacement point that has been reached, if any
+          sfi..............stackframeinfo, if called from native code
+          es...............execution state at the replacement point rp
+
+   RETURN VALUE:
+       the source state
+
+*******************************************************************************/
+
+sourcestate_t *replace_recover_source_state(rplpoint *rp,
+                                                                                       stackframeinfo *sfi,
+                                                                                   executionstate_t *es)
+{
+       sourcestate_t *ss;
+       u1            *ra;
+       bool           locked;
+#if defined(REPLACE_STATISTICS)
+       s4             depth;
+#endif
+
+       /* create the source frame structure in dump memory */
+
+       ss = DNEW(sourcestate_t);
+       ss->frames = NULL;
+
+       /* get the stackframeinfo if none is given */
+
+       if (sfi == NULL)
+               sfi = STACKFRAMEINFO;
+
+       /* each iteration of the loop recovers one source frame */
+
+       depth = 0;
+       locked = false;
+
+       while (rp || sfi) {
+
+               DOLOG( replace_executionstate_println(es); );
+
+               /* if we are not at a replacement point, it is a native frame */
+
+               if (rp == NULL) {
+                       DOLOG( printf("native frame: sfi: "); replace_stackframeinfo_println(sfi); );
+
+                       locked = true;
+                       replace_pop_native_frame(es, ss, sfi);
+                       sfi = sfi->prev;
+
+                       if (es->code == NULL)
+                               continue;
+
+                       goto after_machine_frame;
+               }
+
+               /* read the values for this source frame from the execution state */
+
+               DOLOG( printf("recovering source state for%s:\n",
+                                       (ss->frames == NULL) ? " TOPFRAME" : "");
+                          replace_replacement_point_println(rp, 1); );
+
+               replace_read_executionstate(rp, es, ss, ss->frames == NULL);
+
+#if defined(ENABLE_VMLOG)
+               vmlog_cacao_unrol_method(ss->frames->method);
+#endif
+
+#if defined(REPLACE_STATISTICS)
+               REPLACE_COUNT(stat_frames);
+               depth++;
+               replace_statistics_source_frame(ss->frames);
+#endif
+
+               /* in locked areas (below native frames), identity map the frame */
+
+               if (locked) {
+                       ss->frames->torp = ss->frames->fromrp;
+                       ss->frames->tocode = ss->frames->fromcode;
+               }
+
+               /* unroll to the next (outer) frame */
+
+               if (rp->parent) {
+                       /* this frame is in inlined code */
+
+                       DOLOG( printf("INLINED!\n"); );
+
+                       rp = rp->parent;
+
+                       assert(rp->type == RPLPOINT_TYPE_INLINE);
+                       REPLACE_COUNT(stat_unroll_inline);
+               }
+               else {
+                       /* this frame had been called at machine-level. pop it. */
+
+                       DOLOG( printf("UNWIND\n"); );
+
+                       ra = replace_pop_activation_record(es, ss->frames);
+                       if (ra == NULL) {
+                               DOLOG( printf("REACHED NATIVE CODE\n"); );
+
+                               rp = NULL;
+
+                               break; /* XXX remove to activate native frames */
+                               continue;
+                       }
+
+                       /* find the replacement point at the call site */
+
+after_machine_frame:
+                       rp = replace_find_replacement_point_for_pc(es->code, es->pc);
+
+                       if (rp == NULL)
+                               vm_abort("could not find replacement point while unrolling call");
+
+                       DOLOG( printf("found replacement point.\n");
+                                       replace_replacement_point_println(rp, 1); );
+
+                       assert(rp->type == RPLPOINT_TYPE_CALL);
+                       REPLACE_COUNT(stat_unroll_call);
+               }
+       } /* end loop over source frames */
+
+       REPLACE_COUNT_DIST(stat_dist_frames, depth);
+
+       return ss;
+}
+
+
+/* replace_map_source_state ****************************************************
+
+   Map each source frame in the given source state to a target replacement
+   point and compilation unit. If no valid code is available for a source
+   frame, it is (re)compiled.
+
+   Native frames are skipped. Frames that already have `tocode` set keep
+   their mapping. For all other frames the code is obtained with
+   jit_get_current_code, unless the previous frame was mapped to a
+   replacement point that is not of type RPLPOINT_TYPE_CALL; in that case
+   the frame is looked up in the same code as the previous frame, with
+   that replacement point as parent.
+
+   IN:
+       ss...............the source state
+
+   OUT:
+       ss...............the source state, modified: The `torp` and `tocode`
+                           fields of each source frame are set.
+
+   RETURN VALUE:
+       true.............everything went ok
+          false............an exception has been thrown
+
+*******************************************************************************/
+
+static bool replace_map_source_state(sourcestate_t *ss)
+{
+       sourceframe_t *frame;
+       codeinfo      *code;
+       rplpoint      *rp;
+       rplpoint      *parent; /* parent of inlined rplpoint */
+#if defined(REPLACE_STATISTICS)
+       codeinfo      *oldcode;
+#endif
+
+       parent = NULL;
+       code = NULL;
+
+       /* iterate over the source frames from outermost to innermost */
+
+       for (frame = ss->frames; frame != NULL; frame = frame->down) {
+
+               /* XXX skip native frames */
+
+               if (REPLACE_IS_NATIVE_FRAME(frame)) {
+                       parent = NULL;
+                       continue;
+               }
+
+               /* map frames which are not already mapped */
+
+               if (frame->tocode) {
+                       code = frame->tocode;
+                       rp = frame->torp;
+                       assert(rp);
+               }
+               else {
+                       assert(frame->torp == NULL);
+
+                       if (parent == NULL) {
+                               /* find code for this frame */
+
+#if defined(REPLACE_STATISTICS)
+                               oldcode = frame->method->code;
+#endif
+                               /* request optimization of hot methods and their callers */
+
+                               if (frame->method->hitcountdown < 0
+                                       || (frame->down && frame->down->method->hitcountdown < 0))
+                                       jit_request_optimization(frame->method);
+
+                               code = jit_get_current_code(frame->method);
+
+                               if (code == NULL)
+                                       return false; /* exception */
+
+                               REPLACE_COUNT_IF(stat_recompile, code != oldcode);
+                       }
+
+                       /* with a parent, `code` still holds the compilation unit
+                          of the previous iteration */
+
+                       assert(code);
+
+                       /* map this frame */
+
+                       rp = replace_find_replacement_point(code, frame, parent);
+
+                       frame->tocode = code;
+                       frame->torp = rp;
+               }
+
+               if (rp->type == RPLPOINT_TYPE_CALL) {
+                       parent = NULL;
+               }
+               else {
+                       /* inlining */
+                       parent = rp;
+               }
+       }
+
+       return true;
+}
+
+
+/* replace_build_execution_state_intern ****************************************
+
+   Build an execution state for the given (mapped) source state.
+
+   !!! CAUTION: This function rewrites the machine stack !!!
+
+   THIS FUNCTION MUST BE CALLED USING A SAFE STACK AREA!
+
+   The loop runs until ss->frames is NULL. Native frames are re-rolled
+   with replace_push_native_frame, which removes them from the source
+   state. For every other frame an activation record is pushed first,
+   unless the frame is inlined into the previously written one, and then
+   the frame is written with replace_write_executionstate.
+   NOTE(review): presumably replace_write_executionstate removes the
+   written frame from ss->frames, otherwise the loop would not advance --
+   confirm against its definition.
+
+   IN:
+       ss...............the source state. Must have been mapped by
+                                               replace_map_source_state before.
+          es...............the base execution state on which to build
+
+   OUT:
+       *es..............the new execution state
+
+*******************************************************************************/
+
+static void replace_build_execution_state_intern(sourcestate_t *ss,
+                                                                                                executionstate_t *es)
+{
+       rplpoint      *rp;
+       sourceframe_t *prevframe;
+       rplpoint      *parent;
+
+       parent = NULL;
+       prevframe = NULL;
+       rp = NULL;
+
+       while (ss->frames) {
+
+               if (REPLACE_IS_NATIVE_FRAME(ss->frames)) {
+                       prevframe = ss->frames;
+                       replace_push_native_frame(es, ss);
+                       parent = NULL;
+                       rp = NULL;
+                       continue;
+               }
+
+               if (parent == NULL) {
+                       /* create a machine-level stack frame; `rp` is still the
+                          replacement point of the previously written frame, i.e.
+                          the call site (or NULL for the first frame) */
+
+                       DOLOG( printf("pushing activation record for:\n");
+                                  if (rp) replace_replacement_point_println(rp, 1);
+                                  else printf("\tfirst frame\n"); );
+
+                       replace_push_activation_record(es, rp, prevframe, ss->frames);
+
+                       DOLOG( replace_executionstate_println(es); );
+               }
+
+               rp = ss->frames->torp;
+               assert(rp);
+
+               DOLOG( printf("creating execution state for%s:\n",
+                               (ss->frames->down == NULL) ? " TOPFRAME" : "");
+                          replace_replacement_point_println(ss->frames->fromrp, 1);
+                          replace_replacement_point_println(rp, 1); );
+
+               es->code = ss->frames->tocode;
+               prevframe = ss->frames;
+
+#if defined(ENABLE_VMLOG)
+               vmlog_cacao_rerol_method(ss->frames->method);
+#endif
+
+               replace_write_executionstate(rp, es, ss, ss->frames->down == NULL);
+
+               DOLOG( replace_executionstate_println(es); );
+
+               if (rp->type == RPLPOINT_TYPE_CALL) {
+                       parent = NULL;
+               }
+               else {
+                       /* inlining */
+                       parent = rp;
+               }
+       }
+}
+
+
+/* replace_build_execution_state ***********************************************
+
+   This function contains the final phase of replacement. It builds the new
+   execution state, releases dump memory, and returns to the calling
+   assembler function which finishes replacement.
+
+   NOTE: This function is called from asm_replacement_in, with the stack
+         pointer at the start of the safe stack area.
+
+   THIS FUNCTION MUST BE CALLED USING A SAFE STACK AREA!
+
+   CAUTION: This function and its children must not use a lot of stack!
+            There are only REPLACE_SAFESTACK_SIZE bytes of C stack
+                       available.
+
+   IN:
+       st...............the safestack containing the necessary data: the
+                        source state (st->ss), the execution state to
+                        build on (st->es), and the dump size to release
+                        (st->dumpsize)
+
+*******************************************************************************/
+
+void replace_build_execution_state(replace_safestack_t *st)
+{
+       replace_build_execution_state_intern(st->ss, &(st->es));
+
+       DOLOG( replace_executionstate_println(&(st->es)); );
 
-       es->sp += SIZE_OF_STACKSLOT * es->code->stackframesize;
-       es->sp += SIZE_OF_STACKSLOT; /* skip return address */
+       /* release dump area */
 
-       es->pv = pv;
-       es->code = code;
+       dump_release(st->dumpsize);
 
-#if !defined(NDEBUG)
-       /* for debugging */
-       for (i=0; i<INT_REG_CNT; ++i)
-               if (nregdescint[i] != REG_SAV)
-                       es->intregs[i] = 0x33dead3333dead33ULL;
-       for (i=0; i<FLT_REG_CNT; ++i)
-               if (nregdescfloat[i] != REG_SAV)
-                       es->fltregs[i] = 0x33dead3333dead33ULL;
-#endif /* !defined(NDEBUG) */
+       /* new code is entered after returning */
 
-       return true;
+       DOLOG( printf("JUMPING IN!\n"); fflush(stdout); );
 }
 
 
-/* replace_push_activation_record **********************************************
+/* replace_alloc_safestack *****************************************************
 
-   Push a stack frame onto the execution state.
-   
-   *** This function imitates the effects of a call and the ***
-   *** method prolog of the callee.                         ***
+   Allocate a safe stack area to use during the final phase of replacement.
+   The returned area is not initialized. This must be done by the caller.
 
-   IN:
-          es...............execution state
-          rpcall...........the replacement point at the call site
-          calleecode.......the codeinfo of the callee
-          frame............source frame, only the synch. slots are used
+   RETURN VALUE:
+       a newly allocated replace_safestack_t *
 
-   OUT:
-       *es..............the execution state after pushing the stack frame
-  
 *******************************************************************************/
 
-void replace_push_activation_record(executionstate_t *es,
-                                                                       rplpoint *rpcall,
-                                                                       codeinfo *calleecode,
-                                                                       sourceframe_t *frame)
+static replace_safestack_t *replace_alloc_safestack()
 {
-       s4 reg;
-       s4 i;
-       s4 count;
-       stackslot_t *basesp;
-       stackslot_t *sp;
-
-       assert(es);
-       assert(rpcall && rpcall->type == RPLPOINT_TYPE_CALL);
-       assert(calleecode);
-       assert(frame);
-
-       /* write the return address */
+       u1 *mem;
+       replace_safestack_t *st;
 
-       es->sp -= SIZE_OF_STACKSLOT;
+       mem = MNEW(u1, sizeof(replace_safestack_t) + REPLACE_STACK_ALIGNMENT - 1);
 
-       DOLOG( printf("writing return address %p to %p\n",
-                               (void*) (rpcall->pc + rpcall->callsize),
-                               (void*) es->sp); );
+       st = (replace_safestack_t *) ((ptrint)(mem + REPLACE_STACK_ALIGNMENT - 1)
+                                                                               & ~(REPLACE_STACK_ALIGNMENT - 1));
 
-       *((stackslot_t *)es->sp) = (stackslot_t) (rpcall->pc + rpcall->callsize);
+#if !defined(NDEBUG)
+       memset(st, 0xa5, sizeof(replace_safestack_t));
+#endif
 
-       /* we move into a new code unit */
+       st->mem = mem;
 
-       es->code = calleecode;
+       return st;
+}
 
-       /* set the new pc XXX not needed */
 
-       es->pc = es->code->entrypoint;
+/* replace_free_safestack ******************************************************
 
-       /* build the stackframe */
+   Free the given safestack structure, making a copy of the contained
+   execution state before freeing it.
 
-       DOLOG( printf("building stackframe of %d words at %p\n",
-                               es->code->stackframesize, (void*)es->sp); );
+   NOTE: This function is called from asm_replacement_in.
 
-       sp = (stackslot_t *) es->sp;
-       basesp = sp;
+   IN:
+       st...............the safestack to free
+          tmpes............where to copy the execution state to
 
-       sp -= es->code->stackframesize;
-       es->sp = (u1*) sp;
+   OUT:
+          *tmpes...........receives a copy of st->es
 
-       /* in debug mode, invalidate stack frame first */
+*******************************************************************************/
 
-#if !defined(NDEBUG)
-       for (i=0; i<(basesp - sp); ++i) {
-               sp[i] = 0xdeaddeadU;
-       }
-#endif
+void replace_free_safestack(replace_safestack_t *st, executionstate_t *tmpes)
+{
+       u1 *mem; /* raw allocation pointer stored in st->mem */
 
-       /* save int registers */
+       /* copy the executionstate_t out first: *st is overwritten and freed below */
 
-       reg = INT_REG_CNT;
-       for (i=0; i<es->code->savedintcount; ++i) {
-               while (nregdescint[--reg] != REG_SAV)
-                       ;
-               *--basesp = es->intregs[reg];
+       *tmpes = st->es;
 
-#if !defined(NDEBUG)
-               es->intregs[reg] = 0x44dead4444dead44ULL;
-#endif
-       }
+       /* fetch the address to free now: the debug memset below overwrites st->mem */
 
-       /* save flt registers */
+       mem = st->mem;
 
-       /* XXX align? */
-       reg = FLT_REG_CNT;
-       for (i=0; i<es->code->savedfltcount; ++i) {
-               while (nregdescfloat[--reg] != REG_SAV)
-                       ;
-               basesp -= STACK_SLOTS_PER_FLOAT;
-               *(u8*)basesp = es->fltregs[reg];
+       /* destroy memory (in debug mode) so stale reads of the struct see 0xa5 */
 
 #if !defined(NDEBUG)
-               es->fltregs[reg] = 0x44dead4444dead44ULL;
+       memset(st, 0xa5, sizeof(replace_safestack_t));
 #endif
-       }
-
-       /* write slots used for synchronization */
-
-       count = code_get_sync_slot_count(es->code);
-       assert(count == frame->syncslotcount);
-       for (i=0; i<count; ++i) {
-               sp[es->code->memuse + i] = frame->syncslots[i];
-       }
 
-       /* set the PV */
+       /* free the safe stack struct (same size expression as used for the allocation) */
 
-       es->pv = es->code->entrypoint;
+       MFREE(mem, u1, sizeof(replace_safestack_t) + REPLACE_STACK_ALIGNMENT - 1);
 }
 
 
-/* replace_find_replacement_point **********************************************
+/* replace_me_wrapper **********************************************************
 
-   Find the replacement point in the given code corresponding to the
-   position given in the source frame.
-   
-   IN:
-          code.............the codeinfo in which to search the rplpoint
-          ss...............the source state defining the position to look for
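+   Look up the replacement point located exactly at the given PC and, if it is flagged active, pass it to replace_me(); return false if there is no active replacement point at that PC.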
 
-   RETURN VALUE:
-       the replacement point
-  
 *******************************************************************************/
 
-rplpoint * replace_find_replacement_point(codeinfo *code, sourcestate_t *ss)
+bool replace_me_wrapper(u1 *pc)
 {
-       sourceframe_t *frame;
-       methodinfo *m;
-       rplpoint *rp;
-       s4        i;
-       s4        j;
-       rplalloc *ra;
+       codeinfo         *code;
+       rplpoint         *rp;
+       executionstate_t  es; /* NOTE(review): never filled in by this function -- see the call below */
 
-       assert(ss);
+       /* search the codeinfo for the given PC */
 
-       frame = ss->frames;
-       assert(frame);
+       code = code_find_codeinfo_for_pc(pc);
+       assert(code);
 
-       DOLOG( printf("searching replacement point for:\n");
-                  replace_source_frame_println(frame); );
+       /* search for a replacement point at the given PC */
 
-       m = frame->method;
+#if 0
+       rp = replace_find_replacement_point_for_pc(code, pc);
+       assert(rp == NULL || rp->pc == pc);
+#else
+       {
+               int i;
+               rplpoint *rp2;
+               rp = NULL;
+               for (i=0,rp2=code->rplpoints; i<code->rplpointcount; i++,rp2++) { /* linear scan over all rplpoints of this code */
+                       if (rp2->pc == pc)
+                               rp = rp2; /* no break: the last entry with a matching pc wins */
+               }
+       }
+#endif
 
-       DOLOG( printf("code = %p\n", (void*)code); );
+       /* check if the replacement point is active */
 
-       rp = code->rplpoints;
-       i = code->rplpointcount;
-       while (i--) {
-               if (rp->id == frame->id && rp->method == frame->method) {
-                       /* check if returnAddresses match */
-                       ra = rp->regalloc;
-                       for (j = rp->regalloccount; j--; ++ra) {
-                               if (ra->type == TYPE_RET) {
-                                       assert(ra->index >= 0 && ra->index < frame->javalocalcount);
-                                       if (frame->javalocals[ra->index] != ra->regoff)
-                                               goto no_match;
-                               }
-                       }
+       if (rp != NULL && (rp->flags & RPLPOINT_FLAG_ACTIVE)) {
 
-                       /* found */
-                       return rp;
-               }
-no_match:
-               rp++;
-       }
+               /*md_replace_executionstate_read(&es, context);*/
 
-       assert(0);
-       return NULL; /* NOT REACHED */
+               replace_me(rp, &es); /* NOTE(review): es is uninitialized here because the read above is commented out */
+
+               return true; /* only reached if replace_me() returns */
+       }
+       else
+               return false; /* no rplpoint at pc, or it is not active */
 }
 
 
@@ -1346,126 +2799,185 @@ no_match:
 
 void replace_me(rplpoint *rp, executionstate_t *es)
 {
-       rplpoint     *target;
-       sourcestate_t ss;
-       s4            dumpsize;
-       rplpoint     *candidate;
-       codeinfo     *code;
-       s4            i;
+       sourcestate_t       *ss;
+       sourceframe_t       *frame;
+       s4                   dumpsize;
+       rplpoint            *origrp; /* rp as passed in; compared with frame->torp below */
+       replace_safestack_t *safestack;
+
+       origrp = rp;
+       es->code = code_find_codeinfo_for_pc(rp->pc); /* es->code is derived from the rplpoint's pc, overwriting the caller's value */
 
-       es->code = rp->code;
+       DOLOG_SHORT( printf("REPLACING(%d %p): (id %d %p) ",
+                                stat_replacements, (void*)THREADOBJECT,
+                                rp->id, (void*)rp);
+                                method_println(es->code->m); );
+
+       DOLOG( replace_replacement_point_println(rp, 1);
+                  replace_executionstate_println(es); );
 
-       DOLOG( printf("REPLACING: "); method_println(es->code->m); );
+       REPLACE_COUNT(stat_replacements);
 
        /* mark start of dump memory area */
 
        dumpsize = dump_size();
 
-       /* fetch the target of the replacement */
+       /* recover source state */
 
-       target = rp->target;
+       ss = replace_recover_source_state(rp, NULL, es);
 
-       /* XXX DEBUG turn off self-replacement */
-       if (target == rp)
-               replace_deactivate_replacement_point(rp);
+       /* map the source state */
 
-       DOLOG( printf("replace_me(%p,%p)\n",(void*)rp,(void*)es); fflush(stdout);
-                  replace_replacement_point_println(rp, 1);
-                  replace_executionstate_println(es); );
+       if (!replace_map_source_state(ss))
+               vm_abort("exception during method replacement"); /* a false result from mapping is treated as fatal */
 
-       /* read execution state of old code */
+       DOLOG( replace_sourcestate_println(ss); );
 
-       ss.frames = NULL;
+       DOLOG_SHORT( replace_sourcestate_println_short(ss); );
 
-       /* XXX testing */
+       /* avoid infinite loops by self-replacement */
 
-       candidate = rp;
-       do {
-               DOLOG( printf("recovering source state for%s:\n",
-                                       (ss.frames == NULL) ? " TOPFRAME" : "");
-                          replace_replacement_point_println(candidate, 1); );
+       frame = ss->frames;
+       while (frame->down) /* follow ->down to the last frame of the chain */
+               frame = frame->down;
+
+       if (frame->torp == origrp) { /* that frame maps back to the very rplpoint we came from */
+               DOLOG_SHORT(
+                       printf("WARNING: identity replacement, turning off rps to avoid infinite loop\n");
+               );
+               replace_deactivate_replacement_points(frame->tocode);
+       }
 
-               replace_read_executionstate(candidate, es, &ss, ss.frames == NULL);
+       /* write execution state of new code */
 
-               if (candidate->parent) {
-                       DOLOG( printf("INLINED!\n"); );
-                       candidate = candidate->parent;
-                       assert(candidate->type == RPLPOINT_TYPE_INLINE);
-               }
-               else {
-                       DOLOG( printf("UNWIND\n"); );
-                       if (!replace_pop_activation_record(es, ss.frames)) {
-                               DOLOG( printf("BREAKING\n"); );
-                               break;
-                       }
-                       DOLOG( replace_executionstate_println(es); );
-                       candidate = NULL;
-                       rp = es->code->rplpoints;
-                       for (i=0; i<es->code->rplpointcount; ++i, ++rp)
-                               if (rp->pc <= es->pc)
-                                       candidate = rp;
-                       if (!candidate)
-                               DOLOG( printf("NO CANDIDATE!\n"); );
-                       else {
-                               DOLOG( printf("found replacement point.\n");
-                                          replace_replacement_point_println(candidate, 1); );
-                               assert(candidate->type == RPLPOINT_TYPE_CALL);
-                       }
-               }
-       } while (candidate);
+       DOLOG( replace_executionstate_println(es); );
 
-       DOLOG( replace_sourcestate_println(&ss); );
+       /* allocate a safe stack area and copy all needed data there */
 
-       /* write execution state of new code */
+       safestack = replace_alloc_safestack();
 
-       DOLOG( replace_executionstate_println(es); );
+       safestack->es = *es; /* struct copy of the execution state */
+       safestack->ss = ss;
+       safestack->dumpsize = dumpsize; /* the dump mark is handed on; it is not released in this function */
 
-       code = es->code;
+       /* call the assembler code for the last phase of replacement */
 
-       /* XXX get new code */
+#if (defined(__I386__) || defined(__X86_64__) || defined(__ALPHA__) || defined(__POWERPC__) || defined(__MIPS__) || defined(__S390__)) && defined(ENABLE_JIT)
+       /*asm_replacement_in(&(safestack->es), safestack);*/
+#endif
 
-       while (ss.frames) {
+       abort(); /* NOT REACHED -- NOTE(review): with the call above commented out, this abort() IS reached */
+}
 
-               candidate = replace_find_replacement_point(code, &ss);
 
-               DOLOG( printf("creating execution state for%s:\n",
-                               (ss.frames->up == NULL) ? " TOPFRAME" : "");
-                          replace_replacement_point_println(ss.frames->debug_rp, 1);
-                          replace_replacement_point_println(candidate, 1); );
+/******************************************************************************/
+/* NOTE: No important code below.                                             */
+/******************************************************************************/
 
-               replace_write_executionstate(candidate, es, &ss, ss.frames->up == NULL);
-               if (ss.frames == NULL)
-                       break;
-               DOLOG( replace_executionstate_println(es); );
 
-               if (candidate->type == RPLPOINT_TYPE_CALL) {
-                       jit_recompile(ss.frames->method);
-                       code = ss.frames->method->code;
-                       assert(code);
-                       DOLOG( printf("pushing activation record for:\n");
-                                  replace_replacement_point_println(candidate, 1); );
-                       replace_push_activation_record(es, candidate, code, ss.frames);
-               }
-               DOLOG( replace_executionstate_println(es); );
+/* statistics *****************************************************************/
+
+#if defined(REPLACE_STATISTICS)
+static void print_freq(FILE *file,int *array,int limit) /* prints array[0..limit]; array must hold limit+1 entries */
+{
+       int i;
+       int sum = 0; /* total over all limit+1 buckets */
+       int cum = 0; /* running total of buckets 0..i */
+       for (i=0; i<limit; ++i)
+               sum += array[i];
+       sum += array[limit]; /* the last bucket also counts toward the total */
+       for (i=0; i<limit; ++i) {
+               cum += array[i];
+               fprintf(file,"      %3d: %8d (cum %3d%%)\n",
+                               i, array[i], (sum) ? ((100*cum)/sum) : 0); /* integer percentage; 0 when the total is 0 */
        }
+       fprintf(file,"    >=%3d: %8d\n",limit,array[limit]); /* last bucket is printed with a ">=" label */
+}
+#endif /* defined(REPLACE_STATISTICS) */
 
-       DOLOG( replace_executionstate_println(es); );
 
-       /* release dump area */
+#if defined(REPLACE_STATISTICS)
 
-       dump_release(dumpsize);
+#define REPLACE_PRINT_DIST(name, array)                              \
+    printf("    " name " distribution:\n");                          \
+    print_freq(stdout, (array), sizeof(array)/sizeof(int) - 1); /* limit = last index; expands to two statements; `array` must be a true int array */
 
-       /* enter new code */
+void replace_print_statistics(void) /* print all replacement counters and distributions to stdout */
+{
+       printf("replacement statistics:\n");
+       printf("    # of replacements:   %d\n", stat_replacements);
+       printf("    # of frames:         %d\n", stat_frames);
+       printf("    # of recompilations: %d\n", stat_recompile);
+       printf("    patched static calls:%d\n", stat_staticpatch);
+       printf("    unrolled inlines:    %d\n", stat_unroll_inline);
+       printf("    unrolled calls:      %d\n", stat_unroll_call);
+       REPLACE_PRINT_DIST("frame depth", stat_dist_frames);
+       REPLACE_PRINT_DIST("locals per frame", stat_dist_locals);
+       REPLACE_PRINT_DIST("ADR locals per frame", stat_dist_locals_adr);
+       REPLACE_PRINT_DIST("primitive locals per frame", stat_dist_locals_prim);
+       REPLACE_PRINT_DIST("RET locals per frame", stat_dist_locals_ret);
+       REPLACE_PRINT_DIST("void locals per frame", stat_dist_locals_void);
+       REPLACE_PRINT_DIST("stack slots per frame", stat_dist_stack);
+       REPLACE_PRINT_DIST("ADR stack slots per frame", stat_dist_stack_adr);
+       REPLACE_PRINT_DIST("primitive stack slots per frame", stat_dist_stack_prim);
+       REPLACE_PRINT_DIST("RET stack slots per frame", stat_dist_stack_ret);
+       printf("\n");
+       printf("    # of methods:            %d\n", stat_methods);
+       printf("    # of replacement points: %d\n", stat_rploints);
+       printf("    # of regallocs:          %d\n", stat_regallocs);
+       printf("        per rplpoint:        %f\n", (double)stat_regallocs / stat_rploints); /* NOTE(review): no guard against a zero divisor */
+       printf("        per method:          %f\n", (double)stat_regallocs / stat_methods); /* NOTE(review): no guard against a zero divisor */
+       REPLACE_PRINT_DIST("replacement points per method", stat_dist_method_rplpoints);
+       printf("\n");
 
-       DOLOG( printf("JUMPING IN!\n"); fflush(stdout); );
+}
+#endif /* defined(REPLACE_STATISTICS) */
 
-#if (defined(__I386__) || defined(__X86_64__) || defined(__ALPHA__) || defined(__POWERPC__) || defined(__MIPS__)) && defined(ENABLE_JIT)
-       asm_replacement_in(es);
-#endif
-       abort(); /* NOT REACHED */
+
+#if defined(REPLACE_STATISTICS)
+static void replace_statistics_source_frame(sourceframe_t *frame) /* tally the frame's locals and stack slots by type into the stat_dist_* counters */
+{
+       int adr = 0; /* TYPE_ADR entries */
+       int ret = 0; /* TYPE_RET entries */
+       int prim = 0; /* TYPE_INT/LNG/FLT/DBL entries */
+       int vd = 0; /* TYPE_VOID entries (locals only) */
+       int n = 0; /* entries visited */
+       int i;
+
+       for (i=0; i<frame->javalocalcount; ++i) {
+               switch (frame->javalocaltype[i]) {
+                       case TYPE_ADR: adr++; break;
+                       case TYPE_RET: ret++; break;
+                       case TYPE_INT: case TYPE_LNG: case TYPE_FLT: case TYPE_DBL: prim++; break;
+                       case TYPE_VOID: vd++; break;
+                       default: assert(0); /* any other local type trips the assertion */
+               }
+               n++;
+       }
+       REPLACE_COUNT_DIST(stat_dist_locals, n);
+       REPLACE_COUNT_DIST(stat_dist_locals_adr, adr);
+       REPLACE_COUNT_DIST(stat_dist_locals_void, vd);
+       REPLACE_COUNT_DIST(stat_dist_locals_ret, ret);
+       REPLACE_COUNT_DIST(stat_dist_locals_prim, prim);
+       adr = ret = prim = n = 0; /* reset for the stack pass; vd is not used again */
+       for (i=0; i<frame->javastackdepth; ++i) {
+               switch (frame->javastacktype[i]) { /* no default here: other types only count toward n */
+                       case TYPE_ADR: adr++; break;
+                       case TYPE_RET: ret++; break;
+                       case TYPE_INT: case TYPE_LNG: case TYPE_FLT: case TYPE_DBL: prim++; break;
+               }
+               n++;
+       }
+       REPLACE_COUNT_DIST(stat_dist_stack, n);
+       REPLACE_COUNT_DIST(stat_dist_stack_adr, adr);
+       REPLACE_COUNT_DIST(stat_dist_stack_ret, ret);
+       REPLACE_COUNT_DIST(stat_dist_stack_prim, prim);
 }
+#endif /* defined(REPLACE_STATISTICS) */
 
 
+/* debugging helpers **********************************************************/
+
 /* replace_replacement_point_println *******************************************
  
    Print replacement point info.
@@ -1485,7 +2997,8 @@ static char *replace_type_str[] = {
        "SBR",
        "CALL",
        "INLINE",
-       "RETURN"
+       "RETURN",
+       "BODY"
 };
 
 void replace_replacement_point_println(rplpoint *rp, int depth)
@@ -1501,10 +3014,16 @@ void replace_replacement_point_println(rplpoint *rp, int depth)
        for (j=0; j<depth; ++j)
                putchar('\t');
 
-       printf("rplpoint (id %d) %p pc:%p+%d out:%p target:%p mcode:%016llx type:%s flags:%01x parent:%p\n",
-                       rp->id, (void*)rp,rp->pc,rp->callsize,rp->outcode,(void*)rp->target,
-                       (unsigned long long)rp->mcode,replace_type_str[rp->type],rp->flags,
-                       (void*)rp->parent);
+       printf("rplpoint (id %d) %p pc:%p+%d type:%s",
+                       rp->id, (void*)rp,rp->pc,rp->callsize,
+                       replace_type_str[rp->type]);
+       if (rp->flags & RPLPOINT_FLAG_NOTRAP)
+               printf(" NOTRAP");
+       if (rp->flags & RPLPOINT_FLAG_COUNTDOWN)
+               printf(" COUNTDOWN");
+       if (rp->flags & RPLPOINT_FLAG_ACTIVE)
+               printf(" ACTIVE");
+       printf(" parent:%p\n", (void*)rp->parent);
        for (j=0; j<depth; ++j)
                putchar('\t');
        printf("ra:%d = [",     rp->regalloccount);
@@ -1536,7 +3055,7 @@ void replace_replacement_point_println(rplpoint *rp, int depth)
 
        printf("\n");
 }
-#endif
+#endif /* !defined(NDEBUG) */
 
 
 /* replace_show_replacement_points *********************************************
@@ -1566,6 +3085,9 @@ void replace_show_replacement_points(codeinfo *code)
        printf("\ttotal allocations : %d\n",code->regalloccount);
        printf("\tsaved int regs    : %d\n",code->savedintcount);
        printf("\tsaved flt regs    : %d\n",code->savedfltcount);
+#if defined(HAS_ADDRESS_REGISTER_FILE)
+       printf("\tsaved adr regs    : %d\n",code->savedadrcount);
+#endif
        printf("\tmemuse            : %d\n",code->memuse);
 
        printf("\n");
@@ -1573,8 +3095,6 @@ void replace_show_replacement_points(codeinfo *code)
        for (i=0; i<code->rplpointcount; ++i) {
                rp = code->rplpoints + i;
 
-               assert(rp->code == code);
-
                depth = 1;
                parent = rp->parent;
                while (parent) {
@@ -1603,7 +3123,7 @@ void replace_executionstate_println(executionstate_t *es)
        int slots;
        stackslot_t *sp;
        int extraslots;
-       
+
        if (!es) {
                printf("(executionstate_t *)NULL\n");
                return;
@@ -1619,7 +3139,11 @@ void replace_executionstate_println(executionstate_t *es)
                        printf("\t");
                else
                        printf(" ");
-               printf("%-3s = %016llx",regs[i],(unsigned long long)es->intregs[i]);
+#if SIZEOF_VOID_P == 8
+               printf("%-3s = %016llx",abi_registers_integer_name[i],(unsigned long long)es->intregs[i]);
+#else
+               printf("%-3s = %08lx",abi_registers_integer_name[i],(unsigned long)es->intregs[i]);
+#endif
                if (i%4 == 3)
                        printf("\n");
        }
@@ -1632,6 +3156,17 @@ void replace_executionstate_println(executionstate_t *es)
                if (i%4 == 3)
                        printf("\n");
        }
+# if defined(HAS_ADDRESS_REGISTER_FILE)
+       for (i=0; i<ADR_REG_CNT; ++i) {
+               if (i%4 == 0)
+                       printf("\t");
+               else
+                       printf(" ");
+               printf("A%02d = %016llx",i,(unsigned long long)es->adrregs[i]);
+               if (i%4 == 3)
+                       printf("\n");
+       }
+# endif
 #endif
 
        sp = (stackslot_t *) es->sp;
@@ -1640,7 +3175,7 @@ void replace_executionstate_println(executionstate_t *es)
 
        if (es->code) {
                methoddesc *md = es->code->m->parseddesc;
-               slots = code_get_stack_frame_size(es->code);
+               slots = es->code->stackframesize;
                extraslots = 1 + md->memuse;
        }
        else
@@ -1675,32 +3210,41 @@ void replace_executionstate_println(executionstate_t *es)
 #endif
 
 #if !defined(NDEBUG)
-void java_value_print(s4 type, u8 value)
+static void java_value_print(s4 type, replace_val_t value) /* print value as raw hex, its type name, and a typed rendering */
 {
-       java_objectheader *obj;
-       utf               *u;
+       java_object_t *obj;
+       utf           *u;
 
-       printf("%016llx",(unsigned long long) value);
+       printf("%016llx",(unsigned long long) value.l); /* hex dump always reads the .l member, whatever the type */
 
        if (type < 0 || type > TYPE_RET)
                printf(" <INVALID TYPE:%d>", type);
        else
                printf(" %s", show_jit_type_names[type]);
 
-       if (type == TYPE_ADR && value != 0) {
-               obj = (java_objectheader *) (ptrint) value;
+       if (type == TYPE_ADR && value.a != NULL) { /* non-null reference: also print its class name */
+               obj = value.a;
                putchar(' ');
                utf_display_printable_ascii_classname(obj->vftbl->class->name);
 
                if (obj->vftbl->class == class_java_lang_String) {
                        printf(" \"");
-                       u = javastring_toutf((java_lang_String *)obj, false);
+                       u = javastring_toutf(obj, false); /* strings are additionally printed with their contents */
                        utf_display_printable_ascii(u);
                        printf("\"");
                }
        }
-       else if (type == TYPE_INT || type == TYPE_LNG) {
-               printf(" %lld", (long long) value);
+       else if (type == TYPE_INT) {
+               printf(" %ld", (long) value.i);
+       }
+       else if (type == TYPE_LNG) {
+               printf(" %lld", (long long) value.l);
+       }
+       else if (type == TYPE_FLT) {
+               printf(" %f", value.f);
+       }
+       else if (type == TYPE_DBL) {
+               printf(" %f", value.d);
        }
 }
 #endif /* !defined(NDEBUG) */
@@ -1709,14 +3253,43 @@ void java_value_print(s4 type, u8 value)
 #if !defined(NDEBUG)
 void replace_source_frame_println(sourceframe_t *frame)
 {
-       s4 i;
+       s4 i,j;
        s4 t;
 
+       if (REPLACE_IS_NATIVE_FRAME(frame)) {
+               printf("\tNATIVE\n");
+               printf("\tsfi: "); replace_stackframeinfo_println(frame->sfi);
+               printf("\tnativepc: %p\n", frame->nativepc);
+               printf("\tframesize: %d\n", frame->nativeframesize);
+
+               j = 0;
+               for (i=0; i<INT_REG_CNT; ++i) {
+                       if (nregdescint[i] == REG_SAV)
+                               printf("\t%s = %p\n", abi_registers_integer_name[i], (void*)frame->nativesavint[j++]);
+               }
+
+               j = 0;
+               for (i=0; i<FLT_REG_CNT; ++i) {
+                       if (nregdescfloat[i] == REG_SAV)
+                               printf("\tF%02d = %f\n", i, frame->nativesavflt[j++]);
+               }
+
+               printf("\n");
+               return;
+       }
+
        printf("\t");
        method_println(frame->method);
        printf("\tid: %d\n", frame->id);
+       printf("\ttype: %s\n", replace_type_str[frame->type]);
        printf("\n");
 
+       if (frame->instance.a) {
+               printf("\tinstance: ");
+               java_value_print(TYPE_ADR, frame->instance);
+               printf("\n");
+       }
+
        if (frame->javalocalcount) {
                printf("\tlocals (%d):\n",frame->javalocalcount);
                for (i=0; i<frame->javalocalcount; ++i) {
@@ -1754,13 +3327,33 @@ void replace_source_frame_println(sourceframe_t *frame)
                for (i=0; i<frame->syncslotcount; ++i) {
                        printf("\tslot[%2d] = ",i);
 #ifdef HAS_4BYTE_STACKSLOT
-                       printf("%08lx\n",(unsigned long) frame->syncslots[i]);
+                       printf("%08lx\n",(unsigned long) frame->syncslots[i].p);
 #else
-                       printf("%016llx\n",(unsigned long long) frame->syncslots[i]);
+                       printf("%016llx\n",(unsigned long long) frame->syncslots[i].p);
 #endif
                }
                printf("\n");
        }
+
+       if (frame->fromcode) {
+               printf("\tfrom %p ", (void*)frame->fromcode);
+               method_println(frame->fromcode->m);
+       }
+       if (frame->tocode) {
+               printf("\tto %p ", (void*)frame->tocode);
+               method_println(frame->tocode->m);
+       }
+
+       if (frame->fromrp) {
+               printf("\tfrom replacement point:\n");
+               replace_replacement_point_println(frame->fromrp, 2);
+       }
+       if (frame->torp) {
+               printf("\tto replacement point:\n");
+               replace_replacement_point_println(frame->torp, 2);
+       }
+
+       printf("\n");
 }
 #endif /* !defined(NDEBUG) */
 
@@ -1787,13 +3380,73 @@ void replace_sourcestate_println(sourcestate_t *ss)
 
        printf("sourcestate_t:\n");
 
-       for (i=0, frame = ss->frames; frame != NULL; frame = frame->up, ++i) {
+       for (i=0, frame = ss->frames; frame != NULL; frame = frame->down, ++i) {
                printf("    frame %d:\n", i);
                replace_source_frame_println(frame);
        }
 }
 #endif
 
+
+/* replace_sourcestate_println_short *******************************************
+
+   Print a compact representation of the given source state.
+
+   IN:
+       ss...............the source state to print
+
+*******************************************************************************/
+
+#if !defined(NDEBUG)
+void replace_sourcestate_println_short(sourcestate_t *ss)
+{
+       sourceframe_t *frame;
+
+       for (frame = ss->frames; frame != NULL; frame = frame->down) { /* one output entry per frame, following ->down */
+               printf("\t");
+
+               if (REPLACE_IS_NATIVE_FRAME(frame)) {
+                       printf("NATIVE (pc %p size %d) ",
+                                       (void*)frame->nativepc, frame->nativeframesize);
+                       replace_stackframeinfo_println(frame->sfi);
+                       continue; /* native frames print only the pc, size and stackframeinfo */
+               }
+
+               if (frame->torp) {
+                       printf("%c", (frame->torp == frame->fromrp) ? '=' : '+'); /* '=' if source and target rplpoint are the same object, else '+' */
+               }
+
+               printf("%s", replace_type_str[frame->fromrp->type]); /* NOTE(review): fromrp is dereferenced without a NULL check -- presumably always set for non-native frames; confirm */
+
+               if (frame->torp && frame->torp->type != frame->fromrp->type)
+                       printf("->%s", replace_type_str[frame->torp->type]); /* show the target type only when it differs */
+
+               if (frame->tocode != frame->fromcode)
+                       printf(" (%p->%p/%d) ",
+                                  (void*) frame->fromcode, (void*) frame->tocode,
+                                  frame->fromrp->id);
+               else
+                       printf(" (%p/%d) ", (void*) frame->fromcode, frame->fromrp->id); /* same code on both sides: print it once */
+
+               method_println(frame->method);
+       }
+}
+#endif
+
+#if !defined(NDEBUG)
+static void replace_stackframeinfo_println(stackframeinfo *sfi) /* NOTE(review): defined after its callers in this file -- presumably forward-declared earlier; confirm */
+{
+       printf("prev=%p pv=%p sp=%p ra=%p xpc=%p method=", /* dump the pointer fields of *sfi; sfi itself is not checked for NULL */
+                       (void*)sfi->prev, (void*)sfi->pv, (void*)sfi->sp,
+                       (void*)sfi->ra, (void*)sfi->xpc);
+
+       if (sfi->method)
+               method_println(sfi->method);
+       else
+               printf("(nil)\n"); /* no method recorded in this stackframeinfo */
+}
+#endif
+
 /*
  * These are local overrides for various environment variables in Emacs.
  * Please do not remove this and leave it at the end of the file, where