1 /* src/vm/jit/intrp/dynamic-super.c dynamic superinstruction support
3 Copyright (C) 1995,1996,1997,1998,2000,2003,2004 Free Software Foundation, Inc.
6 Copyright (C) 1996-2005, 2006 R. Grafl, A. Krall, C. Kruegel,
7 C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
8 E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
9 J. Wenninger, Institut f. Computersprachen - TU Wien
11 This file is part of CACAO.
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2, or (at
16 your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 Contact: cacao@cacaojvm.org
30 Authors: Christian Thalinger
35 $Id: dynamic-super.c 4357 2006-01-22 23:33:38Z twisti $
39 #define NO_IP 0 /* proper native code, without interpreter's ip */
47 #include "mm/memory.h"
49 #if defined(USE_THREADS)
50 # if defined(NATIVE_THREADS)
51 # include "threads/native/threads.h"
53 # include "threads/green/threads.h"
57 #include "vm/hashtable.h"
58 #include "vm/options.h"
60 #include "vm/jit/intrp/intrp.h"
61 #include "toolbox/logging.h"
63 s4 no_super=0; /* option: just use replication, but no dynamic superinsts */
65 static char MAYBE_UNUSED superend[]={
66 #include "java-superend.i"
69 const char * const prim_names[]={
70 #include "java-names.i"
74 #define INST_ADDR(_inst) N_##_inst
75 #include "java-labels.i"
82 Label start; /* NULL if not relocatable */
83 s4 length; /* only includes the jump iff superend is true*/
84 s4 restlength; /* length of the rest (i.e., the jump or (on superend) 0) */
85 char superend; /* true if primitive ends superinstruction, i.e.,
86 unconditional branch, execute, etc. */
89 s4 offset; /* offset of immarg within prim */
90 char rel; /* true if immarg is relative */
91 } immargs[MAX_IMMARGS];
99 typedef struct superstart {
100 struct superstart *next;
101 s4 patcherm; /* offset of patcher, -1 if super has no patcher */
102 u4 length; /* length of superinstruction */
103 u1 *oldsuper; /* reused superinstruction: NULL if there is none */
110 static hashtable hashtable_patchersupers;
111 #define HASHTABLE_PATCHERSUPERS_BITS 14
113 #if defined(USE_THREADS)
114 static java_objectheader *lock_hashtable_patchersupers;
117 /* stuff for -no-replication */
118 typedef struct superreuse {
119 struct superreuse *next;
124 static hashtable hashtable_superreuse;
125 #define HASHTABLE_SUPERREUSE_BITS 14
127 #if defined(USE_THREADS)
128 static java_objectheader *lock_hashtable_superreuse;
131 # define debugp(x...) if (opt_verbose) fprintf(x)
132 #define debugp1(x...)
134 /* statistics variables */
136 u4 count_supers = 0; /* dynamic superinstructions, including replicas */
137 u4 count_supers_unique = 0; /* dynamic superinstructions, without replicas */
138 u4 count_supers_reused = 0; /* reused dynamic superinstructions */
139 u4 count_patchers_exec = 0; /* executed patchers */
140 u4 count_patchers_last = 0; /* executed last patchers */
141 u4 count_patchers_ins = 0; /* patchers inserted in patchersupers table */
142 u4 count_patchers = 0; /* superstarts with patcherm!=-1 */
143 u4 count_supers_nopatch= 0; /* superinstructions for code without patchers */
144 u4 count_supers_patch = 0; /* superinstructions for code with patchers */
145 u4 count_dispatches = 0; /* dynamic superinstructions generated */
146 u4 count_disp_nonreloc = 0; /* */
147 u4 count_insts_reloc = 0; /* relocatable insts compiled (append_prim) */
148 u4 count_insts = 0; /* compiled insts (gen_inst) */
149 u4 count_native_code = 0; /* native code bytes */
150 u4 count_native_saved = 0; /* reclaimed native code */
152 /* determine priminfos */
155 java_objectheader *engine2(Inst *ip0, Cell * sp, Cell * fp);
157 static int compare_labels(const void *pa, const void *pb)
159 char *a = *(char **)pa;
160 char *b = *(char **)pb;
164 static Label bsearch_next(Label key, Label *a, u4 n)
165 /* a is sorted; return the label >=key that is the closest in a;
166 return NULL if there is no label in a >=key */
178 return bsearch_next(key, a+mid+1, n-mid-1);
180 return bsearch_next(key, a, mid+1);
184 each VM instruction <x> looks like this:
187 I_<x>: /* entry point */
189 J_<x>: /* start of dispatch */
190 NEXT_P1_5; /* dispatch except GOTO */
191 K_<x>: /* just before goto */
192 DO_GOTO; /* goto part of dispatch */
195 * We need I_<x> for threaded code: this is the code address stored in
196 * direct threaded code.
198 * We need the <skip> to detect non-relocatability (by comparing
199 * engine and engine2 with different skips). H_<x> is there to ensure
200 * that gcc does not think that <skip> is dead code.
202 * We need J_<x> to implement dynamic superinstructions: copy
203 * everything between I_<x> and J_<x> to get a VM instruction without
206 * At the end of a dynamic superinstruction we need a dispatch. We
207 * need the DO_GOTO to work around gcc bug 15242: we copy everything
208 * up to K_<x> and then copy the indirect jump from &&before_goto.
213 static void check_prims(Label symbols1[])
216 Label *symbols2, *symbols3, *js1, *ks1, *ks1sorted;
217 Label after_goto, before_goto2;
221 fprintf(stderr, "Compiled with gcc-" __VERSION__ "\n");
223 for (i=0; symbols1[i]!=0; i++)
230 symbols2=(Inst *)engine2(0,0,0);
232 symbols3=(Inst *)engine3(0,0,0);
238 before_goto = ks1[i+1]; /* after ks and after after_last */
239 after_goto = ks1[i+2];
240 before_goto2 = (ks1+(symbols2-symbols1))[i+1];
241 /* some gccs reorder the code; below we look for the next K label
242 with binary search, and whether it belongs to the current
243 instruction; prepare for this by sorting the K labels */
244 ks1sorted = (Label *)alloca((i+1)*sizeof(Label));
245 MCOPY(ks1sorted,ks1,Label,i+1);
246 qsort(ks1sorted, i+1, sizeof(Label), compare_labels);
248 /* check whether the "goto *" is relocatable */
249 goto_len = after_goto-before_goto;
250 debugp(stderr, "goto * %p %p len=%d\n",
251 before_goto,before_goto2,goto_len);
252 if (memcmp(before_goto, before_goto2, goto_len)!=0) { /* unequal */
253 opt_no_dynamic = true;
254 debugp(stderr," not relocatable, disabling dynamic code generation\n");
258 priminfos = calloc(i,sizeof(PrimInfo));
259 for (i=0; symbols1[i]!=0; i++) {
260 /* check whether the VM instruction i has the same code in
261 symbols1 and symbols2 (then it is relocatable). Also, for real
262 native code (not activated), check where the versions in
263 symbols1 and symbols 3 differ; these are places for
265 int prim_len = js1[i]-symbols1[i];
266 PrimInfo *pi=&priminfos[i];
268 char *s1 = (char *)symbols1[i];
269 char *s2 = (char *)symbols2[i];
270 char *s3 = (char *)symbols3[i];
271 Label endlabel = bsearch_next(s1+1,ks1sorted,npriminfos+1);
274 pi->superend = superend[i]|no_super;
275 pi->length = prim_len;
276 pi->restlength = endlabel - symbols1[i] - pi->length;
279 debugp(stderr, "%-15s %3d %p %p %p len=%3ld restlen=%2ld s-end=%1d",
280 prim_names[i], i, s1, s2, s3, (long)(pi->length), (long)(pi->restlength), pi->superend);
281 if (endlabel == NULL) {
282 pi->start = NULL; /* not relocatable */
283 if (pi->length<0) pi->length=1000;
284 debugp(stderr,"\n non_reloc: no K label > start found\n");
287 if (js1[i] > endlabel && !pi->superend) {
288 pi->start = NULL; /* not relocatable */
289 pi->length = endlabel-symbols1[i];
290 debugp(stderr,"\n non_reloc: there is a K label before the J label (restlength<0)\n");
293 if (js1[i] < pi->start && !pi->superend) {
294 pi->start = NULL; /* not relocatable */
295 pi->length = endlabel-symbols1[i];
296 debugp(stderr,"\n non_reloc: J label before I label (length<0)\n");
299 assert(pi->length>=0);
300 assert(pi->restlength >=0);
301 while (j<(pi->length+pi->restlength)) {
303 if (s1[j] != s2[j]) {
304 pi->start = NULL; /* not relocatable */
305 debugp(stderr,"\n non_reloc: engine1!=engine2 offset %3d",j);
306 /* assert(j<prim_len); */
311 /* only used for NO_IP native code generation */
312 /* here we find differences in inline arguments */
321 static bool is_relocatable(ptrint p)
323 return !opt_no_dynamic && priminfos[p].start != NULL;
327 void append_prim(codegendata *cd, ptrint p)
329 PrimInfo *pi = &priminfos[p];
330 debugp1(stderr,"append_prim %p %s\n",cd->lastmcodeptr, prim_names[p]);
331 if (cd->ncodeptr + pi->length + pi->restlength + goto_len >
332 cd->ncodebase + cd->ncodesize) {
333 cd->ncodeptr = codegen_ncode_increase(cd, cd->ncodeptr);
335 memcpy(cd->ncodeptr, pi->start, pi->length);
336 cd->ncodeptr += pi->length;
340 static void init_dynamic_super(codegendata *cd)
342 cd->lastpatcheroffset = -1;
343 cd->dynsuperm = -1; /* the next VM instr may be non-relocatable */
344 cd->dynsupern = cd->ncodeptr - cd->ncodebase;
348 /******************* -no-replication stuff **********************/
350 /* bugs: superinstructions are inserted into the table only after the
351 end of a method, so there may be replication within a method even
352 with -no-replication. Moreover, there is a race condition that can
353 cause varying amounts of replication between different threads;
354 this could only be avoided by eliminating the bug above and having
357 static u4 hash_superreuse(u1 *code, u4 length)
358 /* calculates a hash value for given code length */
363 for (i=0; i<(length&(~3)); i+=sizeof(u4)) {
364 r += *(s4 *)(code+i); /* !! align each superinstruction */
366 return (r+(r>>HASHTABLE_SUPERREUSE_BITS))&((1<<HASHTABLE_SUPERREUSE_BITS)-1);
369 static void superreuse_insert(u1 *code, u4 length)
371 u4 slot = hash_superreuse(code, length);
372 superreuse **listp = (superreuse **)&hashtable_superreuse.ptr[slot];
373 superreuse *sr = NEW(superreuse);
376 #if defined(USE_THREADS)
377 builtin_monitorenter(lock_hashtable_superreuse);
381 #if defined(USE_THREADS)
382 builtin_monitorexit(lock_hashtable_superreuse);
384 count_supers_unique++;
387 static u1 *superreuse_lookup(u1 *code, u4 length)
388 /* returns earlier instances code address, or NULL */
390 u4 slot = hash_superreuse(code, length);
391 superreuse *sr = (superreuse *)hashtable_superreuse.ptr[slot];
392 /* since there is another race condition anyway, we don't bother
393 locking here; both race conditions don't affect the correctness */
394 for (; sr != NULL; sr = sr->next)
395 if (length == sr->length && memcmp(code, sr->code, length)==0)
400 /******************* patcher stuff *******************************/
402 void patchersuper_rewrite(Inst *p)
403 /* p is the address of a patcher; if this is the last patcher in
404 a dynamic superinstruction, rewrite the threaded code to use
405 the dynamic superinstruction; the data for that comes from
406 hashtable_patchersupers */
408 ptrint key = (ptrint)p;
409 u4 slot = ((key + (key>>HASHTABLE_PATCHERSUPERS_BITS)) &
410 ((1<<HASHTABLE_PATCHERSUPERS_BITS)-1));
411 superstart **listp = (superstart **)&hashtable_patchersupers.ptr[slot];
413 count_patchers_exec++;
414 #if defined(USE_THREADS)
415 builtin_monitorenter(lock_hashtable_patchersupers);
417 for (; ss=*listp, ss!=NULL; listp = &(ss->next)) {
418 if (p == ((Inst *)(ss->mcodebase + ss->patcherm))) {
420 if (ss->oldsuper != NULL)
421 target = (Inst)ss->oldsuper;
423 target = (Inst)(ss->ncodebase + ss->dynsupern);
424 *(Inst *)(ss->mcodebase + ss->dynsuperm) = target;
425 debugp1(stderr, "patcher rewrote %p into %p\n",
426 ss->mcodebase + ss->dynsuperm, target);
428 FREE(ss, superstart);
429 count_patchers_last++;
433 #if defined(USE_THREADS)
434 builtin_monitorexit(lock_hashtable_patchersupers);
438 static void hashtable_patchersupers_insert(superstart *ss)
440 ptrint key = (ptrint)(ss->mcodebase + ss->patcherm);
441 u4 slot = ((key + (key>>HASHTABLE_PATCHERSUPERS_BITS)) &
442 ((1<<HASHTABLE_PATCHERSUPERS_BITS)-1));
443 void **listp = &hashtable_patchersupers.ptr[slot];
444 #if defined(USE_THREADS)
445 builtin_monitorenter(lock_hashtable_patchersupers);
447 ss->next = (superstart *)*listp;
449 #if defined(USE_THREADS)
450 builtin_monitorexit(lock_hashtable_patchersupers);
452 count_patchers_ins++;
455 void dynamic_super_rewrite(codegendata *cd)
456 /* rewrite the threaded code in the superstarts list to point to
457 the dynamic superinstructions */
459 superstart *ss = cd->superstarts;
461 for (; ss != NULL; ss = ssnext) {
463 if (opt_no_replication && ss->oldsuper == NULL)
464 superreuse_insert(cd->ncodebase + ss->dynsupern, ss->length);
465 if (ss->patcherm == -1) {
466 assert(ss->oldsuper == NULL);
467 *(Inst *)(cd->mcodebase + ss->dynsuperm) =
468 (Inst)(cd->ncodebase + ss->dynsupern);
469 debugp1(stderr, "rewrote %p into %p\n",
470 cd->mcodebase + ss->dynsuperm, cd->ncodebase + ss->dynsupern);
471 count_supers_nopatch++;
473 /* fprintf(stderr,"%p stage2\n", ss); */
474 ss->mcodebase = cd->mcodebase;
475 ss->ncodebase = cd->ncodebase;
476 hashtable_patchersupers_insert(ss);
477 count_supers_patch++;
482 static void new_dynamic_super(codegendata *cd)
483 /* add latest dynamic superinstruction to the appropriate table
484 for rewriting the threaded code either on relocation (if there
485 is no patcher), or when the last patcher is executed (if there
490 u1 *code = cd->ncodebase + cd->dynsupern;
491 u4 length = cd->ncodeptr - code;
493 if (opt_no_replication) {
494 oldsuper = superreuse_lookup(code, length);
495 if (oldsuper != NULL) {
496 count_supers_reused++;
497 count_native_saved += length;
499 if (cd->lastpatcheroffset == -1) {
500 *(Inst *)(cd->mcodebase + cd->dynsuperm) = (Inst)oldsuper;
501 debugp1(stderr, "rewrote %p into %p (reused)\n",
502 cd->mcodebase + ss->dynsuperm, oldsuper);
507 if (cd->lastpatcheroffset == -1) {
508 ss = DNEW(superstart);
510 ss = NEW(superstart);
512 /* fprintf(stderr,"%p stage1\n", ss); */
514 ss->patcherm = cd->lastpatcheroffset;
515 ss->oldsuper = oldsuper;
517 ss->dynsuperm = cd->dynsuperm;
518 ss->dynsupern = cd->dynsupern;
519 ss->next = cd->superstarts;
520 cd->superstarts = ss;
521 count_native_code += cd->ncodeptr - code;
524 void append_dispatch(codegendata *cd)
526 debugp1(stderr,"append_dispatch %p\n",cd->lastmcodeptr);
527 if (cd->lastinstwithoutdispatch != ~0) {
528 PrimInfo *pi = &priminfos[cd->lastinstwithoutdispatch];
530 memcpy(cd->ncodeptr, pi->start + pi->length, pi->restlength);
531 cd->ncodeptr += pi->restlength;
532 memcpy(cd->ncodeptr, before_goto, goto_len);
533 cd->ncodeptr += goto_len;
534 cd->lastinstwithoutdispatch = ~0;
535 new_dynamic_super(cd);
538 init_dynamic_super(cd);
541 static void compile_prim_dyn(codegendata *cd, ptrint p)
542 /* compile prim #p dynamically (mod flags etc.) */
546 assert(p<npriminfos);
547 if (!is_relocatable(p)) {
548 if (cd->lastinstwithoutdispatch != ~0) /* only count real dispatches */
549 count_disp_nonreloc++;
550 append_dispatch(cd); /* to previous dynamic superinstruction */
553 if (cd->dynsuperm == -1)
554 cd->dynsuperm = cd->lastmcodeptr - cd->mcodebase;
556 cd->lastinstwithoutdispatch = p;
557 if (priminfos[p].superend)
562 static void replace_patcher(codegendata *cd, ptrint p)
563 /* compile p dynamically, and note that there is a patcher here */
565 if (opt_no_quicksuper) {
568 cd->lastpatcheroffset = cd->lastmcodeptr - cd->mcodebase;
569 compile_prim_dyn(cd, p);
573 void gen_inst1(codegendata *cd, ptrint instr)
575 /* actually generate the threaded code instruction */
577 debugp1(stderr, "gen_inst %p, %s\n", cd->lastmcodeptr, prim_names[instr]);
578 *((Inst *) cd->lastmcodeptr) = vm_prim[instr];
580 case N_PATCHER_ACONST: replace_patcher(cd, N_ACONST1); break;
581 case N_PATCHER_ARRAYCHECKCAST: replace_patcher(cd, N_ARRAYCHECKCAST); break;
582 case N_PATCHER_GETSTATIC_INT: replace_patcher(cd, N_GETSTATIC_INT); break;
583 case N_PATCHER_GETSTATIC_LONG: replace_patcher(cd, N_GETSTATIC_LONG); break;
584 case N_PATCHER_GETSTATIC_CELL: replace_patcher(cd, N_GETSTATIC_CELL); break;
585 case N_PATCHER_PUTSTATIC_INT: replace_patcher(cd, N_PUTSTATIC_INT); break;
586 case N_PATCHER_PUTSTATIC_LONG: replace_patcher(cd, N_PUTSTATIC_LONG); break;
587 case N_PATCHER_PUTSTATIC_CELL: replace_patcher(cd, N_PUTSTATIC_CELL); break;
588 case N_PATCHER_GETFIELD_INT: replace_patcher(cd, N_GETFIELD_INT); break;
589 case N_PATCHER_GETFIELD_LONG: replace_patcher(cd, N_GETFIELD_LONG); break;
590 case N_PATCHER_GETFIELD_CELL: replace_patcher(cd, N_GETFIELD_CELL); break;
591 case N_PATCHER_PUTFIELD_INT: replace_patcher(cd, N_PUTFIELD_INT); break;
592 case N_PATCHER_PUTFIELD_LONG: replace_patcher(cd, N_PUTFIELD_LONG); break;
593 case N_PATCHER_PUTFIELD_CELL: replace_patcher(cd, N_PUTFIELD_CELL); break;
594 case N_PATCHER_MULTIANEWARRAY: replace_patcher(cd, N_MULTIANEWARRAY); break;
595 case N_PATCHER_INVOKESTATIC: replace_patcher(cd, N_INVOKESTATIC); break;
596 case N_PATCHER_INVOKESPECIAL: replace_patcher(cd, N_INVOKESPECIAL); break;
597 case N_PATCHER_INVOKEVIRTUAL: replace_patcher(cd, N_INVOKEVIRTUAL); break;
598 case N_PATCHER_INVOKEINTERFACE:replace_patcher(cd, N_INVOKEINTERFACE); break;
599 case N_PATCHER_CHECKCAST: replace_patcher(cd, N_CHECKCAST); break;
600 case N_PATCHER_INSTANCEOF: replace_patcher(cd, N_INSTANCEOF); break;
601 case N_PATCHER_NATIVECALL:
603 replace_patcher(cd, N_TRACENATIVECALL);
605 replace_patcher(cd, N_NATIVECALL);
607 case N_TRANSLATE: break;
610 compile_prim_dyn(cd, instr);
616 void finish_ss(codegendata *cd)
617 /* conclude the last static super and compile it dynamically */
620 if (cd->lastmcodeptr != NULL) {
621 gen_inst1(cd, *(ptrint *)(cd->lastmcodeptr));
622 cd->lastmcodeptr = NULL;
628 void gen_inst(codegendata *cd, ptrint instr)
630 cd->lastmcodeptr = cd->mcodeptr;
631 gen_inst1(cd, instr);
632 cd->mcodeptr += sizeof(Inst);
633 cd->lastmcodeptr = NULL;
636 void gen_inst(codegendata *cd, ptrint instr)
638 /* vmgen-0.6.2 generates gen_... calls with Inst ** as first
639 parameter, but we need to pass in cd to make lastmcodeptr
641 ptrint *lastmcodeptr = (ptrint *)cd->lastmcodeptr;
643 if (lastmcodeptr != NULL) {
646 assert(lastmcodeptr < cd->mcodeptr && cd->mcodeptr < lastmcodeptr+40);
648 combo = peephole_opt(*lastmcodeptr, instr, peeptable);
651 *lastmcodeptr = combo;
657 /* actually generate the threaded code instruction */
659 *((Inst *) cd->mcodeptr) = instr;
660 cd->lastmcodeptr = cd->mcodeptr;
661 cd->mcodeptr += sizeof(Inst);
665 void print_dynamic_super_statistics(void)
667 dolog("count_supers = %d", count_supers );
668 dolog("count_supers_unique = %d", count_supers_unique );
669 dolog("count_supers_reused = %d", count_supers_reused );
670 dolog("count_patchers_exec = %d", count_patchers_exec );
671 dolog("count_patchers_last = %d", count_patchers_last );
672 dolog("count_patchers_ins = %d", count_patchers_ins );
673 dolog("count_patchers = %d", count_patchers );
674 dolog("count_supers_nopatch= %d", count_supers_nopatch);
675 dolog("count_supers_patch = %d", count_supers_patch );
676 dolog("count_dispatches = %d", count_dispatches );
677 dolog("count_disp_nonreloc = %d", count_disp_nonreloc );
678 dolog("count_insts_reloc = %d", count_insts_reloc );
679 dolog("count_insts = %d", count_insts );
680 dolog("count_native_code = %d", count_native_code );
681 dolog("count_native_saved = %d", count_native_saved );
684 void dynamic_super_init(void)
686 check_prims(vm_prim);
687 hashtable_create(&hashtable_patchersupers, 1<<HASHTABLE_PATCHERSUPERS_BITS);
688 if (opt_no_replication)
689 hashtable_create(&hashtable_superreuse, 1<<HASHTABLE_SUPERREUSE_BITS);
691 #if defined(USE_THREADS)
692 /* create patchersupers hashtable lock object */
694 lock_hashtable_patchersupers = NEW(java_objectheader);
695 lock_hashtable_superreuse = NEW(java_objectheader);
697 # if defined(NATIVE_THREADS)
698 initObjectLock(lock_hashtable_patchersupers);
699 initObjectLock(lock_hashtable_superreuse);