1 /* src/vm/jit/intrp/dynamic-super.c dynamic superinstruction support
3 Copyright (C) 1995,1996,1997,1998,2000,2003,2004 Free Software Foundation, Inc.
6 Copyright (C) 1996-2005, 2006, 2007 R. Grafl, A. Krall, C. Kruegel,
7 C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
8 E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
9 J. Wenninger, Institut f. Computersprachen - TU Wien
11 This file is part of CACAO.
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2, or (at
16 your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
30 #define NO_IP 0 /* proper native code, without interpreter's ip */
40 #include "mm/memory.h"
42 #if defined(ENABLE_THREADS)
43 # include "threads/native/lock.h"
45 # include "threads/none/lock.h"
48 #include "toolbox/hashtable.h"
49 #include "toolbox/logging.h"
51 #include "vm/jit/disass.h"
52 #include "vm/jit/intrp/intrp.h"
54 #include "vmcore/options.h"
57 s4 no_super=0; /* option: just use replication, but no dynamic superinsts */
59 static char MAYBE_UNUSED superend[]={
60 #include <java-superend.i>
63 const char * const prim_names[]={
64 #include <java-names.i>
68 #define INST_ADDR(_inst) N_##_inst
69 #include <java-labels.i>
76 Label start; /* NULL if not relocatable */
77 s4 length; /* only includes the jump iff superend is true*/
78 s4 restlength; /* length of the rest (i.e., the jump or (on superend) 0) */
79 char superend; /* true if primitive ends superinstruction, i.e.,
80 unconditional branch, execute, etc. */
83 s4 offset; /* offset of immarg within prim */
84 char rel; /* true if immarg is relative */
85 } immargs[MAX_IMMARGS];
93 typedef struct superstart {
94 struct superstart *next;
95 s4 patcherm; /* offset of patcher, -1 if super has no patcher */
96 u4 length; /* length of superinstruction */
97 u1 *oldsuper; /* reused superinstruction: NULL if there is none */
104 static hashtable hashtable_patchersupers;
105 #define HASHTABLE_PATCHERSUPERS_BITS 14
107 #if defined(ENABLE_THREADS)
108 static java_objectheader *lock_hashtable_patchersupers;
111 /* stuff for -no-replication */
112 typedef struct superreuse {
113 struct superreuse *next;
118 static hashtable hashtable_superreuse;
119 #define HASHTABLE_SUPERREUSE_BITS 14
121 #if defined(ENABLE_THREADS)
122 static java_objectheader *lock_hashtable_superreuse;
125 # define debugp(x...) if (opt_verbose) fprintf(x)
126 #define debugp1(x...)
128 /* statistics variables */
130 u4 count_supers = 0; /* dynamic superinstructions, including replicas */
131 u4 count_supers_unique = 0; /* dynamic superinstructions, without replicas */
132 u4 count_supers_reused = 0; /* reused dynamic superinstructions */
133 u4 count_patchers_exec = 0; /* executed patchers */
134 u4 count_patchers_last = 0; /* executed last patchers */
135 u4 count_patchers_ins = 0; /* patchers inserted in patchersupers table */
136 u4 count_patchers = 0; /* superstarts with patcherm!=-1 */
137 u4 count_supers_nopatch= 0; /* superinstructions for code without patchers */
138 u4 count_supers_patch = 0; /* superinstructions for code with patchers */
139 u4 count_dispatches = 0; /* dynamic superinstructions generated */
140 u4 count_disp_nonreloc = 0; /* */
141 u4 count_insts_reloc = 0; /* relocatable insts compiled (append_prim) */
142 u4 count_insts = 0; /* compiled insts (gen_inst) */
143 u4 count_native_code = 0; /* native code bytes */
144 u4 count_native_saved = 0; /* reclaimed native code */
146 /* determine priminfos */
149 java_objectheader *engine2(Inst *ip0, Cell * sp, Cell * fp);
151 static int compare_labels(const void *pa, const void *pb)
153 char *a = *(char **)pa;
154 char *b = *(char **)pb;
158 static Label bsearch_next(Label key, Label *a, u4 n)
159 /* a is sorted; return the label >=key that is the closest in a;
160 return NULL if there is no label in a >=key */
172 return bsearch_next(key, a+mid+1, n-mid-1);
174 return bsearch_next(key, a, mid+1);
178 each VM instruction <x> looks like this:
181 I_<x>: /* entry point */
183 J_<x>: /* start of dispatch */
184 NEXT_P1_5; /* dispatch except GOTO */
185 K_<x>: /* just before goto */
186 DO_GOTO; /* goto part of dispatch */
189 * We need I_<x> for threaded code: this is the code address stored in
190 * direct threaded code.
192 * We need the <skip> to detect non-relocatability (by comparing
193 * engine and engine2 with different skips). H_<x> is there to ensure
194 * that gcc does not think that <skip> is dead code.
196 * We need J_<x> to implement dynamic superinstructions: copy
197 * everything between I_<x> and J_<x> to get a VM instruction without
200 * At the end of a dynamic superinstruction we need a dispatch. We
201 * need the DO_GOTO to work around gcc bug 15242: we copy everything
202 * up to K_<x> and then copy the indirect jump from &&before_goto.
207 static void check_prims(Label symbols1[])
210 Label *symbols2, *symbols3, *js1, *ks1, *ks1sorted;
211 Label after_goto, before_goto2;
215 fprintf(stderr, "Compiled with gcc-" __VERSION__ "\n");
217 for (i=0; symbols1[i]!=0; i++)
224 symbols2=(Inst *)engine2(0,0,0);
226 symbols3=(Inst *)engine3(0,0,0);
232 before_goto = ks1[i+1]; /* after ks and after after_last */
233 after_goto = ks1[i+2];
234 before_goto2 = (ks1+(symbols2-symbols1))[i+1];
235 /* some gccs reorder the code; below we look for the next K label
236 with binary search, and whether it belongs to the current
237 instruction; prepare for this by sorting the K labels */
238 ks1sorted = (Label *)alloca((i+1)*sizeof(Label));
239 MCOPY(ks1sorted,ks1,Label,i+1);
240 qsort(ks1sorted, i+1, sizeof(Label), compare_labels);
242 /* check whether the "goto *" is relocatable */
243 goto_len = after_goto-before_goto;
244 debugp(stderr, "goto * %p %p len=%d\n",
245 before_goto,before_goto2,goto_len);
246 if (memcmp(before_goto, before_goto2, goto_len)!=0) { /* unequal */
247 opt_no_dynamic = true;
248 debugp(stderr," not relocatable, disabling dynamic code generation\n");
252 priminfos = calloc(i,sizeof(PrimInfo));
253 for (i=0; symbols1[i]!=0; i++) {
254 /* check whether the VM instruction i has the same code in
255 symbols1 and symbols2 (then it is relocatable). Also, for real
256 native code (not activated), check where the versions in
257 symbols1 and symbols 3 differ; these are places for
259 int prim_len = js1[i]-symbols1[i];
260 PrimInfo *pi=&priminfos[i];
262 char *s1 = (char *)symbols1[i];
263 char *s2 = (char *)symbols2[i];
264 char *s3 = (char *)symbols3[i];
265 Label endlabel = bsearch_next(s1+1,ks1sorted,npriminfos+1);
268 pi->superend = superend[i]|no_super;
269 pi->length = prim_len;
270 pi->restlength = endlabel - symbols1[i] - pi->length;
273 debugp(stderr, "%-15s %3d %p %p %p len=%3ld restlen=%2ld s-end=%1d",
274 prim_names[i], i, s1, s2, s3, (long)(pi->length), (long)(pi->restlength), pi->superend);
275 if (endlabel == NULL) {
276 pi->start = NULL; /* not relocatable */
277 if (pi->length<0) pi->length=1000;
278 debugp(stderr,"\n non_reloc: no K label > start found\n");
281 if (js1[i] > endlabel && !pi->superend) {
282 pi->start = NULL; /* not relocatable */
283 pi->length = endlabel-symbols1[i];
284 debugp(stderr,"\n non_reloc: there is a K label before the J label (restlength<0)\n");
287 if (js1[i] < pi->start && !pi->superend) {
288 pi->start = NULL; /* not relocatable */
289 pi->length = endlabel-symbols1[i];
290 debugp(stderr,"\n non_reloc: J label before I label (length<0)\n");
293 assert(pi->length>=0);
294 assert(pi->restlength >=0);
295 while (j<(pi->length+pi->restlength)) {
297 if (s1[j] != s2[j]) {
298 pi->start = NULL; /* not relocatable */
299 debugp(stderr,"\n non_reloc: engine1!=engine2 offset %3d",j);
300 /* assert(j<prim_len); */
305 /* only used for NO_IP native code generation */
306 /* here we find differences in inline arguments */
315 static bool is_relocatable(ptrint p)
317 return !opt_no_dynamic && priminfos[p].start != NULL;
321 void append_prim(codegendata *cd, ptrint p)
323 PrimInfo *pi = &priminfos[p];
324 debugp1(stderr,"append_prim %p %s\n",cd->lastmcodeptr, prim_names[p]);
325 if (cd->ncodeptr + pi->length + pi->restlength + goto_len >
326 cd->ncodebase + cd->ncodesize) {
327 cd->ncodeptr = codegen_ncode_increase(cd, cd->ncodeptr);
329 memcpy(cd->ncodeptr, pi->start, pi->length);
330 cd->ncodeptr += pi->length;
334 static void init_dynamic_super(codegendata *cd)
336 cd->lastpatcheroffset = -1;
337 cd->dynsuperm = -1; /* the next VM instr may be non-relocatable */
338 cd->dynsupern = cd->ncodeptr - cd->ncodebase;
342 /******************* -no-replication stuff **********************/
344 /* bugs: superinstructions are inserted into the table only after the
345 end of a method, so there may be replication within a method even
346 with -no-replication. Moreover, there is a race condition that can
347 cause varying amounts of replication between different threads;
348 this could only be avoided by eliminating the bug above and having
351 static u4 hash_superreuse(u1 *code, u4 length)
352 /* calculates a hash value for given code length */
357 for (i=0; i<(length&(~3)); i+=sizeof(u4)) {
358 r += *(s4 *)(code+i); /* !! align each superinstruction */
360 return (r+(r>>HASHTABLE_SUPERREUSE_BITS))&((1<<HASHTABLE_SUPERREUSE_BITS)-1);
363 static void superreuse_insert(u1 *code, u4 length)
365 u4 slot = hash_superreuse(code, length);
366 superreuse **listp = (superreuse **)&hashtable_superreuse.ptr[slot];
367 superreuse *sr = NEW(superreuse);
371 LOCK_MONITOR_ENTER(lock_hashtable_superreuse);
376 LOCK_MONITOR_EXIT(lock_hashtable_superreuse);
378 count_supers_unique++;
381 static u1 *superreuse_lookup(u1 *code, u4 length)
382 /* returns earlier instances code address, or NULL */
384 u4 slot = hash_superreuse(code, length);
385 superreuse *sr = (superreuse *)hashtable_superreuse.ptr[slot];
386 /* since there is another race condition anyway, we don't bother
387 locking here; both race conditions don't affect the correctness */
388 for (; sr != NULL; sr = sr->next)
389 if (length == sr->length && memcmp(code, sr->code, length)==0)
394 /******************* patcher stuff *******************************/
396 void patchersuper_rewrite(Inst *p)
397 /* p is the address of a patcher; if this is the last patcher in
398 a dynamic superinstruction, rewrite the threaded code to use
399 the dynamic superinstruction; the data for that comes from
400 hashtable_patchersupers */
402 ptrint key = (ptrint)p;
403 u4 slot = ((key + (key>>HASHTABLE_PATCHERSUPERS_BITS)) &
404 ((1<<HASHTABLE_PATCHERSUPERS_BITS)-1));
405 superstart **listp = (superstart **)&hashtable_patchersupers.ptr[slot];
407 count_patchers_exec++;
409 LOCK_MONITOR_ENTER(lock_hashtable_patchersupers);
411 for (; ss=*listp, ss!=NULL; listp = &(ss->next)) {
412 if (p == ((Inst *)(ss->mcodebase + ss->patcherm))) {
414 if (ss->oldsuper != NULL)
415 target = (Inst)ss->oldsuper;
417 target = (Inst)(ss->ncodebase + ss->dynsupern);
418 *(Inst *)(ss->mcodebase + ss->dynsuperm) = target;
419 debugp1(stderr, "patcher rewrote %p into %p\n",
420 ss->mcodebase + ss->dynsuperm, target);
422 FREE(ss, superstart);
423 count_patchers_last++;
428 LOCK_MONITOR_EXIT(lock_hashtable_patchersupers);
431 static void hashtable_patchersupers_insert(superstart *ss)
433 ptrint key = (ptrint)(ss->mcodebase + ss->patcherm);
434 u4 slot = ((key + (key>>HASHTABLE_PATCHERSUPERS_BITS)) &
435 ((1<<HASHTABLE_PATCHERSUPERS_BITS)-1));
436 void **listp = &hashtable_patchersupers.ptr[slot];
438 LOCK_MONITOR_ENTER(lock_hashtable_patchersupers);
440 ss->next = (superstart *)*listp;
443 LOCK_MONITOR_EXIT(lock_hashtable_patchersupers);
445 count_patchers_ins++;
448 void dynamic_super_rewrite(codegendata *cd)
449 /* rewrite the threaded code in the superstarts list to point to
450 the dynamic superinstructions */
452 superstart *ss = cd->superstarts;
454 for (; ss != NULL; ss = ssnext) {
456 if (opt_no_replication && ss->oldsuper == NULL)
457 superreuse_insert(cd->ncodebase + ss->dynsupern, ss->length);
458 if (ss->patcherm == -1) {
459 assert(ss->oldsuper == NULL);
460 *(Inst *)(cd->mcodebase + ss->dynsuperm) =
461 (Inst)(cd->ncodebase + ss->dynsupern);
462 debugp1(stderr, "rewrote %p into %p\n",
463 cd->mcodebase + ss->dynsuperm, cd->ncodebase + ss->dynsupern);
464 count_supers_nopatch++;
466 /* fprintf(stderr,"%p stage2\n", ss); */
467 ss->mcodebase = cd->mcodebase;
468 ss->ncodebase = cd->ncodebase;
469 hashtable_patchersupers_insert(ss);
470 count_supers_patch++;
475 static void new_dynamic_super(codegendata *cd)
476 /* add latest dynamic superinstruction to the appropriate table
477 for rewriting the threaded code either on relocation (if there
478 is no patcher), or when the last patcher is executed (if there
483 u1 *code = cd->ncodebase + cd->dynsupern;
484 u4 length = cd->ncodeptr - code;
486 if (opt_no_replication) {
487 oldsuper = superreuse_lookup(code, length);
488 if (oldsuper != NULL) {
489 count_supers_reused++;
490 count_native_saved += length;
492 if (cd->lastpatcheroffset == -1) {
493 *(Inst *)(cd->mcodebase + cd->dynsuperm) = (Inst)oldsuper;
494 debugp1(stderr, "rewrote %p into %p (reused)\n",
495 cd->mcodebase + ss->dynsuperm, oldsuper);
500 if (cd->lastpatcheroffset == -1) {
501 ss = DNEW(superstart);
503 ss = NEW(superstart);
505 /* fprintf(stderr,"%p stage1\n", ss); */
507 ss->patcherm = cd->lastpatcheroffset;
508 ss->oldsuper = oldsuper;
510 ss->dynsuperm = cd->dynsuperm;
511 ss->dynsupern = cd->dynsupern;
512 ss->next = cd->superstarts;
513 cd->superstarts = ss;
514 count_native_code += cd->ncodeptr - code;
517 void append_dispatch(codegendata *cd)
519 debugp1(stderr,"append_dispatch %p\n",cd->lastmcodeptr);
520 if (cd->lastinstwithoutdispatch != ~0) {
521 PrimInfo *pi = &priminfos[cd->lastinstwithoutdispatch];
523 memcpy(cd->ncodeptr, pi->start + pi->length, pi->restlength);
524 cd->ncodeptr += pi->restlength;
525 memcpy(cd->ncodeptr, before_goto, goto_len);
526 cd->ncodeptr += goto_len;
527 cd->lastinstwithoutdispatch = ~0;
528 new_dynamic_super(cd);
531 init_dynamic_super(cd);
534 static void compile_prim_dyn(codegendata *cd, ptrint p)
535 /* compile prim #p dynamically (mod flags etc.) */
539 assert(p<npriminfos);
540 if (!is_relocatable(p)) {
541 if (cd->lastinstwithoutdispatch != ~0) /* only count real dispatches */
542 count_disp_nonreloc++;
543 append_dispatch(cd); /* to previous dynamic superinstruction */
546 if (cd->dynsuperm == -1)
547 cd->dynsuperm = cd->lastmcodeptr - cd->mcodebase;
549 cd->lastinstwithoutdispatch = p;
550 if (priminfos[p].superend)
555 static void replace_patcher(codegendata *cd, ptrint p)
556 /* compile p dynamically, and note that there is a patcher here */
558 if (opt_no_quicksuper) {
561 cd->lastpatcheroffset = cd->lastmcodeptr - cd->mcodebase;
562 compile_prim_dyn(cd, p);
566 void gen_inst1(codegendata *cd, ptrint instr)
568 /* actually generate the threaded code instruction */
570 debugp1(stderr, "gen_inst %p, %s\n", cd->lastmcodeptr, prim_names[instr]);
571 *((Inst *) cd->lastmcodeptr) = vm_prim[instr];
573 case N_PATCHER_ACONST: replace_patcher(cd, N_ACONST1); break;
574 case N_PATCHER_ARRAYCHECKCAST: replace_patcher(cd, N_ARRAYCHECKCAST); break;
575 case N_PATCHER_GETSTATIC_INT: replace_patcher(cd, N_GETSTATIC_INT); break;
576 case N_PATCHER_GETSTATIC_LONG: replace_patcher(cd, N_GETSTATIC_LONG); break;
577 case N_PATCHER_GETSTATIC_CELL: replace_patcher(cd, N_GETSTATIC_CELL); break;
578 case N_PATCHER_PUTSTATIC_INT: replace_patcher(cd, N_PUTSTATIC_INT); break;
579 case N_PATCHER_PUTSTATIC_LONG: replace_patcher(cd, N_PUTSTATIC_LONG); break;
580 case N_PATCHER_PUTSTATIC_CELL: replace_patcher(cd, N_PUTSTATIC_CELL); break;
581 case N_PATCHER_GETFIELD_INT: replace_patcher(cd, N_GETFIELD_INT); break;
582 case N_PATCHER_GETFIELD_LONG: replace_patcher(cd, N_GETFIELD_LONG); break;
583 case N_PATCHER_GETFIELD_CELL: replace_patcher(cd, N_GETFIELD_CELL); break;
584 case N_PATCHER_PUTFIELD_INT: replace_patcher(cd, N_PUTFIELD_INT); break;
585 case N_PATCHER_PUTFIELD_LONG: replace_patcher(cd, N_PUTFIELD_LONG); break;
586 case N_PATCHER_PUTFIELD_CELL: replace_patcher(cd, N_PUTFIELD_CELL); break;
587 case N_PATCHER_MULTIANEWARRAY: replace_patcher(cd, N_MULTIANEWARRAY); break;
588 case N_PATCHER_INVOKESTATIC: replace_patcher(cd, N_INVOKESTATIC); break;
589 case N_PATCHER_INVOKESPECIAL: replace_patcher(cd, N_INVOKESPECIAL); break;
590 case N_PATCHER_INVOKEVIRTUAL: replace_patcher(cd, N_INVOKEVIRTUAL); break;
591 case N_PATCHER_INVOKEINTERFACE:replace_patcher(cd, N_INVOKEINTERFACE); break;
592 case N_PATCHER_CHECKCAST: replace_patcher(cd, N_CHECKCAST); break;
593 case N_PATCHER_INSTANCEOF: replace_patcher(cd, N_INSTANCEOF); break;
594 case N_PATCHER_NATIVECALL:
596 replace_patcher(cd, N_TRACENATIVECALL);
598 replace_patcher(cd, N_NATIVECALL);
600 case N_TRANSLATE: break;
603 compile_prim_dyn(cd, instr);
609 void finish_ss(codegendata *cd)
610 /* conclude the last static super and compile it dynamically */
613 if (cd->lastmcodeptr != NULL) {
614 gen_inst1(cd, *(ptrint *)(cd->lastmcodeptr));
615 cd->lastmcodeptr = NULL;
621 void gen_inst(codegendata *cd, ptrint instr)
623 cd->lastmcodeptr = cd->mcodeptr;
624 gen_inst1(cd, instr);
625 cd->mcodeptr += sizeof(Inst);
626 cd->lastmcodeptr = NULL;
629 void gen_inst(codegendata *cd, ptrint instr)
631 /* vmgen-0.6.2 generates gen_... calls with Inst ** as first
632 parameter, but we need to pass in cd to make lastmcodeptr
634 ptrint *lastmcodeptr = (ptrint *)cd->lastmcodeptr;
636 if (lastmcodeptr != NULL) {
639 assert((u1 *) lastmcodeptr < cd->mcodeptr &&
640 cd->mcodeptr < (u1 *) (lastmcodeptr + 40));
642 combo = peephole_opt(*lastmcodeptr, instr, peeptable);
645 *lastmcodeptr = combo;
651 /* actually generate the threaded code instruction */
653 *((Inst *) cd->mcodeptr) = (Inst) instr;
655 cd->lastmcodeptr = cd->mcodeptr;
656 cd->mcodeptr += sizeof(Inst);
660 void print_dynamic_super_statistics(void)
662 dolog("count_supers = %d", count_supers );
663 dolog("count_supers_unique = %d", count_supers_unique );
664 dolog("count_supers_reused = %d", count_supers_reused );
665 dolog("count_patchers_exec = %d", count_patchers_exec );
666 dolog("count_patchers_last = %d", count_patchers_last );
667 dolog("count_patchers_ins = %d", count_patchers_ins );
668 dolog("count_patchers = %d", count_patchers );
669 dolog("count_supers_nopatch= %d", count_supers_nopatch);
670 dolog("count_supers_patch = %d", count_supers_patch );
671 dolog("count_dispatches = %d", count_dispatches );
672 dolog("count_disp_nonreloc = %d", count_disp_nonreloc );
673 dolog("count_insts_reloc = %d", count_insts_reloc );
674 dolog("count_insts = %d", count_insts );
675 dolog("count_native_code = %d", count_native_code );
676 dolog("count_native_saved = %d", count_native_saved );
679 #if defined(ENABLE_DISASSEMBLER)
680 void disassemble_prim(int n)
682 PrimInfo *p = &(priminfos[n]);
683 u1 *start = vm_prim[n];
685 #if defined(ENABLE_JIT)
686 printf("%s (len=%d, restlen=%d):\n",prim_names[n],p->length, p->restlength);
687 disassemble(start, start + p->length + p->restlength);
690 #endif /* defined(ENABLE_DISASSEMBLER) */
692 void dynamic_super_init(void)
694 check_prims(vm_prim);
696 #if defined(ENABLE_DISASSEMBLER)
698 disassemble_prim(N_IADD);
699 disassemble_prim(N_ILOAD);
700 disassemble_prim(N_GETFIELD_INT);
704 hashtable_create(&hashtable_patchersupers, 1<<HASHTABLE_PATCHERSUPERS_BITS);
705 if (opt_no_replication)
706 hashtable_create(&hashtable_superreuse, 1<<HASHTABLE_SUPERREUSE_BITS);
708 #if defined(ENABLE_THREADS)
709 /* create patchersupers hashtable lock object */
711 lock_hashtable_patchersupers = NEW(java_objectheader);
712 lock_hashtable_superreuse = NEW(java_objectheader);
714 lock_init_object_lock(lock_hashtable_patchersupers);
715 lock_init_object_lock(lock_hashtable_superreuse);