1 /* src/vm/jit/intrp/dynamic-super.c dynamic superinstruction support
3 Copyright (C) 1995,1996,1997,1998,2000,2003,2004 Free Software Foundation, Inc.
6 Copyright (C) 1996-2005, 2006, 2007 R. Grafl, A. Krall, C. Kruegel,
7 C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
8 E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
9 J. Wenninger, Institut f. Computersprachen - TU Wien
11 This file is part of CACAO.
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2, or (at
16 your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 $Id: dynamic-super.c 7366 2007-02-15 19:48:11Z twisti $
32 #define NO_IP 0 /* proper native code, without interpreter's ip */
42 #include "mm/memory.h"
44 #if defined(ENABLE_THREADS)
45 # include "threads/native/lock.h"
47 # include "threads/none/lock.h"
50 #include "toolbox/hashtable.h"
51 #include "toolbox/logging.h"
53 #include "vm/jit/disass.h"
54 #include "vm/jit/intrp/intrp.h"
56 #include "vmcore/options.h"
59 s4 no_super=0; /* option: just use replication, but no dynamic superinsts */
61 static char MAYBE_UNUSED superend[]={
62 #include <java-superend.i>
65 const char * const prim_names[]={
66 #include <java-names.i>
70 #define INST_ADDR(_inst) N_##_inst
71 #include <java-labels.i>
78 Label start; /* NULL if not relocatable */
79 s4 length; /* only includes the jump iff superend is true*/
80 s4 restlength; /* length of the rest (i.e., the jump or (on superend) 0) */
81 char superend; /* true if primitive ends superinstruction, i.e.,
82 unconditional branch, execute, etc. */
85 s4 offset; /* offset of immarg within prim */
86 char rel; /* true if immarg is relative */
87 } immargs[MAX_IMMARGS];
95 typedef struct superstart {
96 struct superstart *next;
97 s4 patcherm; /* offset of patcher, -1 if super has no patcher */
98 u4 length; /* length of superinstruction */
99 u1 *oldsuper; /* reused superinstruction: NULL if there is none */
106 static hashtable hashtable_patchersupers;
107 #define HASHTABLE_PATCHERSUPERS_BITS 14
109 #if defined(ENABLE_THREADS)
110 static java_objectheader *lock_hashtable_patchersupers;
113 /* stuff for -no-replication */
114 typedef struct superreuse {
115 struct superreuse *next;
120 static hashtable hashtable_superreuse;
121 #define HASHTABLE_SUPERREUSE_BITS 14
123 #if defined(ENABLE_THREADS)
124 static java_objectheader *lock_hashtable_superreuse;
127 # define debugp(x...) if (opt_verbose) fprintf(x)
128 #define debugp1(x...)
130 /* statistics variables */
132 u4 count_supers = 0; /* dynamic superinstructions, including replicas */
133 u4 count_supers_unique = 0; /* dynamic superinstructions, without replicas */
134 u4 count_supers_reused = 0; /* reused dynamic superinstructions */
135 u4 count_patchers_exec = 0; /* executed patchers */
136 u4 count_patchers_last = 0; /* executed last patchers */
137 u4 count_patchers_ins = 0; /* patchers inserted in patchersupers table */
138 u4 count_patchers = 0; /* superstarts with patcherm!=-1 */
139 u4 count_supers_nopatch= 0; /* superinstructions for code without patchers */
140 u4 count_supers_patch = 0; /* superinstructions for code with patchers */
141 u4 count_dispatches = 0; /* dynamic superinstructions generated */
142 u4 count_disp_nonreloc = 0; /* */
143 u4 count_insts_reloc = 0; /* relocatable insts compiled (append_prim) */
144 u4 count_insts = 0; /* compiled insts (gen_inst) */
145 u4 count_native_code = 0; /* native code bytes */
146 u4 count_native_saved = 0; /* reclaimed native code */
148 /* determine priminfos */
151 java_objectheader *engine2(Inst *ip0, Cell * sp, Cell * fp);
153 static int compare_labels(const void *pa, const void *pb)
155 char *a = *(char **)pa;
156 char *b = *(char **)pb;
160 static Label bsearch_next(Label key, Label *a, u4 n)
161 /* a is sorted; return the label >=key that is the closest in a;
162 return NULL if there is no label in a >=key */
174 return bsearch_next(key, a+mid+1, n-mid-1);
176 return bsearch_next(key, a, mid+1);
180 each VM instruction <x> looks like this:
183 I_<x>: /* entry point */
185 J_<x>: /* start of dispatch */
186 NEXT_P1_5; /* dispatch except GOTO */
187 K_<x>: /* just before goto */
188 DO_GOTO; /* goto part of dispatch */
191 * We need I_<x> for threaded code: this is the code address stored in
192 * direct threaded code.
194 * We need the <skip> to detect non-relocatability (by comparing
195 * engine and engine2 with different skips). H_<x> is there to ensure
196 * that gcc does not think that <skip> is dead code.
198 * We need J_<x> to implement dynamic superinstructions: copy
199 * everything between I_<x> and J_<x> to get a VM instruction without
202 * At the end of a dynamic superinstruction we need a dispatch. We
203 * need the DO_GOTO to work around gcc bug 15242: we copy everything
204 * up to K_<x> and then copy the indirect jump from &&before_goto.
209 static void check_prims(Label symbols1[])
212 Label *symbols2, *symbols3, *js1, *ks1, *ks1sorted;
213 Label after_goto, before_goto2;
217 fprintf(stderr, "Compiled with gcc-" __VERSION__ "\n");
219 for (i=0; symbols1[i]!=0; i++)
226 symbols2=(Inst *)engine2(0,0,0);
228 symbols3=(Inst *)engine3(0,0,0);
234 before_goto = ks1[i+1]; /* after ks and after after_last */
235 after_goto = ks1[i+2];
236 before_goto2 = (ks1+(symbols2-symbols1))[i+1];
237 /* some gccs reorder the code; below we look for the next K label
238 with binary search, and whether it belongs to the current
239 instruction; prepare for this by sorting the K labels */
240 ks1sorted = (Label *)alloca((i+1)*sizeof(Label));
241 MCOPY(ks1sorted,ks1,Label,i+1);
242 qsort(ks1sorted, i+1, sizeof(Label), compare_labels);
244 /* check whether the "goto *" is relocatable */
245 goto_len = after_goto-before_goto;
246 debugp(stderr, "goto * %p %p len=%d\n",
247 before_goto,before_goto2,goto_len);
248 if (memcmp(before_goto, before_goto2, goto_len)!=0) { /* unequal */
249 opt_no_dynamic = true;
250 debugp(stderr," not relocatable, disabling dynamic code generation\n");
254 priminfos = calloc(i,sizeof(PrimInfo));
255 for (i=0; symbols1[i]!=0; i++) {
256 /* check whether the VM instruction i has the same code in
257 symbols1 and symbols2 (then it is relocatable). Also, for real
258 native code (not activated), check where the versions in
259 symbols1 and symbols 3 differ; these are places for
261 int prim_len = js1[i]-symbols1[i];
262 PrimInfo *pi=&priminfos[i];
264 char *s1 = (char *)symbols1[i];
265 char *s2 = (char *)symbols2[i];
266 char *s3 = (char *)symbols3[i];
267 Label endlabel = bsearch_next(s1+1,ks1sorted,npriminfos+1);
270 pi->superend = superend[i]|no_super;
271 pi->length = prim_len;
272 pi->restlength = endlabel - symbols1[i] - pi->length;
275 debugp(stderr, "%-15s %3d %p %p %p len=%3ld restlen=%2ld s-end=%1d",
276 prim_names[i], i, s1, s2, s3, (long)(pi->length), (long)(pi->restlength), pi->superend);
277 if (endlabel == NULL) {
278 pi->start = NULL; /* not relocatable */
279 if (pi->length<0) pi->length=1000;
280 debugp(stderr,"\n non_reloc: no K label > start found\n");
283 if (js1[i] > endlabel && !pi->superend) {
284 pi->start = NULL; /* not relocatable */
285 pi->length = endlabel-symbols1[i];
286 debugp(stderr,"\n non_reloc: there is a K label before the J label (restlength<0)\n");
289 if (js1[i] < pi->start && !pi->superend) {
290 pi->start = NULL; /* not relocatable */
291 pi->length = endlabel-symbols1[i];
292 debugp(stderr,"\n non_reloc: J label before I label (length<0)\n");
295 assert(pi->length>=0);
296 assert(pi->restlength >=0);
297 while (j<(pi->length+pi->restlength)) {
299 if (s1[j] != s2[j]) {
300 pi->start = NULL; /* not relocatable */
301 debugp(stderr,"\n non_reloc: engine1!=engine2 offset %3d",j);
302 /* assert(j<prim_len); */
307 /* only used for NO_IP native code generation */
308 /* here we find differences in inline arguments */
317 static bool is_relocatable(ptrint p)
319 return !opt_no_dynamic && priminfos[p].start != NULL;
323 void append_prim(codegendata *cd, ptrint p)
325 PrimInfo *pi = &priminfos[p];
326 debugp1(stderr,"append_prim %p %s\n",cd->lastmcodeptr, prim_names[p]);
327 if (cd->ncodeptr + pi->length + pi->restlength + goto_len >
328 cd->ncodebase + cd->ncodesize) {
329 cd->ncodeptr = codegen_ncode_increase(cd, cd->ncodeptr);
331 memcpy(cd->ncodeptr, pi->start, pi->length);
332 cd->ncodeptr += pi->length;
336 static void init_dynamic_super(codegendata *cd)
338 cd->lastpatcheroffset = -1;
339 cd->dynsuperm = -1; /* the next VM instr may be non-relocatable */
340 cd->dynsupern = cd->ncodeptr - cd->ncodebase;
344 /******************* -no-replication stuff **********************/
346 /* bugs: superinstructions are inserted into the table only after the
347 end of a method, so there may be replication within a method even
348 with -no-replication. Moreover, there is a race condition that can
349 cause varying amounts of replication between different threads;
350 this could only be avoided by eliminating the bug above and having
353 static u4 hash_superreuse(u1 *code, u4 length)
354 /* calculates a hash value for given code length */
359 for (i=0; i<(length&(~3)); i+=sizeof(u4)) {
360 r += *(s4 *)(code+i); /* !! align each superinstruction */
362 return (r+(r>>HASHTABLE_SUPERREUSE_BITS))&((1<<HASHTABLE_SUPERREUSE_BITS)-1);
365 static void superreuse_insert(u1 *code, u4 length)
367 u4 slot = hash_superreuse(code, length);
368 superreuse **listp = (superreuse **)&hashtable_superreuse.ptr[slot];
369 superreuse *sr = NEW(superreuse);
373 LOCK_MONITOR_ENTER(lock_hashtable_superreuse);
378 LOCK_MONITOR_EXIT(lock_hashtable_superreuse);
380 count_supers_unique++;
383 static u1 *superreuse_lookup(u1 *code, u4 length)
384 /* returns earlier instances code address, or NULL */
386 u4 slot = hash_superreuse(code, length);
387 superreuse *sr = (superreuse *)hashtable_superreuse.ptr[slot];
388 /* since there is another race condition anyway, we don't bother
389 locking here; both race conditions don't affect the correctness */
390 for (; sr != NULL; sr = sr->next)
391 if (length == sr->length && memcmp(code, sr->code, length)==0)
396 /******************* patcher stuff *******************************/
398 void patchersuper_rewrite(Inst *p)
399 /* p is the address of a patcher; if this is the last patcher in
400 a dynamic superinstruction, rewrite the threaded code to use
401 the dynamic superinstruction; the data for that comes from
402 hashtable_patchersupers */
404 ptrint key = (ptrint)p;
405 u4 slot = ((key + (key>>HASHTABLE_PATCHERSUPERS_BITS)) &
406 ((1<<HASHTABLE_PATCHERSUPERS_BITS)-1));
407 superstart **listp = (superstart **)&hashtable_patchersupers.ptr[slot];
409 count_patchers_exec++;
411 LOCK_MONITOR_ENTER(lock_hashtable_patchersupers);
413 for (; ss=*listp, ss!=NULL; listp = &(ss->next)) {
414 if (p == ((Inst *)(ss->mcodebase + ss->patcherm))) {
416 if (ss->oldsuper != NULL)
417 target = (Inst)ss->oldsuper;
419 target = (Inst)(ss->ncodebase + ss->dynsupern);
420 *(Inst *)(ss->mcodebase + ss->dynsuperm) = target;
421 debugp1(stderr, "patcher rewrote %p into %p\n",
422 ss->mcodebase + ss->dynsuperm, target);
424 FREE(ss, superstart);
425 count_patchers_last++;
430 LOCK_MONITOR_EXIT(lock_hashtable_patchersupers);
433 static void hashtable_patchersupers_insert(superstart *ss)
435 ptrint key = (ptrint)(ss->mcodebase + ss->patcherm);
436 u4 slot = ((key + (key>>HASHTABLE_PATCHERSUPERS_BITS)) &
437 ((1<<HASHTABLE_PATCHERSUPERS_BITS)-1));
438 void **listp = &hashtable_patchersupers.ptr[slot];
440 LOCK_MONITOR_ENTER(lock_hashtable_patchersupers);
442 ss->next = (superstart *)*listp;
445 LOCK_MONITOR_EXIT(lock_hashtable_patchersupers);
447 count_patchers_ins++;
450 void dynamic_super_rewrite(codegendata *cd)
451 /* rewrite the threaded code in the superstarts list to point to
452 the dynamic superinstructions */
454 superstart *ss = cd->superstarts;
456 for (; ss != NULL; ss = ssnext) {
458 if (opt_no_replication && ss->oldsuper == NULL)
459 superreuse_insert(cd->ncodebase + ss->dynsupern, ss->length);
460 if (ss->patcherm == -1) {
461 assert(ss->oldsuper == NULL);
462 *(Inst *)(cd->mcodebase + ss->dynsuperm) =
463 (Inst)(cd->ncodebase + ss->dynsupern);
464 debugp1(stderr, "rewrote %p into %p\n",
465 cd->mcodebase + ss->dynsuperm, cd->ncodebase + ss->dynsupern);
466 count_supers_nopatch++;
468 /* fprintf(stderr,"%p stage2\n", ss); */
469 ss->mcodebase = cd->mcodebase;
470 ss->ncodebase = cd->ncodebase;
471 hashtable_patchersupers_insert(ss);
472 count_supers_patch++;
477 static void new_dynamic_super(codegendata *cd)
478 /* add latest dynamic superinstruction to the appropriate table
479 for rewriting the threaded code either on relocation (if there
480 is no patcher), or when the last patcher is executed (if there
485 u1 *code = cd->ncodebase + cd->dynsupern;
486 u4 length = cd->ncodeptr - code;
488 if (opt_no_replication) {
489 oldsuper = superreuse_lookup(code, length);
490 if (oldsuper != NULL) {
491 count_supers_reused++;
492 count_native_saved += length;
494 if (cd->lastpatcheroffset == -1) {
495 *(Inst *)(cd->mcodebase + cd->dynsuperm) = (Inst)oldsuper;
496 debugp1(stderr, "rewrote %p into %p (reused)\n",
497 cd->mcodebase + ss->dynsuperm, oldsuper);
502 if (cd->lastpatcheroffset == -1) {
503 ss = DNEW(superstart);
505 ss = NEW(superstart);
507 /* fprintf(stderr,"%p stage1\n", ss); */
509 ss->patcherm = cd->lastpatcheroffset;
510 ss->oldsuper = oldsuper;
512 ss->dynsuperm = cd->dynsuperm;
513 ss->dynsupern = cd->dynsupern;
514 ss->next = cd->superstarts;
515 cd->superstarts = ss;
516 count_native_code += cd->ncodeptr - code;
519 void append_dispatch(codegendata *cd)
521 debugp1(stderr,"append_dispatch %p\n",cd->lastmcodeptr);
522 if (cd->lastinstwithoutdispatch != ~0) {
523 PrimInfo *pi = &priminfos[cd->lastinstwithoutdispatch];
525 memcpy(cd->ncodeptr, pi->start + pi->length, pi->restlength);
526 cd->ncodeptr += pi->restlength;
527 memcpy(cd->ncodeptr, before_goto, goto_len);
528 cd->ncodeptr += goto_len;
529 cd->lastinstwithoutdispatch = ~0;
530 new_dynamic_super(cd);
533 init_dynamic_super(cd);
536 static void compile_prim_dyn(codegendata *cd, ptrint p)
537 /* compile prim #p dynamically (mod flags etc.) */
541 assert(p<npriminfos);
542 if (!is_relocatable(p)) {
543 if (cd->lastinstwithoutdispatch != ~0) /* only count real dispatches */
544 count_disp_nonreloc++;
545 append_dispatch(cd); /* to previous dynamic superinstruction */
548 if (cd->dynsuperm == -1)
549 cd->dynsuperm = cd->lastmcodeptr - cd->mcodebase;
551 cd->lastinstwithoutdispatch = p;
552 if (priminfos[p].superend)
557 static void replace_patcher(codegendata *cd, ptrint p)
558 /* compile p dynamically, and note that there is a patcher here */
560 if (opt_no_quicksuper) {
563 cd->lastpatcheroffset = cd->lastmcodeptr - cd->mcodebase;
564 compile_prim_dyn(cd, p);
568 void gen_inst1(codegendata *cd, ptrint instr)
570 /* actually generate the threaded code instruction */
572 debugp1(stderr, "gen_inst %p, %s\n", cd->lastmcodeptr, prim_names[instr]);
573 *((Inst *) cd->lastmcodeptr) = vm_prim[instr];
575 case N_PATCHER_ACONST: replace_patcher(cd, N_ACONST1); break;
576 case N_PATCHER_ARRAYCHECKCAST: replace_patcher(cd, N_ARRAYCHECKCAST); break;
577 case N_PATCHER_GETSTATIC_INT: replace_patcher(cd, N_GETSTATIC_INT); break;
578 case N_PATCHER_GETSTATIC_LONG: replace_patcher(cd, N_GETSTATIC_LONG); break;
579 case N_PATCHER_GETSTATIC_CELL: replace_patcher(cd, N_GETSTATIC_CELL); break;
580 case N_PATCHER_PUTSTATIC_INT: replace_patcher(cd, N_PUTSTATIC_INT); break;
581 case N_PATCHER_PUTSTATIC_LONG: replace_patcher(cd, N_PUTSTATIC_LONG); break;
582 case N_PATCHER_PUTSTATIC_CELL: replace_patcher(cd, N_PUTSTATIC_CELL); break;
583 case N_PATCHER_GETFIELD_INT: replace_patcher(cd, N_GETFIELD_INT); break;
584 case N_PATCHER_GETFIELD_LONG: replace_patcher(cd, N_GETFIELD_LONG); break;
585 case N_PATCHER_GETFIELD_CELL: replace_patcher(cd, N_GETFIELD_CELL); break;
586 case N_PATCHER_PUTFIELD_INT: replace_patcher(cd, N_PUTFIELD_INT); break;
587 case N_PATCHER_PUTFIELD_LONG: replace_patcher(cd, N_PUTFIELD_LONG); break;
588 case N_PATCHER_PUTFIELD_CELL: replace_patcher(cd, N_PUTFIELD_CELL); break;
589 case N_PATCHER_MULTIANEWARRAY: replace_patcher(cd, N_MULTIANEWARRAY); break;
590 case N_PATCHER_INVOKESTATIC: replace_patcher(cd, N_INVOKESTATIC); break;
591 case N_PATCHER_INVOKESPECIAL: replace_patcher(cd, N_INVOKESPECIAL); break;
592 case N_PATCHER_INVOKEVIRTUAL: replace_patcher(cd, N_INVOKEVIRTUAL); break;
593 case N_PATCHER_INVOKEINTERFACE:replace_patcher(cd, N_INVOKEINTERFACE); break;
594 case N_PATCHER_CHECKCAST: replace_patcher(cd, N_CHECKCAST); break;
595 case N_PATCHER_INSTANCEOF: replace_patcher(cd, N_INSTANCEOF); break;
596 case N_PATCHER_NATIVECALL:
598 replace_patcher(cd, N_TRACENATIVECALL);
600 replace_patcher(cd, N_NATIVECALL);
602 case N_TRANSLATE: break;
605 compile_prim_dyn(cd, instr);
611 void finish_ss(codegendata *cd)
612 /* conclude the last static super and compile it dynamically */
615 if (cd->lastmcodeptr != NULL) {
616 gen_inst1(cd, *(ptrint *)(cd->lastmcodeptr));
617 cd->lastmcodeptr = NULL;
623 void gen_inst(codegendata *cd, ptrint instr)
625 cd->lastmcodeptr = cd->mcodeptr;
626 gen_inst1(cd, instr);
627 cd->mcodeptr += sizeof(Inst);
628 cd->lastmcodeptr = NULL;
631 void gen_inst(codegendata *cd, ptrint instr)
633 /* vmgen-0.6.2 generates gen_... calls with Inst ** as first
634 parameter, but we need to pass in cd to make lastmcodeptr
636 ptrint *lastmcodeptr = (ptrint *)cd->lastmcodeptr;
638 if (lastmcodeptr != NULL) {
641 assert((u1 *) lastmcodeptr < cd->mcodeptr &&
642 cd->mcodeptr < (u1 *) (lastmcodeptr + 40));
644 combo = peephole_opt(*lastmcodeptr, instr, peeptable);
647 *lastmcodeptr = combo;
653 /* actually generate the threaded code instruction */
655 *((Inst *) cd->mcodeptr) = (Inst) instr;
657 cd->lastmcodeptr = cd->mcodeptr;
658 cd->mcodeptr += sizeof(Inst);
662 void print_dynamic_super_statistics(void)
664 dolog("count_supers = %d", count_supers );
665 dolog("count_supers_unique = %d", count_supers_unique );
666 dolog("count_supers_reused = %d", count_supers_reused );
667 dolog("count_patchers_exec = %d", count_patchers_exec );
668 dolog("count_patchers_last = %d", count_patchers_last );
669 dolog("count_patchers_ins = %d", count_patchers_ins );
670 dolog("count_patchers = %d", count_patchers );
671 dolog("count_supers_nopatch= %d", count_supers_nopatch);
672 dolog("count_supers_patch = %d", count_supers_patch );
673 dolog("count_dispatches = %d", count_dispatches );
674 dolog("count_disp_nonreloc = %d", count_disp_nonreloc );
675 dolog("count_insts_reloc = %d", count_insts_reloc );
676 dolog("count_insts = %d", count_insts );
677 dolog("count_native_code = %d", count_native_code );
678 dolog("count_native_saved = %d", count_native_saved );
681 #if defined(ENABLE_DISASSEMBLER)
682 void disassemble_prim(int n)
684 PrimInfo *p = &(priminfos[n]);
685 u1 *start = vm_prim[n];
687 #if defined(ENABLE_JIT)
688 printf("%s (len=%d, restlen=%d):\n",prim_names[n],p->length, p->restlength);
689 disassemble(start, start + p->length + p->restlength);
692 #endif /* defined(ENABLE_DISASSEMBLER) */
694 void dynamic_super_init(void)
696 check_prims(vm_prim);
698 #if defined(ENABLE_DISASSEMBLER)
700 disassemble_prim(N_IADD);
701 disassemble_prim(N_ILOAD);
702 disassemble_prim(N_GETFIELD_INT);
706 hashtable_create(&hashtable_patchersupers, 1<<HASHTABLE_PATCHERSUPERS_BITS);
707 if (opt_no_replication)
708 hashtable_create(&hashtable_superreuse, 1<<HASHTABLE_SUPERREUSE_BITS);
710 #if defined(ENABLE_THREADS)
711 /* create patchersupers hashtable lock object */
713 lock_hashtable_patchersupers = NEW(java_objectheader);
714 lock_hashtable_superreuse = NEW(java_objectheader);
716 lock_init_object_lock(lock_hashtable_patchersupers);
717 lock_init_object_lock(lock_hashtable_superreuse);