1 /* src/vm/jit/intrp/dynamic-super.c dynamic superinstruction support
3 Copyright (C) 1995,1996,1997,1998,2000,2003,2004 Free Software Foundation, Inc.
6 Copyright (C) 1996-2005, 2006, 2007 R. Grafl, A. Krall, C. Kruegel,
7 C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
8 E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
9 J. Wenninger, Institut f. Computersprachen - TU Wien
11 This file is part of CACAO.
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2, or (at
16 your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
30 #define NO_IP 0 /* proper native code, without interpreter's ip */
40 #include "mm/memory.h"
42 #include "threads/lock-common.h"
44 #include "toolbox/hashtable.h"
45 #include "toolbox/logging.h"
47 #include "vm/jit/disass.h"
48 #include "vm/jit/intrp/intrp.h"
50 #include "vmcore/options.h"
53 s4 no_super=0; /* option: just use replication, but no dynamic superinsts */
55 static char MAYBE_UNUSED superend[]={
56 #include <java-superend.i>
59 const char * const prim_names[]={
60 #include <java-names.i>
64 #define INST_ADDR(_inst) N_##_inst
65 #include <java-labels.i>
72 Label start; /* NULL if not relocatable */
73 s4 length; /* only includes the jump iff superend is true*/
74 s4 restlength; /* length of the rest (i.e., the jump or (on superend) 0) */
75 char superend; /* true if primitive ends superinstruction, i.e.,
76 unconditional branch, execute, etc. */
79 s4 offset; /* offset of immarg within prim */
80 char rel; /* true if immarg is relative */
81 } immargs[MAX_IMMARGS];
89 typedef struct superstart {
90 struct superstart *next;
91 s4 patcherm; /* offset of patcher, -1 if super has no patcher */
92 u4 length; /* length of superinstruction */
93 u1 *oldsuper; /* reused superinstruction: NULL if there is none */
100 static hashtable hashtable_patchersupers;
101 #define HASHTABLE_PATCHERSUPERS_BITS 14
103 #if defined(ENABLE_THREADS)
104 static java_objectheader *lock_hashtable_patchersupers;
107 /* stuff for -no-replication */
108 typedef struct superreuse {
109 struct superreuse *next;
114 static hashtable hashtable_superreuse;
115 #define HASHTABLE_SUPERREUSE_BITS 14
117 #if defined(ENABLE_THREADS)
118 static java_objectheader *lock_hashtable_superreuse;
121 # define debugp(x...) if (opt_verbose) fprintf(x)
122 #define debugp1(x...)
124 /* statistics variables */
126 u4 count_supers = 0; /* dynamic superinstructions, including replicas */
127 u4 count_supers_unique = 0; /* dynamic superinstructions, without replicas */
128 u4 count_supers_reused = 0; /* reused dynamic superinstructions */
129 u4 count_patchers_exec = 0; /* executed patchers */
130 u4 count_patchers_last = 0; /* executed last patchers */
131 u4 count_patchers_ins = 0; /* patchers inserted in patchersupers table */
132 u4 count_patchers = 0; /* superstarts with patcherm!=-1 */
133 u4 count_supers_nopatch= 0; /* superinstructions for code without patchers */
134 u4 count_supers_patch = 0; /* superinstructions for code with patchers */
135 u4 count_dispatches = 0; /* dynamic superinstructions generated */
136 u4 count_disp_nonreloc = 0; /* */
137 u4 count_insts_reloc = 0; /* relocatable insts compiled (append_prim) */
138 u4 count_insts = 0; /* compiled insts (gen_inst) */
139 u4 count_native_code = 0; /* native code bytes */
140 u4 count_native_saved = 0; /* reclaimed native code */
142 /* determine priminfos */
145 java_objectheader *engine2(Inst *ip0, Cell * sp, Cell * fp);
147 static int compare_labels(const void *pa, const void *pb)
149 char *a = *(char **)pa;
150 char *b = *(char **)pb;
154 static Label bsearch_next(Label key, Label *a, u4 n)
155 /* a is sorted; return the label >=key that is the closest in a;
156 return NULL if there is no label in a >=key */
168 return bsearch_next(key, a+mid+1, n-mid-1);
170 return bsearch_next(key, a, mid+1);
174 each VM instruction <x> looks like this:
177 I_<x>: /* entry point */
179 J_<x>: /* start of dispatch */
180 NEXT_P1_5; /* dispatch except GOTO */
181 K_<x>: /* just before goto */
182 DO_GOTO; /* goto part of dispatch */
185 * We need I_<x> for threaded code: this is the code address stored in
186 * direct threaded code.
188 * We need the <skip> to detect non-relocatability (by comparing
189 * engine and engine2 with different skips). H_<x> is there to ensure
190 * that gcc does not think that <skip> is dead code.
192 * We need J_<x> to implement dynamic superinstructions: copy
193 * everything between I_<x> and J_<x> to get a VM instruction without
196 * At the end of a dynamic superinstruction we need a dispatch. We
197 * need the DO_GOTO to work around gcc bug 15242: we copy everything
198 * up to K_<x> and then copy the indirect jump from &&before_goto.
203 static void check_prims(Label symbols1[])
206 Label *symbols2, *symbols3, *js1, *ks1, *ks1sorted;
207 Label after_goto, before_goto2;
211 fprintf(stderr, "Compiled with gcc-" __VERSION__ "\n");
213 for (i=0; symbols1[i]!=0; i++)
220 symbols2=(Inst *)engine2(0,0,0);
222 symbols3=(Inst *)engine3(0,0,0);
228 before_goto = ks1[i+1]; /* after ks and after after_last */
229 after_goto = ks1[i+2];
230 before_goto2 = (ks1+(symbols2-symbols1))[i+1];
231 /* some gccs reorder the code; below we look for the next K label
232 with binary search, and whether it belongs to the current
233 instruction; prepare for this by sorting the K labels */
234 ks1sorted = (Label *)alloca((i+1)*sizeof(Label));
235 MCOPY(ks1sorted,ks1,Label,i+1);
236 qsort(ks1sorted, i+1, sizeof(Label), compare_labels);
238 /* check whether the "goto *" is relocatable */
239 goto_len = after_goto-before_goto;
240 debugp(stderr, "goto * %p %p len=%d\n",
241 before_goto,before_goto2,goto_len);
242 if (memcmp(before_goto, before_goto2, goto_len)!=0) { /* unequal */
243 opt_no_dynamic = true;
244 debugp(stderr," not relocatable, disabling dynamic code generation\n");
248 priminfos = calloc(i,sizeof(PrimInfo));
249 for (i=0; symbols1[i]!=0; i++) {
250 /* check whether the VM instruction i has the same code in
251 symbols1 and symbols2 (then it is relocatable). Also, for real
252 native code (not activated), check where the versions in
253 symbols1 and symbols 3 differ; these are places for
255 int prim_len = js1[i]-symbols1[i];
256 PrimInfo *pi=&priminfos[i];
258 char *s1 = (char *)symbols1[i];
259 char *s2 = (char *)symbols2[i];
260 char *s3 = (char *)symbols3[i];
261 Label endlabel = bsearch_next(s1+1,ks1sorted,npriminfos+1);
264 pi->superend = superend[i]|no_super;
265 pi->length = prim_len;
266 pi->restlength = endlabel - symbols1[i] - pi->length;
269 debugp(stderr, "%-15s %3d %p %p %p len=%3ld restlen=%2ld s-end=%1d",
270 prim_names[i], i, s1, s2, s3, (long)(pi->length), (long)(pi->restlength), pi->superend);
271 if (endlabel == NULL) {
272 pi->start = NULL; /* not relocatable */
273 if (pi->length<0) pi->length=1000;
274 debugp(stderr,"\n non_reloc: no K label > start found\n");
277 if (js1[i] > endlabel && !pi->superend) {
278 pi->start = NULL; /* not relocatable */
279 pi->length = endlabel-symbols1[i];
280 debugp(stderr,"\n non_reloc: there is a K label before the J label (restlength<0)\n");
283 if (js1[i] < pi->start && !pi->superend) {
284 pi->start = NULL; /* not relocatable */
285 pi->length = endlabel-symbols1[i];
286 debugp(stderr,"\n non_reloc: J label before I label (length<0)\n");
289 assert(pi->length>=0);
290 assert(pi->restlength >=0);
291 while (j<(pi->length+pi->restlength)) {
293 if (s1[j] != s2[j]) {
294 pi->start = NULL; /* not relocatable */
295 debugp(stderr,"\n non_reloc: engine1!=engine2 offset %3d",j);
296 /* assert(j<prim_len); */
301 /* only used for NO_IP native code generation */
302 /* here we find differences in inline arguments */
311 static bool is_relocatable(ptrint p)
313 return !opt_no_dynamic && priminfos[p].start != NULL;
317 void append_prim(codegendata *cd, ptrint p)
319 PrimInfo *pi = &priminfos[p];
320 debugp1(stderr,"append_prim %p %s\n",cd->lastmcodeptr, prim_names[p]);
321 if (cd->ncodeptr + pi->length + pi->restlength + goto_len >
322 cd->ncodebase + cd->ncodesize) {
323 cd->ncodeptr = codegen_ncode_increase(cd, cd->ncodeptr);
325 memcpy(cd->ncodeptr, pi->start, pi->length);
326 cd->ncodeptr += pi->length;
330 static void init_dynamic_super(codegendata *cd)
332 cd->lastpatcheroffset = -1;
333 cd->dynsuperm = -1; /* the next VM instr may be non-relocatable */
334 cd->dynsupern = cd->ncodeptr - cd->ncodebase;
338 /******************* -no-replication stuff **********************/
340 /* bugs: superinstructions are inserted into the table only after the
341 end of a method, so there may be replication within a method even
342 with -no-replication. Moreover, there is a race condition that can
343 cause varying amounts of replication between different threads;
344 this could only be avoided by eliminating the bug above and having
347 static u4 hash_superreuse(u1 *code, u4 length)
348 /* calculates a hash value for given code length */
353 for (i=0; i<(length&(~3)); i+=sizeof(u4)) {
354 r += *(s4 *)(code+i); /* !! align each superinstruction */
356 return (r+(r>>HASHTABLE_SUPERREUSE_BITS))&((1<<HASHTABLE_SUPERREUSE_BITS)-1);
359 static void superreuse_insert(u1 *code, u4 length)
361 u4 slot = hash_superreuse(code, length);
362 superreuse **listp = (superreuse **)&hashtable_superreuse.ptr[slot];
363 superreuse *sr = NEW(superreuse);
367 LOCK_MONITOR_ENTER(lock_hashtable_superreuse);
372 LOCK_MONITOR_EXIT(lock_hashtable_superreuse);
374 count_supers_unique++;
377 static u1 *superreuse_lookup(u1 *code, u4 length)
378 /* returns earlier instances code address, or NULL */
380 u4 slot = hash_superreuse(code, length);
381 superreuse *sr = (superreuse *)hashtable_superreuse.ptr[slot];
382 /* since there is another race condition anyway, we don't bother
383 locking here; both race conditions don't affect the correctness */
384 for (; sr != NULL; sr = sr->next)
385 if (length == sr->length && memcmp(code, sr->code, length)==0)
390 /******************* patcher stuff *******************************/
392 void patchersuper_rewrite(Inst *p)
393 /* p is the address of a patcher; if this is the last patcher in
394 a dynamic superinstruction, rewrite the threaded code to use
395 the dynamic superinstruction; the data for that comes from
396 hashtable_patchersupers */
398 ptrint key = (ptrint)p;
399 u4 slot = ((key + (key>>HASHTABLE_PATCHERSUPERS_BITS)) &
400 ((1<<HASHTABLE_PATCHERSUPERS_BITS)-1));
401 superstart **listp = (superstart **)&hashtable_patchersupers.ptr[slot];
403 count_patchers_exec++;
405 LOCK_MONITOR_ENTER(lock_hashtable_patchersupers);
407 for (; ss=*listp, ss!=NULL; listp = &(ss->next)) {
408 if (p == ((Inst *)(ss->mcodebase + ss->patcherm))) {
410 if (ss->oldsuper != NULL)
411 target = (Inst)ss->oldsuper;
413 target = (Inst)(ss->ncodebase + ss->dynsupern);
414 *(Inst *)(ss->mcodebase + ss->dynsuperm) = target;
415 debugp1(stderr, "patcher rewrote %p into %p\n",
416 ss->mcodebase + ss->dynsuperm, target);
418 FREE(ss, superstart);
419 count_patchers_last++;
424 LOCK_MONITOR_EXIT(lock_hashtable_patchersupers);
427 static void hashtable_patchersupers_insert(superstart *ss)
429 ptrint key = (ptrint)(ss->mcodebase + ss->patcherm);
430 u4 slot = ((key + (key>>HASHTABLE_PATCHERSUPERS_BITS)) &
431 ((1<<HASHTABLE_PATCHERSUPERS_BITS)-1));
432 void **listp = &hashtable_patchersupers.ptr[slot];
434 LOCK_MONITOR_ENTER(lock_hashtable_patchersupers);
436 ss->next = (superstart *)*listp;
439 LOCK_MONITOR_EXIT(lock_hashtable_patchersupers);
441 count_patchers_ins++;
444 void dynamic_super_rewrite(codegendata *cd)
445 /* rewrite the threaded code in the superstarts list to point to
446 the dynamic superinstructions */
448 superstart *ss = cd->superstarts;
450 for (; ss != NULL; ss = ssnext) {
452 if (opt_no_replication && ss->oldsuper == NULL)
453 superreuse_insert(cd->ncodebase + ss->dynsupern, ss->length);
454 if (ss->patcherm == -1) {
455 assert(ss->oldsuper == NULL);
456 *(Inst *)(cd->mcodebase + ss->dynsuperm) =
457 (Inst)(cd->ncodebase + ss->dynsupern);
458 debugp1(stderr, "rewrote %p into %p\n",
459 cd->mcodebase + ss->dynsuperm, cd->ncodebase + ss->dynsupern);
460 count_supers_nopatch++;
462 /* fprintf(stderr,"%p stage2\n", ss); */
463 ss->mcodebase = cd->mcodebase;
464 ss->ncodebase = cd->ncodebase;
465 hashtable_patchersupers_insert(ss);
466 count_supers_patch++;
471 static void new_dynamic_super(codegendata *cd)
472 /* add latest dynamic superinstruction to the appropriate table
473 for rewriting the threaded code either on relocation (if there
474 is no patcher), or when the last patcher is executed (if there
479 u1 *code = cd->ncodebase + cd->dynsupern;
480 u4 length = cd->ncodeptr - code;
482 if (opt_no_replication) {
483 oldsuper = superreuse_lookup(code, length);
484 if (oldsuper != NULL) {
485 count_supers_reused++;
486 count_native_saved += length;
488 if (cd->lastpatcheroffset == -1) {
489 *(Inst *)(cd->mcodebase + cd->dynsuperm) = (Inst)oldsuper;
490 debugp1(stderr, "rewrote %p into %p (reused)\n",
491 cd->mcodebase + ss->dynsuperm, oldsuper);
496 if (cd->lastpatcheroffset == -1) {
497 ss = DNEW(superstart);
499 ss = NEW(superstart);
501 /* fprintf(stderr,"%p stage1\n", ss); */
503 ss->patcherm = cd->lastpatcheroffset;
504 ss->oldsuper = oldsuper;
506 ss->dynsuperm = cd->dynsuperm;
507 ss->dynsupern = cd->dynsupern;
508 ss->next = cd->superstarts;
509 cd->superstarts = ss;
510 count_native_code += cd->ncodeptr - code;
513 void append_dispatch(codegendata *cd)
515 debugp1(stderr,"append_dispatch %p\n",cd->lastmcodeptr);
516 if (cd->lastinstwithoutdispatch != ~0) {
517 PrimInfo *pi = &priminfos[cd->lastinstwithoutdispatch];
519 memcpy(cd->ncodeptr, pi->start + pi->length, pi->restlength);
520 cd->ncodeptr += pi->restlength;
521 memcpy(cd->ncodeptr, before_goto, goto_len);
522 cd->ncodeptr += goto_len;
523 cd->lastinstwithoutdispatch = ~0;
524 new_dynamic_super(cd);
527 init_dynamic_super(cd);
530 static void compile_prim_dyn(codegendata *cd, ptrint p)
531 /* compile prim #p dynamically (mod flags etc.) */
535 assert(p<npriminfos);
536 if (!is_relocatable(p)) {
537 if (cd->lastinstwithoutdispatch != ~0) /* only count real dispatches */
538 count_disp_nonreloc++;
539 append_dispatch(cd); /* to previous dynamic superinstruction */
542 if (cd->dynsuperm == -1)
543 cd->dynsuperm = cd->lastmcodeptr - cd->mcodebase;
545 cd->lastinstwithoutdispatch = p;
546 if (priminfos[p].superend)
551 static void replace_patcher(codegendata *cd, ptrint p)
552 /* compile p dynamically, and note that there is a patcher here */
554 if (opt_no_quicksuper) {
557 cd->lastpatcheroffset = cd->lastmcodeptr - cd->mcodebase;
558 compile_prim_dyn(cd, p);
562 void gen_inst1(codegendata *cd, ptrint instr)
564 /* actually generate the threaded code instruction */
566 debugp1(stderr, "gen_inst %p, %s\n", cd->lastmcodeptr, prim_names[instr]);
567 *((Inst *) cd->lastmcodeptr) = vm_prim[instr];
569 case N_PATCHER_ACONST: replace_patcher(cd, N_ACONST1); break;
570 case N_PATCHER_ARRAYCHECKCAST: replace_patcher(cd, N_ARRAYCHECKCAST); break;
571 case N_PATCHER_GETSTATIC_INT: replace_patcher(cd, N_GETSTATIC_INT); break;
572 case N_PATCHER_GETSTATIC_LONG: replace_patcher(cd, N_GETSTATIC_LONG); break;
573 case N_PATCHER_GETSTATIC_CELL: replace_patcher(cd, N_GETSTATIC_CELL); break;
574 case N_PATCHER_PUTSTATIC_INT: replace_patcher(cd, N_PUTSTATIC_INT); break;
575 case N_PATCHER_PUTSTATIC_LONG: replace_patcher(cd, N_PUTSTATIC_LONG); break;
576 case N_PATCHER_PUTSTATIC_CELL: replace_patcher(cd, N_PUTSTATIC_CELL); break;
577 case N_PATCHER_GETFIELD_INT: replace_patcher(cd, N_GETFIELD_INT); break;
578 case N_PATCHER_GETFIELD_LONG: replace_patcher(cd, N_GETFIELD_LONG); break;
579 case N_PATCHER_GETFIELD_CELL: replace_patcher(cd, N_GETFIELD_CELL); break;
580 case N_PATCHER_PUTFIELD_INT: replace_patcher(cd, N_PUTFIELD_INT); break;
581 case N_PATCHER_PUTFIELD_LONG: replace_patcher(cd, N_PUTFIELD_LONG); break;
582 case N_PATCHER_PUTFIELD_CELL: replace_patcher(cd, N_PUTFIELD_CELL); break;
583 case N_PATCHER_MULTIANEWARRAY: replace_patcher(cd, N_MULTIANEWARRAY); break;
584 case N_PATCHER_INVOKESTATIC: replace_patcher(cd, N_INVOKESTATIC); break;
585 case N_PATCHER_INVOKESPECIAL: replace_patcher(cd, N_INVOKESPECIAL); break;
586 case N_PATCHER_INVOKEVIRTUAL: replace_patcher(cd, N_INVOKEVIRTUAL); break;
587 case N_PATCHER_INVOKEINTERFACE:replace_patcher(cd, N_INVOKEINTERFACE); break;
588 case N_PATCHER_CHECKCAST: replace_patcher(cd, N_CHECKCAST); break;
589 case N_PATCHER_INSTANCEOF: replace_patcher(cd, N_INSTANCEOF); break;
590 case N_PATCHER_NATIVECALL:
592 replace_patcher(cd, N_TRACENATIVECALL);
594 replace_patcher(cd, N_NATIVECALL);
596 case N_TRANSLATE: break;
599 compile_prim_dyn(cd, instr);
605 void finish_ss(codegendata *cd)
606 /* conclude the last static super and compile it dynamically */
609 if (cd->lastmcodeptr != NULL) {
610 gen_inst1(cd, *(ptrint *)(cd->lastmcodeptr));
611 cd->lastmcodeptr = NULL;
617 void gen_inst(codegendata *cd, ptrint instr)
619 cd->lastmcodeptr = cd->mcodeptr;
620 gen_inst1(cd, instr);
621 cd->mcodeptr += sizeof(Inst);
622 cd->lastmcodeptr = NULL;
625 void gen_inst(codegendata *cd, ptrint instr)
627 /* vmgen-0.6.2 generates gen_... calls with Inst ** as first
628 parameter, but we need to pass in cd to make lastmcodeptr
630 ptrint *lastmcodeptr = (ptrint *)cd->lastmcodeptr;
632 if (lastmcodeptr != NULL) {
635 assert((u1 *) lastmcodeptr < cd->mcodeptr &&
636 cd->mcodeptr < (u1 *) (lastmcodeptr + 40));
638 combo = peephole_opt(*lastmcodeptr, instr, peeptable);
641 *lastmcodeptr = combo;
647 /* actually generate the threaded code instruction */
649 *((Inst *) cd->mcodeptr) = (Inst) instr;
651 cd->lastmcodeptr = cd->mcodeptr;
652 cd->mcodeptr += sizeof(Inst);
656 void print_dynamic_super_statistics(void)
658 dolog("count_supers = %d", count_supers );
659 dolog("count_supers_unique = %d", count_supers_unique );
660 dolog("count_supers_reused = %d", count_supers_reused );
661 dolog("count_patchers_exec = %d", count_patchers_exec );
662 dolog("count_patchers_last = %d", count_patchers_last );
663 dolog("count_patchers_ins = %d", count_patchers_ins );
664 dolog("count_patchers = %d", count_patchers );
665 dolog("count_supers_nopatch= %d", count_supers_nopatch);
666 dolog("count_supers_patch = %d", count_supers_patch );
667 dolog("count_dispatches = %d", count_dispatches );
668 dolog("count_disp_nonreloc = %d", count_disp_nonreloc );
669 dolog("count_insts_reloc = %d", count_insts_reloc );
670 dolog("count_insts = %d", count_insts );
671 dolog("count_native_code = %d", count_native_code );
672 dolog("count_native_saved = %d", count_native_saved );
675 #if defined(ENABLE_DISASSEMBLER)
676 void disassemble_prim(int n)
678 PrimInfo *p = &(priminfos[n]);
679 u1 *start = vm_prim[n];
681 #if defined(ENABLE_JIT)
682 printf("%s (len=%d, restlen=%d):\n",prim_names[n],p->length, p->restlength);
683 disassemble(start, start + p->length + p->restlength);
686 #endif /* defined(ENABLE_DISASSEMBLER) */
688 void dynamic_super_init(void)
690 check_prims(vm_prim);
692 #if defined(ENABLE_DISASSEMBLER)
694 disassemble_prim(N_IADD);
695 disassemble_prim(N_ILOAD);
696 disassemble_prim(N_GETFIELD_INT);
700 hashtable_create(&hashtable_patchersupers, 1<<HASHTABLE_PATCHERSUPERS_BITS);
701 if (opt_no_replication)
702 hashtable_create(&hashtable_superreuse, 1<<HASHTABLE_SUPERREUSE_BITS);
704 #if defined(ENABLE_THREADS)
705 /* create patchersupers hashtable lock object */
707 lock_hashtable_patchersupers = NEW(java_objectheader);
708 lock_hashtable_superreuse = NEW(java_objectheader);
710 lock_init_object_lock(lock_hashtable_patchersupers);
711 lock_init_object_lock(lock_hashtable_superreuse);