1 /* src/vm/jit/verify/typecheck.c - typechecking (part of bytecode verification)
3 Copyright (C) 1996-2005, 2006, 2007, 2008
4 CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
6 This file is part of CACAO.
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2, or (at
11 your option) any later version.
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 What's the purpose of the `typechecker`?
28 ----------------------------------------
30 The typechecker analyses (the intermediate repr. of) the bytecode of
31 each method and ensures that for each instruction the values on the
32 stack and in local variables are of the correct type whenever the
33 instruction is executed.
35 type checking is a mandatory part of bytecode verification.
38 How does the typechecker work?
39 ------------------------------
41 The JVM stack and the local variables are not statically typed, so the
42 typechecker has to *infer* the static types of stack slots and local
43 variables at each point of the method. The JVM spec imposes a lot of
44 restrictions on the bytecode in order to guarantee that this is always
47 Basically the typechecker solves the data flow equations of the method.
48 This is done in the usual way for a forward data flow analysis: Starting
49 from the entry point of the method the typechecker follows the CFG and
50 records the type of each stack slot and local variable at each point[1].
51 When two or more control flow paths merge at a point, the union of the
52 types for each slot/variable is taken. The algorithm continues to follow
53 all possible paths[2] until the recorded types do not change anymore (ie.
54 the equations have been solved).
56 If the solution has been reached and the resulting types are valid for
57 all instructions, then type checking terminates with success, otherwise
58 an exception is thrown.
61 Why is this code so damn complicated?
62 -------------------------------------
64 Short answer: The devil's in the details.
66 While the basic operation of the typechecker is no big deal, there are
67 many properties of Java bytecode which make type checking hard. Some of
68 them are not even addressed in the JVM spec. Some problems and their
71 *) Finding a good representation of the union of two reference types is
72 difficult because of multiple inheritance of interfaces.
74 Solution: The typeinfo system can represent such "merged" types by a
75 list of proper subclasses of a class. Example:
77 typeclass=java.lang.Object merged={ InterfaceA, InterfaceB }
79 represents the result of merging two interface types "InterfaceA"
82 *) When the code of a method is verified, there may still be unresolved
83 references to classes/methods/fields in the code, which we may not force
84 to be resolved eagerly. (A similar problem arises because of the special
85 checks for protected members.)
87 Solution: The typeinfo system knows how to deal with unresolved
88 class references. Whenever a check has to be performed for an
89 unresolved type, the type is annotated with constraints representing
90 the check. Later, when the type is resolved, the constraints are
91 checked. (See the constrain_unresolved_... and the resolve_...
94 *) Checks for uninitialized object instances are hard because after the
95 invocation of <init> on an uninitialized object *all* slots/variables
96 referring to this object (and exactly those slots/variables) must be
97 marked as initialized.
99 Solution: The JVM spec describes a solution, which has been
100 implemented in this typechecker.
102 Note that some checks mentioned in the JVM spec are unnecessary[4] and
103 not performed by either the reference implementation, or this implementation.
108 [1] Actually only the types of slots/variables at the start of each
109 basic block are remembered. Within a basic block the algorithm only keeps
110 the types of the slots/variables for the "current" instruction which is
113 [2] Actually the algorithm iterates through the basic block list until
114 there are no more changes. Theoretically it would be wise to sort the
115 basic blocks topologically beforehand, but the number of average/max
116 iterations observed is so low, that this was not deemed necessary.
118 [3] This is similar to a method proposed by: Alessandro Coglio et al., A
119 Formal Specification of Java Class Loading, Technical Report, Kestrel
120 Institute April 2000, revised July 2000
121 http://www.kestrel.edu/home/people/coglio/loading.pdf
122 An important difference is that Coglio's subtype constraints are checked
123 after loading, while our constraints are checked when the field/method
124 is accessed for the first time, so we can guarantee lexically correct
127 [4] Alessandro Coglio
128 Improving the official specification of Java bytecode verification
129 Proceedings of the 3rd ECOOP Workshop on Formal Techniques for Java Programs
131 citeseer.ist.psu.edu/article/coglio03improving.html
140 #include "vm/types.h"
142 #ifdef ENABLE_VERIFIER
144 #include "mm/memory.hpp"
146 #include "native/native.hpp"
148 #include "toolbox/logging.hpp"
150 #include "vm/access.hpp"
151 #include "vm/array.hpp"
152 #include "vm/jit/builtin.hpp"
153 #include "vm/exceptions.hpp"
154 #include "vm/global.h"
155 #include "vm/globals.hpp"
156 #include "vm/loader.hpp"
157 #include "vm/options.h"
158 #include "vm/primitive.hpp"
159 #include "vm/resolve.hpp"
161 #include "vm/jit/jit.hpp"
162 #include "vm/jit/parse.hpp"
163 #include "vm/jit/show.hpp"
165 #include "vm/jit/verify/typecheck-common.hpp"
167 #if defined(__cplusplus)
171 /****************************************************************************/
172 /* MACROS FOR VARIABLE TYPE CHECKING */
173 /****************************************************************************/
175 #define TYPECHECK_CHECK_TYPE(i,tp,msg) \
177 if (VAR(i)->type != (tp)) { \
178 exceptions_throw_verifyerror(state->m, (msg)); \
183 #define TYPECHECK_INT(i) \
184 TYPECHECK_CHECK_TYPE(i,TYPE_INT,"Expected to find integer value")
185 #define TYPECHECK_LNG(i) \
186 TYPECHECK_CHECK_TYPE(i,TYPE_LNG,"Expected to find long value")
187 #define TYPECHECK_FLT(i) \
188 TYPECHECK_CHECK_TYPE(i,TYPE_FLT,"Expected to find float value")
189 #define TYPECHECK_DBL(i) \
190 TYPECHECK_CHECK_TYPE(i,TYPE_DBL,"Expected to find double value")
191 #define TYPECHECK_ADR(i) \
192 TYPECHECK_CHECK_TYPE(i,TYPE_ADR,"Expected to find object value")
194 #define TYPECHECK_INT_OP(o) TYPECHECK_INT((o).varindex)
195 #define TYPECHECK_LNG_OP(o) TYPECHECK_LNG((o).varindex)
196 #define TYPECHECK_FLT_OP(o) TYPECHECK_FLT((o).varindex)
197 #define TYPECHECK_DBL_OP(o) TYPECHECK_DBL((o).varindex)
198 #define TYPECHECK_ADR_OP(o) TYPECHECK_ADR((o).varindex)
201 /* typestate_save_invars *******************************************************
203 Save the invars of the current basic block in the space reserved by
206 This function must be called before an instruction modifies a variable
207 that is an invar of the current block. In such cases the invars of the
208 block must be saved, and restored at the end of the analysis of this
209 basic block, so that the invars again reflect the *input* to this basic
210 block (and do not randomly contain types that appear within the block).
213 state............current state of the verifier
215 *******************************************************************************/
218 typestate_save_invars(verifier_state *state)
223 LOG("saving invars");
225 if (!state->savedindices) {
226 LOG("allocating savedindices buffer");
227 pindex = (s4*) DumpMemory::allocate(sizeof(s4) * state->m->maxstack);
228 state->savedindices = pindex;
229 index = state->numlocals + VERIFIER_EXTRA_VARS;
230 for (i=0; i<state->m->maxstack; ++i)
236 typecheck_copy_types(state, state->bptr->invars, state->savedindices,
237 state->bptr->indepth);
239 /* set the invars of the block to the saved variables */
240 /* and remember the original invars */
242 state->savedinvars = state->bptr->invars;
243 state->bptr->invars = state->savedindices;
247 /* typestate_restore_invars ***************************************************
249 Restore the invars of the current basic block that have been previously
250 saved by `typestate_save_invars`.
253 state............current state of the verifier
255 *******************************************************************************/
258 typestate_restore_invars(verifier_state *state)
260 TYPECHECK_COUNT(stat_savedstack);
261 LOG("restoring saved invars");
263 /* restore the invars pointer */
265 state->bptr->invars = state->savedinvars;
267 /* copy the types back */
269 typecheck_copy_types(state, state->savedindices, state->bptr->invars,
270 state->bptr->indepth);
272 /* mark that there are no saved invars currently */
274 state->savedinvars = NULL;
278 /* handle_fieldaccess **********************************************************
280 Verify an ICMD_{GET,PUT}{STATIC,FIELD}(CONST)?
283 state............the current state of the verifier
286 true.............successful verification,
287 false............an exception has been thrown.
289 *******************************************************************************/
292 handle_fieldaccess(verifier_state *state,
300 #define TYPECHECK_VARIABLESBASED
301 #define EXCEPTION do { return false; } while (0)
302 #define VERIFY_ERROR(msg) TYPECHECK_VERIFYERROR_bool(msg)
303 #include <typecheck-fields.inc>
306 #undef TYPECHECK_VARIABLESBASED
312 /* handle_invocation ***********************************************************
314 Verify an ICMD_INVOKE* instruction.
317 state............the current state of the verifier
320 true.............successful verification,
321 false............an exception has been thrown.
323 *******************************************************************************/
326 handle_invocation(verifier_state *state)
329 varinfo *dv; /* output variable of current instruction */
332 dv = VAROP(state->iptr->dst);
334 #define TYPECHECK_VARIABLESBASED
335 #define OP1 VAR(state->iptr->sx.s23.s2.args[0])
336 #include <typecheck-invoke.inc>
338 #undef TYPECHECK_VARIABLESBASED
344 /* handle_builtin **************************************************************
346 Verify the call of a builtin method.
349 state............the current state of the verifier
352 true.............successful verification,
353 false............an exception has been thrown.
355 *******************************************************************************/
358 handle_builtin(verifier_state *state)
361 varinfo *dv; /* output variable of current instruction */
364 dv = VAROP(state->iptr->dst);
366 #define TYPECHECK_VARIABLESBASED
367 #define OP1 state->iptr->sx.s23.s2.args[0]
368 #include <typecheck-builtins.inc>
370 #undef TYPECHECK_VARIABLESBASED
375 /* handle_multianewarray *******************************************************
377 Verify a MULTIANEWARRAY instruction.
380 state............the current state of the verifier
383 true.............successful verification,
384 false............an exception has been thrown.
386 *******************************************************************************/
389 handle_multianewarray(verifier_state *state)
392 varinfo *dv; /* output variable of current instruction */
395 dv = VAROP(state->iptr->dst);
397 #define TYPECHECK_VARIABLESBASED
398 #define VERIFY_ERROR(msg) TYPECHECK_VERIFYERROR_bool(msg)
399 #include <typecheck-multianewarray.inc>
401 #undef TYPECHECK_VARIABLESBASED
406 /* typecheck_invalidate_locals *************************************************
408 Invalidate locals that are overwritten by writing to the given local.
411 state............the current state of the verifier
412 index............the index of the local that is written
413 twoword..........true, if a two-word type is written
415 *******************************************************************************/
417 static void typecheck_invalidate_locals(verifier_state *state, s4 index, bool twoword)
422 jitdata *jd = state->jd;
423 s4 *localmap = jd->local_map;
424 varinfo *vars = jd->var;
426 javaindex = jd->reverselocalmap[index];
428 /* invalidate locals of two-word type at index javaindex-1 */
431 localmap += 5 * (javaindex-1);
432 for (t=0; t<5; ++t) {
433 varindex = *localmap++;
434 if (varindex >= 0 && IS_2_WORD_TYPE(vars[varindex].type)) {
435 LOG1("invalidate local %d", varindex);
436 vars[varindex].type = TYPE_VOID;
441 localmap += 5 * javaindex;
444 /* invalidate locals at index javaindex */
446 for (t=0; t<5; ++t) {
447 varindex = *localmap++;
449 LOG1("invalidate local %d", varindex);
450 vars[varindex].type = TYPE_VOID;
454 /* if a two-word type is written, invalidate locals at index javaindex+1 */
457 for (t=0; t<5; ++t) {
458 varindex = *localmap++;
460 LOG1("invalidate local %d", varindex);
461 vars[varindex].type = TYPE_VOID;
468 /* macros used by the generated code ******************************************/
470 #define EXCEPTION do { return false; } while (0)
471 #define VERIFY_ERROR(msg) TYPECHECK_VERIFYERROR_bool(msg)
473 #define CHECK_LOCAL_TYPE(index, t) \
475 if (!typevector_checktype(jd->var, (index), (t))) \
476 VERIFY_ERROR("Local variable type mismatch"); \
479 #define STORE_LOCAL(t, index) \
482 typecheck_invalidate_locals(state, (index), false); \
483 typevector_store(jd->var, (index), (temp_t), NULL); \
486 #define STORE_LOCAL_2_WORD(t, index) \
489 typecheck_invalidate_locals(state, (index), true); \
490 typevector_store(jd->var, (index), (temp_t), NULL); \
493 #define REACH_BLOCK(target) \
495 if (!typestate_reach(state, (target), \
496 state->bptr->outvars, jd->var, \
497 state->bptr->outdepth)) \
501 #define REACH(target) REACH_BLOCK((target).block)
504 /* handle_basic_block **********************************************************
506 Perform bytecode verification of a basic block.
509 state............the current state of the verifier
512 true.............successful verification,
513 false............an exception has been thrown.
515 *******************************************************************************/
518 handle_basic_block(verifier_state *state)
520 int opcode; /* current opcode */
521 int len; /* for counting instructions, etc. */
522 bool superblockend; /* true if no fallthrough to next block */
523 instruction *iptr; /* the current instruction */
524 basicblock *tbptr; /* temporary for target block */
525 bool maythrow; /* true if this instruction may throw */
528 branch_target_t *table;
529 lookup_target_t *lookup;
530 jitdata *jd = state->jd;
532 varinfo constvalue; /* for PUT*CONST */
533 constant_FMIref *fieldref;
535 LOGSTR1("\n---- BLOCK %04d ------------------------------------------------\n",state->bptr->nr);
538 superblockend = false;
539 state->bptr->flags = BBFINISHED;
541 /* prevent compiler warnings */
544 /* determine the active exception handlers for this block */
545 /* XXX could use a faster algorithm with sorted lists or */
548 for (ex = state->jd->exceptiontable; ex ; ex = ex->down) {
549 if ((ex->start->nr <= state->bptr->nr) && (ex->end->nr > state->bptr->nr)) {
550 LOG1("active handler L%03d", ex->handler->nr);
551 state->handlers[len++] = ex;
554 state->handlers[len] = NULL;
556 /* init variable types at the start of this block */
557 typevector_copy_inplace(state->bptr->inlocals, jd->var, state->numlocals);
559 DOLOG(show_basicblock(jd, state->bptr, SHOW_STACK));
560 DOLOG(typecheck_print_vararray(stdout, jd, state->bptr->invars,
561 state->bptr->indepth));
562 DOLOG(typevector_print(stdout, jd->var, state->numlocals));
565 /* loop over the instructions */
566 len = state->bptr->icount;
567 state->iptr = state->bptr->iinstr;
569 TYPECHECK_COUNT(stat_ins);
573 DOLOG(typevector_print(stdout, jd->var, state->numlocals));
575 DOLOG(show_icmd(jd, state->iptr, false, SHOW_STACK)); LOGNL; LOGFLUSH;
582 /* include generated code for ICMDs verification */
584 #define TYPECHECK_VARIABLESBASED
586 #define METHOD (state->m)
588 #define BPTR (state->bptr)
589 #include <typecheck-variablesbased-gen.inc>
594 #undef TYPECHECK_VARIABLESBASED
597 LOG1("ICMD %d\n", opcode);
598 TYPECHECK_VERIFYERROR_bool("Missing ICMD code during typecheck");
601 /* reach exception handlers for this instruction */
604 TYPECHECK_COUNT(stat_ins_maythrow);
605 TYPECHECK_MARK(state->stat_maythrow);
606 LOG("reaching exception handlers");
608 while (state->handlers[i]) {
609 TYPECHECK_COUNT(stat_handlers_reached);
610 if (state->handlers[i]->catchtype.any)
611 VAR(state->exinvars)->typeinfo.typeclass = state->handlers[i]->catchtype;
613 VAR(state->exinvars)->typeinfo.typeclass.cls = class_java_lang_Throwable;
614 if (!typestate_reach(state,
615 state->handlers[i]->handler,
616 &(state->exinvars), jd->var, 1))
622 LOG("\t\tnext instruction");
624 } /* while instructions */
626 LOG("instructions done");
628 DOLOG(typecheck_print_vararray(stdout, jd, state->bptr->outvars,
629 state->bptr->outdepth));
630 DOLOG(typevector_print(stdout, jd->var, state->numlocals));
633 /* propagate stack and variables to the following block */
634 if (!superblockend) {
635 LOG("reaching following block");
636 tbptr = state->bptr->next;
637 while (tbptr->flags == BBDELETED) {
639 #ifdef TYPECHECK_DEBUG
640 /* this must be checked in parse.c */
641 if ((tbptr->nr) >= state->basicblockcount)
642 TYPECHECK_VERIFYERROR_bool("Control flow falls off the last block");
645 if (!typestate_reach(state,tbptr,state->bptr->outvars, jd->var,
646 state->bptr->outdepth))
650 /* We may have to restore the types of the instack slots. They
651 * have been saved if an <init> call inside the block has
652 * modified the instack types. (see INVOKESPECIAL) */
654 if (state->savedinvars)
655 typestate_restore_invars(state);
661 /****************************************************************************/
663 /* This is the main function of the bytecode verifier. It is called */
664 /* directly after analyse_stack. */
667 /* meth.............the method to verify */
668 /* cdata............codegendata for the method */
669 /* rdata............registerdata for the method */
672 /* true.............successful verification */
673 /* false............an exception has been thrown */
675 /****************************************************************************/
677 #define MAXPARAMS 255
679 bool typecheck(jitdata *jd)
683 varinfo *savedlocals;
684 verifier_state state; /* current state of the verifier */
686 /* collect statistics */
688 #ifdef TYPECHECK_STATISTICS
689 int count_iterations = 0;
690 TYPECHECK_COUNT(stat_typechecked);
691 TYPECHECK_COUNT_FREQ(stat_locals,jd->maxlocals,STAT_LOCALS);
692 TYPECHECK_COUNT_FREQ(stat_blocks,cdata->method->basicblockcount/10,STAT_BLOCKS);
693 TYPECHECK_COUNTIF(cdata->method->exceptiontablelength != 0,stat_methods_with_handlers);
694 state.stat_maythrow = false;
697 /* get required compiler data */
702 /* some logging on entry */
705 LOGSTR("\n==============================================================================\n");
706 DOLOG( show_method(jd, SHOW_STACK) );
707 LOGSTR("\n==============================================================================\n");
708 LOGMETHOD("Entering typecheck: ",cd->method);
710 /* initialize the verifier state */
715 state.basicblockcount = jd->basicblockcount;
716 state.basicblocks = jd->basicblocks;
717 state.savedindices = NULL;
718 state.savedinvars = NULL;
720 /* check that the basicblock numbers are valid */
723 jit_check_basicblock_numbers(jd);
726 /* check if this method is an instance initializer method */
728 state.initmethod = (state.m->name == utf_init);
730 /* initialize the basic block flags for the following CFG traversal */
732 typecheck_init_flags(&state, BBFINISHED);
734 /* number of local variables */
736 /* In <init> methods we use an extra local variable to indicate whether */
737 /* the 'this' reference has been initialized. */
738 /* TYPE_VOID...means 'this' has not been initialized, */
739 /* TYPE_INT....means 'this' has been initialized. */
741 state.numlocals = state.jd->localcount;
742 state.validlocals = state.numlocals;
743 if (state.initmethod)
744 state.numlocals++; /* VERIFIER_EXTRA_LOCALS */
749 LOG("reverselocalmap:");
750 for (i=0; i<state.validlocals; ++i) {
751 LOG2(" %i => javaindex %i", i, jd->reverselocalmap[i]);
754 /* allocate the buffer of active exception handlers */
756 state.handlers = (exception_entry**) DumpMemory::allocate(sizeof(exception_entry*) * (state.jd->exceptiontablelength + 1));
758 /* save local variables */
760 savedlocals = (varinfo*) DumpMemory::allocate(sizeof(varinfo) * state.numlocals);
761 MCOPY(savedlocals, jd->var, varinfo, state.numlocals);
763 /* initialized local variables of first block */
765 if (!typecheck_init_locals(&state, true))
768 /* initialize invars of exception handlers */
770 state.exinvars = state.numlocals;
771 VAR(state.exinvars)->type = TYPE_ADR;
772 typeinfo_init_classinfo(&(VAR(state.exinvars)->typeinfo),
773 class_java_lang_Throwable); /* changed later */
775 LOG("Exception handler stacks set.\n");
777 /* loop while there are still blocks to be checked */
779 TYPECHECK_COUNT(count_iterations);
781 state.repeat = false;
783 state.bptr = state.basicblocks;
785 for (; state.bptr; state.bptr = state.bptr->next) {
786 LOGSTR1("---- BLOCK %04d, ",state.bptr->nr);
787 LOGSTR1("blockflags: %d\n",state.bptr->flags);
790 /* verify reached block */
791 if (state.bptr->flags == BBTYPECHECK_REACHED) {
792 if (!handle_basic_block(&state))
797 LOGIF(state.repeat,"state.repeat == true");
798 } while (state.repeat);
802 #ifdef TYPECHECK_STATISTICS
803 LOG1("Typechecker did %4d iterations",count_iterations);
804 TYPECHECK_COUNT_FREQ(stat_iterations,count_iterations,STAT_ITERATIONS);
805 TYPECHECK_COUNTIF(state.jsrencountered,stat_typechecked_jsr);
806 TYPECHECK_COUNTIF(state.stat_maythrow,stat_methods_maythrow);
809 /* reset the flags of blocks we haven't reached */
811 typecheck_reset_flags(&state);
815 MCOPY(jd->var, savedlocals, varinfo, state.numlocals);
817 /* everything's ok */
819 LOGimp("exiting typecheck");
823 #if defined(__cplusplus)
827 #endif /* ENABLE_VERIFIER */
830 * These are local overrides for various environment variables in Emacs.
831 * Please do not remove this and leave it at the end of the file, where
832 * Emacs will automagically detect them.
833 * ---------------------------------------------------------------------
836 * indent-tabs-mode: t
840 * vim:noexpandtab:sw=4:ts=4: