1 /* src/vm/jit/verify/typecheck.c - typechecking (part of bytecode verification)
3 Copyright (C) 1996-2005, 2006, 2007 R. Grafl, A. Krall, C. Kruegel,
4 C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
5 E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
6 J. Wenninger, Institut f. Computersprachen - TU Wien
8 This file is part of CACAO.
10 This program is free software; you can redistribute it and/or
11 modify it under the terms of the GNU General Public License as
12 published by the Free Software Foundation; either version 2, or (at
13 your option) any later version.
15 This program is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25 $Id: typecheck.c 8330 2007-08-16 18:15:51Z twisti $
31 What's the purpose of the `typechecker`?
32 ----------------------------------------
34 The typechecker analyses (the intermediate repr. of) the bytecode of
35 each method and ensures that for each instruction the values on the
36 stack and in local variables are of the correct type whenever the
37 instruction is executed.
39 type checking is a mandatory part of bytecode verification.
42 How does the typechecker work?
43 ------------------------------
45 The JVM stack and the local variables are not statically typed, so the
46 typechecker has to *infer* the static types of stack slots and local
47 variables at each point of the method. The JVM spec imposes a lot of
48 restrictions on the bytecode in order to guarantee that this is always
51 Basically the typechecker solves the data flow equations of the method.
52 This is done in the usual way for a forward data flow analysis: Starting
53 from the entry point of the method the typechecker follows the CFG and
54 records the type of each stack slot and local variable at each point[1].
55 When two or more control flow paths merge at a point, the union of the
56 types for each slot/variable is taken. The algorithm continues to follow
57 all possible paths[2] until the recorded types do not change anymore (ie.
58 the equations have been solved).
60 If the solution has been reached and the resulting types are valid for
61 all instructions, then type checking terminates with success, otherwise
62 an exception is thrown.
65 Why is this code so damn complicated?
66 -------------------------------------
68 Short answer: The devil's in the details.
70 While the basic operation of the typechecker is no big deal, there are
71 many properties of Java bytecode which make type checking hard. Some of
72 them are not even addressed in the JVM spec. Some problems and their
75 *) Finding a good representation of the union of two reference types is
76 difficult because of multiple inheritance of interfaces.
78 Solution: The typeinfo system can represent such "merged" types by a
79 list of proper subclasses of a class. Example:
81 typeclass=java.lang.Object merged={ InterfaceA, InterfaceB }
83 represents the result of merging two interface types "InterfaceA"
86 *) When the code of a method is verified, there may still be unresolved
87 references to classes/methods/fields in the code, which we may not force
88 to be resolved eagerly. (A similar problem arises because of the special
89 checks for protected members.)
91 Solution: The typeinfo system knows how to deal with unresolved
92 class references. Whenever a check has to be performed for an
93 unresolved type, the type is annotated with constraints representing
94 the check. Later, when the type is resolved, the constraints are
95 checked. (See the constrain_unresolved_... and the resolve_...
98 *) Checks for uninitialized object instances are hard because after the
99 invocation of <init> on an uninitialized object *all* slots/variables
100 referring to this object (and exactly those slots/variables) must be
101 marked as initialized.
103 Solution: The JVM spec describes a solution, which has been
104 implemented in this typechecker.
106 Note that some checks mentioned in the JVM spec are unnecessary[4] and
107 not performed by either the reference implementation, or this implementation.
112 [1] Actually only the types of slots/variables at the start of each
113 basic block are remembered. Within a basic block the algorithm only keeps
114 the types of the slots/variables for the "current" instruction which is
117 [2] Actually the algorithm iterates through the basic block list until
118 there are no more changes. Theoretically it would be wise to sort the
119 basic blocks topologically beforehand, but the number of average/max
120 iterations observed is so low, that this was not deemed necessary.
122 [3] This is similar to a method proposed by: Alessandro Coglio et al., A
123 Formal Specification of Java Class Loading, Technical Report, Kestrel
124 Institute April 2000, revised July 2000
125 http://www.kestrel.edu/home/people/coglio/loading.pdf
126 An important difference is that Coglio's subtype constraints are checked
127 after loading, while our constraints are checked when the field/method
128 is accessed for the first time, so we can guarantee lexically correct
131 [4] Alessandro Coglio
132 Improving the official specification of Java bytecode verification
133 Proceedings of the 3rd ECOOP Workshop on Formal Techniques for Java Programs
135 citeseer.ist.psu.edu/article/coglio03improving.html
144 #include "vm/types.h"
146 #ifdef ENABLE_VERIFIER
148 #include "mm/memory.h"
150 #include "native/native.h"
152 #include "toolbox/logging.h"
154 #include "vm/access.h"
155 #include "vm/array.h"
156 #include "vm/builtin.h"
157 #include "vm/exceptions.h"
158 #include "vm/global.h"
159 #include "vm/primitive.h"
160 #include "vm/resolve.h"
162 #include "vm/jit/jit.h"
163 #include "vm/jit/parse.h"
164 #include "vm/jit/show.h"
166 #include "vmcore/loader.h"
167 #include "vmcore/options.h"
169 #include <typecheck-common.h>
172 /****************************************************************************/
173 /* MACROS FOR VARIABLE TYPE CHECKING */
174 /****************************************************************************/
176 #define TYPECHECK_CHECK_TYPE(i,tp,msg) \
178 if (VAR(i)->type != (tp)) { \
179 exceptions_throw_verifyerror(state->m, (msg)); \
184 #define TYPECHECK_INT(i) \
185 TYPECHECK_CHECK_TYPE(i,TYPE_INT,"Expected to find integer value")
186 #define TYPECHECK_LNG(i) \
187 TYPECHECK_CHECK_TYPE(i,TYPE_LNG,"Expected to find long value")
188 #define TYPECHECK_FLT(i) \
189 TYPECHECK_CHECK_TYPE(i,TYPE_FLT,"Expected to find float value")
190 #define TYPECHECK_DBL(i) \
191 TYPECHECK_CHECK_TYPE(i,TYPE_DBL,"Expected to find double value")
192 #define TYPECHECK_ADR(i) \
193 TYPECHECK_CHECK_TYPE(i,TYPE_ADR,"Expected to find object value")
195 #define TYPECHECK_INT_OP(o) TYPECHECK_INT((o).varindex)
196 #define TYPECHECK_LNG_OP(o) TYPECHECK_LNG((o).varindex)
197 #define TYPECHECK_FLT_OP(o) TYPECHECK_FLT((o).varindex)
198 #define TYPECHECK_DBL_OP(o) TYPECHECK_DBL((o).varindex)
199 #define TYPECHECK_ADR_OP(o) TYPECHECK_ADR((o).varindex)
202 /* typestate_save_invars *******************************************************
204 Save the invars of the current basic block in the space reserved by
207 This function must be called before an instruction modifies a variable
208 that is an invar of the current block. In such cases the invars of the
209 block must be saved, and restored at the end of the analysis of this
210 basic block, so that the invars again reflect the *input* to this basic
211 block (and do not randomly contain types that appear within the block).
214 state............current state of the verifier
216 *******************************************************************************/
219 typestate_save_invars(verifier_state *state)
224 LOG("saving invars");
226 if (!state->savedindices) {
227 LOG("allocating savedindices buffer");
228 pindex = DMNEW(s4, state->m->maxstack);
229 state->savedindices = pindex;
230 index = state->numlocals + VERIFIER_EXTRA_VARS;
231 for (i=0; i<state->m->maxstack; ++i)
237 typecheck_copy_types(state, state->bptr->invars, state->savedindices,
238 state->bptr->indepth);
240 /* set the invars of the block to the saved variables */
241 /* and remember the original invars */
243 state->savedinvars = state->bptr->invars;
244 state->bptr->invars = state->savedindices;
248 /* typestate_restore_invars ***************************************************
250 Restore the invars of the current basic block that have been previously
251 saved by `typestate_save_invars`.
254 state............current state of the verifier
256 *******************************************************************************/
259 typestate_restore_invars(verifier_state *state)
261 TYPECHECK_COUNT(stat_savedstack);
262 LOG("restoring saved invars");
264 /* restore the invars pointer */
266 state->bptr->invars = state->savedinvars;
268 /* copy the types back */
270 typecheck_copy_types(state, state->savedindices, state->bptr->invars,
271 state->bptr->indepth);
273 /* mark that there are no saved invars currently */
275 state->savedinvars = NULL;
279 /* handle_fieldaccess **********************************************************
281 Verify an ICMD_{GET,PUT}{STATIC,FIELD}(CONST)?
284 state............the current state of the verifier
287 true.............successful verification,
288 false............an exception has been thrown.
290 *******************************************************************************/
293 handle_fieldaccess(verifier_state *state,
301 #define TYPECHECK_VARIABLESBASED
302 #define EXCEPTION do { return false; } while (0)
303 #define VERIFY_ERROR(msg) TYPECHECK_VERIFYERROR_bool(msg)
304 #include <typecheck-fields.inc>
307 #undef TYPECHECK_VARIABLESBASED
313 /* handle_invocation ***********************************************************
315 Verify an ICMD_INVOKE* instruction.
318 state............the current state of the verifier
321 true.............successful verification,
322 false............an exception has been thrown.
324 *******************************************************************************/
327 handle_invocation(verifier_state *state)
330 varinfo *dv; /* output variable of current instruction */
333 dv = VAROP(state->iptr->dst);
335 #define TYPECHECK_VARIABLESBASED
336 #define OP1 VAR(state->iptr->sx.s23.s2.args[0])
337 #include <typecheck-invoke.inc>
339 #undef TYPECHECK_VARIABLESBASED
345 /* handle_builtin **************************************************************
347 Verify the call of a builtin method.
350 state............the current state of the verifier
353 true.............successful verification,
354 false............an exception has been thrown.
356 *******************************************************************************/
359 handle_builtin(verifier_state *state)
362 varinfo *dv; /* output variable of current instruction */
365 dv = VAROP(state->iptr->dst);
367 #define TYPECHECK_VARIABLESBASED
368 #define OP1 state->iptr->sx.s23.s2.args[0]
369 #include <typecheck-builtins.inc>
371 #undef TYPECHECK_VARIABLESBASED
376 /* handle_multianewarray *******************************************************
378 Verify a MULTIANEWARRAY instruction.
381 state............the current state of the verifier
384 true.............successful verification,
385 false............an exception has been thrown.
387 *******************************************************************************/
390 handle_multianewarray(verifier_state *state)
393 varinfo *dv; /* output variable of current instruction */
396 dv = VAROP(state->iptr->dst);
398 #define TYPECHECK_VARIABLESBASED
399 #define VERIFY_ERROR(msg) TYPECHECK_VERIFYERROR_bool(msg)
400 #include <typecheck-multianewarray.inc>
402 #undef TYPECHECK_VARIABLESBASED
407 /* typecheck_invalidate_locals *************************************************
409 Invalidate locals that are overwritten by writing to the given local.
412 state............the current state of the verifier
413 index............the index of the local that is written
414 twoword..........true, if a two-word type is written
416 *******************************************************************************/
418 static void typecheck_invalidate_locals(verifier_state *state, s4 index, bool twoword)
423 jitdata *jd = state->jd;
424 s4 *localmap = jd->local_map;
425 varinfo *vars = jd->var;
427 javaindex = state->reverselocalmap[index];
429 /* invalidate locals of two-word type at index javaindex-1 */
432 localmap += 5 * (javaindex-1);
433 for (t=0; t<5; ++t) {
434 varindex = *localmap++;
435 if (varindex >= 0 && IS_2_WORD_TYPE(vars[varindex].type)) {
436 LOG1("invalidate local %d", varindex);
437 vars[varindex].type = TYPE_VOID;
442 localmap += 5 * javaindex;
445 /* invalidate locals at index javaindex */
447 for (t=0; t<5; ++t) {
448 varindex = *localmap++;
450 LOG1("invalidate local %d", varindex);
451 vars[varindex].type = TYPE_VOID;
455 /* if a two-word type is written, invalidate locals at index javaindex+1 */
458 for (t=0; t<5; ++t) {
459 varindex = *localmap++;
461 LOG1("invalidate local %d", varindex);
462 vars[varindex].type = TYPE_VOID;
469 /* macros used by the generated code ******************************************/
471 #define EXCEPTION do { return false; } while (0)
472 #define VERIFY_ERROR(msg) TYPECHECK_VERIFYERROR_bool(msg)
474 #define CHECK_LOCAL_TYPE(index, t) \
476 if (!typevector_checktype(jd->var, (index), (t))) \
477 VERIFY_ERROR("Local variable type mismatch"); \
480 #define STORE_LOCAL(t, index) \
483 typecheck_invalidate_locals(state, (index), false); \
484 typevector_store(jd->var, (index), (temp_t), NULL); \
487 #define STORE_LOCAL_2_WORD(t, index) \
490 typecheck_invalidate_locals(state, (index), true); \
491 typevector_store(jd->var, (index), (temp_t), NULL); \
494 #define REACH_BLOCK(target) \
496 if (!typestate_reach(state, (target), \
497 state->bptr->outvars, jd->var, \
498 state->bptr->outdepth)) \
502 #define REACH(target) REACH_BLOCK((target).block)
505 /* handle_basic_block **********************************************************
507 Perform bytecode verification of a basic block.
510 state............the current state of the verifier
513 true.............successful verification,
514 false............an exception has been thrown.
516 *******************************************************************************/
519 handle_basic_block(verifier_state *state)
521 int opcode; /* current opcode */
522 int len; /* for counting instructions, etc. */
523 bool superblockend; /* true if no fallthrough to next block */
524 instruction *iptr; /* the current instruction */
525 basicblock *tbptr; /* temporary for target block */
526 bool maythrow; /* true if this instruction may throw */
529 branch_target_t *table;
530 lookup_target_t *lookup;
531 jitdata *jd = state->jd;
533 varinfo constvalue; /* for PUT*CONST */
534 constant_FMIref *fieldref;
536 LOGSTR1("\n---- BLOCK %04d ------------------------------------------------\n",state->bptr->nr);
539 superblockend = false;
540 state->bptr->flags = BBFINISHED;
542 /* prevent compiler warnings */
545 /* determine the active exception handlers for this block */
546 /* XXX could use a faster algorithm with sorted lists or */
549 for (ex = state->jd->exceptiontable; ex ; ex = ex->down) {
550 if ((ex->start->nr <= state->bptr->nr) && (ex->end->nr > state->bptr->nr)) {
551 LOG1("active handler L%03d", ex->handler->nr);
552 state->handlers[len++] = ex;
555 state->handlers[len] = NULL;
557 /* init variable types at the start of this block */
558 typevector_copy_inplace(state->bptr->inlocals, jd->var, state->numlocals);
560 DOLOG(show_basicblock(jd, state->bptr, SHOW_STACK));
561 DOLOG(typecheck_print_vararray(stdout, jd, state->bptr->invars,
562 state->bptr->indepth));
563 DOLOG(typevector_print(stdout, jd->var, state->numlocals));
566 /* loop over the instructions */
567 len = state->bptr->icount;
568 state->iptr = state->bptr->iinstr;
570 TYPECHECK_COUNT(stat_ins);
574 DOLOG(typevector_print(stdout, jd->var, state->numlocals));
576 DOLOG(show_icmd(jd, state->iptr, false, SHOW_STACK)); LOGNL; LOGFLUSH;
583 /* include generated code for ICMDs verification */
585 #define TYPECHECK_VARIABLESBASED
587 #define METHOD (state->m)
589 #define BPTR (state->bptr)
590 #include <typecheck-variablesbased-gen.inc>
595 #undef TYPECHECK_VARIABLESBASED
598 LOG1("ICMD %d\n", opcode);
599 TYPECHECK_VERIFYERROR_bool("Missing ICMD code during typecheck");
602 /* reach exception handlers for this instruction */
605 TYPECHECK_COUNT(stat_ins_maythrow);
606 TYPECHECK_MARK(state->stat_maythrow);
607 LOG("reaching exception handlers");
609 while (state->handlers[i]) {
610 TYPECHECK_COUNT(stat_handlers_reached);
611 if (state->handlers[i]->catchtype.any)
612 VAR(state->exinvars)->typeinfo.typeclass = state->handlers[i]->catchtype;
614 VAR(state->exinvars)->typeinfo.typeclass.cls = class_java_lang_Throwable;
615 if (!typestate_reach(state,
616 state->handlers[i]->handler,
617 &(state->exinvars), jd->var, 1))
623 LOG("\t\tnext instruction");
625 } /* while instructions */
627 LOG("instructions done");
629 DOLOG(typecheck_print_vararray(stdout, jd, state->bptr->outvars,
630 state->bptr->outdepth));
631 DOLOG(typevector_print(stdout, jd->var, state->numlocals));
634 /* propagate stack and variables to the following block */
635 if (!superblockend) {
636 LOG("reaching following block");
637 tbptr = state->bptr->next;
638 while (tbptr->flags == BBDELETED) {
640 #ifdef TYPECHECK_DEBUG
641 /* this must be checked in parse.c */
642 if ((tbptr->nr) >= state->basicblockcount)
643 TYPECHECK_VERIFYERROR_bool("Control flow falls off the last block");
646 if (!typestate_reach(state,tbptr,state->bptr->outvars, jd->var,
647 state->bptr->outdepth))
651 /* We may have to restore the types of the instack slots. They
652 * have been saved if an <init> call inside the block has
653 * modified the instack types. (see INVOKESPECIAL) */
655 if (state->savedinvars)
656 typestate_restore_invars(state);
662 /****************************************************************************/
664 /* This is the main function of the bytecode verifier. It is called */
665 /* directly after analyse_stack. */
668 /* meth.............the method to verify */
669 /* cdata............codegendata for the method */
670 /* rdata............registerdata for the method */
673 /* true.............successful verification */
674 /* false............an exception has been thrown */
676 /****************************************************************************/
678 #define MAXPARAMS 255
680 bool typecheck(jitdata *jd)
684 varinfo *savedlocals;
685 verifier_state state; /* current state of the verifier */
689 /* collect statistics */
691 #ifdef TYPECHECK_STATISTICS
692 int count_iterations = 0;
693 TYPECHECK_COUNT(stat_typechecked);
694 TYPECHECK_COUNT_FREQ(stat_locals,jd->maxlocals,STAT_LOCALS);
695 TYPECHECK_COUNT_FREQ(stat_blocks,cdata->method->basicblockcount/10,STAT_BLOCKS);
696 TYPECHECK_COUNTIF(cdata->method->exceptiontablelength != 0,stat_methods_with_handlers);
697 state.stat_maythrow = false;
700 /* get required compiler data */
705 /* some logging on entry */
708 LOGSTR("\n==============================================================================\n");
709 DOLOG( show_method(jd, SHOW_STACK) );
710 LOGSTR("\n==============================================================================\n");
711 LOGMETHOD("Entering typecheck: ",cd->method);
713 /* initialize the verifier state */
718 state.basicblockcount = jd->basicblockcount;
719 state.basicblocks = jd->basicblocks;
720 state.savedindices = NULL;
721 state.savedinvars = NULL;
723 /* check that the basicblock numbers are valid */
726 jit_check_basicblock_numbers(jd);
729 /* check if this method is an instance initializer method */
731 state.initmethod = (state.m->name == utf_init);
733 /* initialize the basic block flags for the following CFG traversal */
735 typecheck_init_flags(&state, BBFINISHED);
737 /* number of local variables */
739 /* In <init> methods we use an extra local variable to indicate whether */
740 /* the 'this' reference has been initialized. */
741 /* TYPE_VOID...means 'this' has not been initialized, */
742 /* TYPE_INT....means 'this' has been initialized. */
744 state.numlocals = state.jd->localcount;
745 state.validlocals = state.numlocals;
746 if (state.initmethod)
747 state.numlocals++; /* VERIFIER_EXTRA_LOCALS */
749 state.reverselocalmap = DMNEW(s4, state.validlocals);
750 for (i=0; i<jd->maxlocals; ++i)
751 for (t=0; t<5; ++t) {
752 s4 varindex = jd->local_map[5*i + t];
754 state.reverselocalmap[varindex] = i;
758 LOG("reverselocalmap:");
759 for (i=0; i<state.validlocals; ++i) {
760 LOG2(" %i => javaindex %i", i, state.reverselocalmap[i]);
763 /* allocate the buffer of active exception handlers */
765 state.handlers = DMNEW(exception_entry*, state.jd->exceptiontablelength + 1);
767 /* save local variables */
769 savedlocals = DMNEW(varinfo, state.numlocals);
770 MCOPY(savedlocals, jd->var, varinfo, state.numlocals);
772 /* initialized local variables of first block */
774 if (!typecheck_init_locals(&state, true))
777 /* initialize invars of exception handlers */
779 state.exinvars = state.numlocals;
780 VAR(state.exinvars)->type = TYPE_ADR;
781 typeinfo_init_classinfo(&(VAR(state.exinvars)->typeinfo),
782 class_java_lang_Throwable); /* changed later */
784 LOG("Exception handler stacks set.\n");
786 /* loop while there are still blocks to be checked */
788 TYPECHECK_COUNT(count_iterations);
790 state.repeat = false;
792 state.bptr = state.basicblocks;
794 for (; state.bptr; state.bptr = state.bptr->next) {
795 LOGSTR1("---- BLOCK %04d, ",state.bptr->nr);
796 LOGSTR1("blockflags: %d\n",state.bptr->flags);
799 /* verify reached block */
800 if (state.bptr->flags == BBTYPECHECK_REACHED) {
801 if (!handle_basic_block(&state))
806 LOGIF(state.repeat,"state.repeat == true");
807 } while (state.repeat);
811 #ifdef TYPECHECK_STATISTICS
812 LOG1("Typechecker did %4d iterations",count_iterations);
813 TYPECHECK_COUNT_FREQ(stat_iterations,count_iterations,STAT_ITERATIONS);
814 TYPECHECK_COUNTIF(state.jsrencountered,stat_typechecked_jsr);
815 TYPECHECK_COUNTIF(state.stat_maythrow,stat_methods_maythrow);
818 /* reset the flags of blocks we haven't reached */
820 typecheck_reset_flags(&state);
824 MCOPY(jd->var, savedlocals, varinfo, state.numlocals);
826 /* everything's ok */
828 LOGimp("exiting typecheck");
831 #endif /* ENABLE_VERIFIER */
834 * These are local overrides for various environment variables in Emacs.
835 * Please do not remove this and leave it at the end of the file, where
836 * Emacs will automagically detect them.
837 * ---------------------------------------------------------------------
840 * indent-tabs-mode: t
844 * vim:noexpandtab:sw=4:ts=4: