doc/handbook/runtime.tex

   1 \chapter{Run Time System}
   2
   3 \section{Introduction}
   4
   5 \section{Object layout}
   6
   7 \section{Method invocation}
   8
   9 Java and the programming language Theta do not implement multiple
  10 inheritance, but single inheritance with multiple subtyping. This important
  11 difference makes object layout and method dispatch more efficient. Although
  12 the bidirectional layout was designed for a language with multiple
  13 subtyping, it has the problem that more than one {\em vtbl} pointer has to
  14 be included in objects. The CACAO JIT compiler \cite{KrGr97b} moves the
  15 dispatch header of the bidirectional layout into the class information with
  16 negative offsets from the {\em vtbl}. For each implemented interface there
  17 is a distinct interface {\em vtbl}. Unimplemented interfaces are
  18 represented by null pointers.
  19 %
  20 \begin{figure*}[htb]
  21 \begin{center}
  22 \setlength{\unitlength}{1mm}
  23 \begin{picture}(114,72)
  24 \put( 0,42){\vector(1,0){12}}
  25 \put( 0,42){\makebox(8,6){\it objptr}}
  26 \put(12,60){\makebox(24,6){\it object}}
  27 \put(48,63){\makebox(30,6){\it class}}
  28 \put(90,66){\makebox(24,6){\it code}}
  29 \put(12,42){\makebox(24,18){\it instance data}}
  30 \put(12,36){\makebox(24,6){\it class pointer}}
  31 \put(12,42){\line(1,0){24}}
  32 \put(36,39){\vector(1,0){12}}
  33 \put(48,57){\makebox(30,6){\it method pointer}}
  34 \put(48,57){\line(1,0){30}}
  35 \put(48,51){\makebox(30,6){\it method pointer}}
  36 \put(48,51){\line(1,0){30}}
  37 \put(48,45){\makebox(30,6){\it method pointer}}
  38 \put(48,45){\line(1,0){30}}
  39 \put(48,39){\makebox(30,6){\it class info}}
  40 \put(48,27){\makebox(30,6){\it interface pointer}}
  41 \put(48,33){\line(1,0){30}}
  42 \put(48,33){\makebox(30,6){\it interface pointer}}
  43 \put(48, 0){\makebox(30,6){\it method pointer}}
  44 \put(48, 6){\makebox(30,6){\it method pointer}}
  45 \put(48, 6){\line(1,0){30}}
  46 \put(48,21){\makebox(30,6){\it interfaces}}
  47 \put(90,36){\framebox(24,6){\it method code}}
  48 \put(44,15){\vector(1,0){4}}
  49 \put(44,15){\line(0,1){15}}
  50 \put(44,30){\line(1,0){4}}
  51 \put(40, 0){\vector(1,0){8}}
  52 \put(40, 0){\line(0,1){36}}
  53 \put(40,36){\line(1,0){8}}
  54 \put(78, 3){\line(1,0){8}}
  55 \put(86, 3){\line(0,1){51}}
  56 \put(78, 9){\line(1,0){4}}
  57 \put(82, 9){\line(0,1){39}}
  58 \put(78,18){\line(1,0){4}}
  59 \put(78,54){\line(1,0){8}}
  60 \put(86,36){\vector(1,0){4}}
  61 \put(90,48){\framebox(24,6){\it method code}}
  62 \put(78,48){\vector(1,0){12}}
  63 \put(90,60){\framebox(24,6){\it method code}}
  64 \put(78,60){\vector(1,0){12}}
  65 \thicklines
  66 \put(48, 0){\framebox(30,12){}}
  67 \put(48,15){\framebox(30,6){\it method pointer}}
  68 \put(12,36){\framebox(24,24){}}
  69 \put(48,27){\framebox(30,36){}}
  70 \put(48,39){\line(1,0){30}}
  71 \end{picture}
  72 \caption{CACAO object and compact class descriptor layout}
  73 \label{objectcompactlayout}
  74 \end{center}
  75 \end{figure*}
  76
  77 To call a virtual method, two memory access instructions are necessary
  78 (load the class pointer, load the method pointer) followed by the call
  79 instruction. Calling an interface method needs an additional indirection.
  80 %
  81 \begin{figure*}[htb]
  82 \begin{center}
  83 \setlength{\unitlength}{1mm}
  84 \begin{picture}(114,42)
  85 \put(0,15){\vector(1,0){12}}
  86 \put(0,15){\makebox(12,6){\it objptr}}
  87 \put(12,33){\makebox(24,6){\it object}}
  88 \put(48,36){\makebox(30,6){\it class}}
  89 \put(90,39){\makebox(24,6){\it code}}
  90 \put(12,15){\makebox(24,18){\it instance data}}
  91 \put(12,9){\makebox(24,6){\it class pointer}}
  92 \put(12,15){\line(1,0){24}}
  93 \put(36,12){\vector(1,0){12}}
  94 \put(48,30){\makebox(30,6){\it method pointer}}
  95 \put(48,30){\line(1,0){30}}
  96 \put(48,24){\makebox(30,6){\it method pointer}}
  97 \put(48,24){\line(1,0){30}}
  98 \put(48,18){\makebox(30,6){\it method pointer}}
  99 \put(48,18){\line(1,0){30}}
 100 \put(48,12){\makebox(30,6){\it class info}}
 101 \put(48,0){\makebox(30,6){\it ifmethod pointer}}
 102 \put(48,6){\line(1,0){30}}
 103 \put(48,6){\makebox(30,6){\it ifmethod pointer}}
 104 \put(90,3){\framebox(24,6){\it method code}}
 105 \put(78,3){\vector(1,0){12}}
 106 \put(82,3){\line(0,1){18}}
 107 \put(78,21){\line(1,0){4}}
 108 \put(78,27){\line(1,0){8}}
 109 \put(78,9){\line(1,0){8}}
 110 \put(86,9){\line(0,1){18}}
 111 \put(86,18){\vector(1,0){4}}
 112 \put(90,18){\framebox(24,6){\it method code}}
 113 \put(78,33){\vector(1,0){12}}
 114 \put(90,33){\framebox(24,6){\it method code}}
 115 \thicklines
 116 \put(12,9){\framebox(24,24){}}
 117 \put(48,0){\framebox(30,36){}}
 118 \put(48,12){\line(1,0){30}}
 119 \end{picture}
 120 \caption{CACAO object and fast class descriptor layout}
 121 \label{objectfastlayout}
 122 \end{center}
 123 \end{figure*}
 124
 125 In the faster scheme, we store interface methods in an additional table at
 126 negative offsets from the {\em vtbl} (see figure~\ref{objectfastlayout}).
 127 Segregating the interface virtual function table keeps the standard virtual
 128 function table small and allows interface methods to be called with just
 129 two memory accesses. The memory consumption of virtual function tables
 130 containing interface and class methods would be {\em number of (classes +
 131 interfaces) $\times$ number of distinct methods}. The memory consumption of
 132 the interface tables is only {\em number of classes which implement
 133 interfaces $\times$ number of interface methods}. Colouring can be used to
 134 reduce the number of distinct offsets for interface methods further, but
 135 complicates dynamic class loading, leading to renumbering and code
 136 patching.
 137
 138 The Jalapeno virtual machine implements an interface method invocation
 139 similar to the fast class descriptor layout of CACAO, but instead of
 140 colouring hashing of the method indices is used \cite{Alpern+01}. The table
 141 for the interface method pointers is allocated with a fixed size much
 142 smaller than the number of interface methods. When two method indices hash
 143 to the same offset, a conflict resolving stub is called instead of the
 144 interface methods directly. For conflict resolution the stub is passed the
 145 method index in a scratch register as additional argument. An interface
 146 method invocation can be executed with the following four machine
 147 instructions:
 148 %
 149 \begin{verbatim}
 150 LD  vtblptr,(obj)                  ; load vtbl pointer
 151 LD  mptr,hash(method_ptr)(vtblptr) ; load method pointer
 152 MV  mindex,idreg                   ; load method index
 153 JSR (mptr)                         ; call method (or conflict stub)
 154 \end{verbatim}
 155 %
 156 The number of machine instructions is the same as in the compact class
 157 descriptor layout of CACAO, but the indirection is eliminated, which
 158 reduces the number of cycles needed for the execution of this instruction
 159 sequence on a pipelined architecture. Compared to the old interface method
 160 invocation in the Jalapeno VM, which searched the superclass hierarchy for
 161 a matching method signature, the new method resulted in changes in the
 162 run-time from one percent slowdowns to speedups up to 51 percent.
 163
 164
 165 \section{Run time type checking}
 166 \label{sectionruntimetypechecking}
 167
 168 Since type tests for trees are more (space) efficient than type tests for
 169 DAGs, a possible solution is to split a DAG into a tree part and the
 170 remaining graph. For languages with single inheritance and multiple
 171 subtyping this partitioning of the class hierarchy is already done in the
 172 language itself.
 173
 174 \begin{figure}[htb]
 175 \begin{center}
 176 \setlength{\unitlength}{1mm}
 177 \begin{picture}(86,26)
 178 \thicklines
 179 \put(23, 3){\circle{6}}
 180 \put(43, 3){\circle{6}}
 181 \put(63, 3){\circle{6}}
 182 \put(33,13){\circle{6}}
 183 \put(53,13){\circle{6}}
 184 \put(43,23){\circle{6}}
 185 \put(20, 0){\makebox(6,6){D}}
 186 \put(40, 0){\makebox(6,6){E}}
 187 \put(60, 0){\makebox(6,6){F}}
 188 \put(30,10){\makebox(6,6){B}}
 189 \put(50,10){\makebox(6,6){C}}
 190 \put(40,20){\makebox(6,6){A}}
 191 \put(19, 0){\makebox(0,6)[r]{\{3,0\}}}
 192 \put(47, 0){\makebox(6,6)[l]{\{4,0\}}}
 193 \put(67, 0){\makebox(0,6)[l]{\{6,0\}}}
 194 \put(29,10){\makebox(0,6)[r]{\{2,2\}}}
 195 \put(57,10){\makebox(0,6)[l]{\{5,1\}}}
 196 \put(47,20){\makebox(0,6)[l]{\{1,5\}}}
 197 \put(25, 5){\line( 1,1){6}}
 198 \put(35,15){\line( 1,1){6}}
 199 \put(41, 5){\line(-1,1){6}}
 200 \put(51,15){\line(-1,1){6}}
 201 \put(61, 5){\line(-1,1){6}}
 202 \end{picture}
 203 \caption{Relative numbering with \{{\tt baseval}, {\tt diffval}\} pairs}
 204 \label{relnumbering}
 205 \end{center}
 206 \end{figure}
 207
 208 CACAO uses a subtype test based on relative numbering for classes and a
 209 kind of matrix implementation for interfaces. Two numbers {\em low} and
 210 {\em high} are stored for each class in the class hierarchy. A depth first
 211 traversal of the hierarchy increments a counter for each class and assigns
 212 the counter to the {\em low} field when the class is first encountered and
 213 assigns the counter to the {\em high} field when the traversal leaves the
 214 class. In languages with dynamic class loading a renumbering of the
 215 hierarchy is needed whenever a class is added. A class {\em sub} is a
 216 subtype of another class {\em super}, if {\em super.low $\le$ sub.low $\le$
 217 super.high}. Since a range check is implemented more efficiently by an
 218 unsigned comparison, CACAO stores the difference between the {\em low} and
 219 {\em high} values and compares it against the difference of the {\em low}
 220 values of both classes. The code for {\tt instanceof} looks similar to:
 221
 222 \begin{verbatim}
 223 return (unsigned) (sub->vftbl->baseval - super->vftbl->baseval) <=
 224        (unsigned) (super->vftbl->diffval);
 225 \end{verbatim}
 226
 227 For leaf nodes in the class hierarchy the {\tt diffval} is 0 which results
 228 in a faster test (a simple comparison of the {\tt baseval} fields of the
 229 sub- and superclass). In general a JIT compiler can generate the faster
 230 test only for final classes. An AOT compiler or a JIT compiler which does
 231 patching of the already generated machine code may additionally replace
 232 both the {\tt baseval} and the {\tt diffval} of the superclass by a
 233 constant. Currently CACAO uses constants only when dynamic class loading is
 234 not used.
 235
 236 CACAO stores an interface table at negative offsets from the virtual method
 237 table (see figure~\ref{objectcompactlayout}). This table is needed for the
 238 invocation of interface methods. The same table is additionally used by the
 239 subtype test for interfaces. If the table is empty for the index of the
 240 superclass, the subtype test fails. The code for {\tt instanceof} looks
 241 similar to:
 242
 243 \begin{verbatim}
 244 return (sub->vftbl->interfacetable[-super->index] != NULL);
 245 \end{verbatim}
 246
 247 Both subtype tests can be implemented by very few machine code instructions
 248 without using branches which are expensive on modern processors.
 249
 250
 251 \section{Exception handling}
 252
 253 \subsection{Introduction}
 254
 255 Exceptions in Java occur either implicitly or explicitly. Typical
 256 implicit exceptions are references to the {\tt null} pointer, array
 257 index out of bounds and division by zero. Exceptions also can be
 258 raised explicitly with the {\tt throw} instruction. To handle
 259 exceptions occurring during execution, code which can raise an
 260 exception is included in a {\tt try} block. An efficient
 261 implementation of exception handling has to take care of managing {\tt
 262 try} blocks and to check for implicit exceptions efficiently .
 263
 264
 265 \subsection{Known implementation techniques}
 266
 267 Three standard methods exist for implementing exception handling:
 268
 269 \begin{itemize}
 270
 271 \item dynamically create a linked list of {\tt try} block data
 272       structures,
 273
 274 \item use static {\tt try} block tables and search these tables at run
 275       time (suggested for JavaVM interpreters),
 276
 277 \item use functions with two return values.
 278
 279 \end{itemize}
 280
 281 The first method has been used in portable implementations of
 282 exception handling for C++ \cite{Cameron+92} or Ada \cite{Giering+94}
 283 using {\tt setjmp} and {\tt longjmp}. A linked exception handling data
 284 structure is created when entering a {\tt try} block and the structure
 285 is discarded when leaving the protected block. Java requires precise
 286 exceptions. It means that all expressions evaluated before the
 287 exception raising instruction must have finished and all expressions
 288 after the raising instruction must not have been started. Therefore,
 289 in practice, some instructions may not be moved freely. In the case of
 290 subroutine calls, the callee-saved registers must be restored to their
 291 original value. The data structure can be used to store such
 292 information. The disadvantage of this method is that creating and
 293 discarding of the data structure takes some time even if an exception
 294 is never raised.
 295
 296 The second method has been suggested for an efficient exception
 297 handling implementation of C++ \cite{KoenigStroustrup90} and is used
 298 in Java implementations. For every method, the JavaVM maintains an
 299 exception table. This exception table contains the program counter of
 300 the start and the end of the {\tt try} block, the program counter of
 301 the exception handler and the type of the exception. A JavaVM
 302 interpreter can easily interpret this structure and dispatch to the
 303 corresponding handler code. If the byte code is translated to native
 304 code, the equivalent technique is more complicated.
 305
 306 To simplify restoration of the registers, the old CACAO implementation
 307 used a different scheme \cite{KrGr97b}. A method has two return
 308 values: the real return value and an exception value stored in a
 309 register. After each method call, the exception register is checked
 310 and, if it is non-zero, the exception handling code is executed. Since
 311 an exception is rarely raised, the branch is easy to predict and
 312 cheap. Entering and leaving a {\tt try} block have no associated
 313 costs. At compile time, the dispatch information contained in the
 314 exception table is translated into method dispatching native code.
 315
 316 Run time checks for null pointers and array bounds are quite frequent,
 317 but can be eliminated in many cases. It is often possible to move a
 318 loop invariant null pointer check before the loop or to eliminate a
 319 bound check. Some more sophisticated compilers use these code motion
 320 techniques.
 321
 322
 323 \subsection{Motivation for a change}
 324
 325 \begin{table*}
 326 \begin{center}
 327 \begin{tabular}[b]{|l|c|c|c|c|c|}
 328 \hline
 329                     & JavaLex & javac & espresso & Toba & java\_cup \\
 330 \hline
 331 null pointer checks &  6859   &  8197 &  11114   & 5825 &   7406    \\
 332 \hline
 333 method calls        &  3226   &  7498 &   7515   & 4401 &   5310    \\
 334 \hline
 335 try blocks          &    20   &   113 &     44   &   28 &     27    \\
 336 \hline
 337 \end{tabular}
 338 \caption{Number of pointer checks, method invocations and try blocks}
 339 \label{MethodTry}
 340 \end{center}
 341 \end{table*}
 342
 343 The old CACAO implementation was simple, but it only makes sense if
 344 the number of try blocks is high. We made an empirical study to count
 345 the numbers of static occurrences of method invocations and of {\tt
 346 try} blocks in some applications (see table \ref{MethodTry}). The
 347 number of method invocations is two magnitudes bigger than the number
 348 of {\tt try} blocks. Furthermore, an exception is rarely raised during
 349 program execution. This led us to a new implementation of exception
 350 handling.
 351
 352
 353 \subsection{The new exception handling scheme}
 354
 355 The new exception handling scheme is similar to that in a JavaVM
 356 interpreter. If an exception occurs, information in the exception
 357 table is interpreted. However native code complicates the matter.
 358
 359 The pointers to Java byte code must be replaced by pointers to native
 360 code. It requires that, during native code generation, the order of
 361 basic blocks not be allowed to change. If basic blocks are eliminated
 362 because of dead code, the information about a block can not be
 363 discarded if there is a pointer to it in the exception table.
 364
 365 A CACAO stack frame only contains copies of saved or spilled
 366 registers. There is no saved frame pointer. The size of a stack frame
 367 is only contained in the instructions which allocate and deallocate
 368 the stack. Therefore, to support exception handling, additional
 369 information has to be stored elsewhere.
 370
 371 The code for a method needs access to constants (mostly address
 372 constants). Since a global constant table would be too large for short
 373 address ranges and, because methods are compiled on demand, every
 374 method has its own constant area which is allocated directly before
 375 the start of the method code (see fig.\ \ref{methodlayout}). A
 376 register is reserved which contains the method pointer. Constants are
 377 addressed relative to the method pointer.
 378
 379 \begin{figure}[htb]
 380 \begin{center}
 381 \setlength{\unitlength}{1mm}
 382 \begin{picture}(50,18)
 383 \put(24,6){\vector(-1,0){6}}
 384 \put(24,3){\makebox(25,6){\it method pointer}}
 385 \put(0,0){\makebox(18, 6){\it constants}}
 386 \put(0,6){\makebox(18,12){\it code}}
 387 \thicklines
 388 \put(0,6){\line(1,0){18}}
 389 \put(0,0){\framebox(18,18){}}
 390 \end{picture}
 391 \caption{CACAO method layout}
 392 \label{methodlayout}
 393 \end{center}
 394 \end{figure}
 395
 396 During a method call, the method pointer of the calling method is
 397 destroyed. However the return address is stored in a register which is
 398 preserved during execution of the called method and has to be used for
 399 returning from the method. After a method return, the method pointer
 400 of the calling method is recomputed using the return address. The
 401 following code for a method call demonstrates the method calling
 402 convention:
 403
 404 \begin{verbatim}
 405 LDQ cp,(obj)     ; load class pointer
 406 LDQ mp,met(cp)   ; load method pointer
 407 JSR ra,(mp)      ; call method
 408 LDA mp=ra+offset ; recompute method pointer
 409 \end{verbatim}
 410
 411 At the beginning of the constant area, there are fields which contain
 412 the necessary information for register restoration:
 413
 414 \begin{mylist}{{\tt framesize}}
 415 \item[{\tt framesize}] the size of the stack frame
 416 \item[{\tt isleaf}]    a flag which is true if the method is a leaf
 417 method
 418 \item[{\tt intsave}]   number of saved integer registers
 419 \item[{\tt floatsave}] number of saved floating point registers
 420 \item[{\tt extable}]   the exception table -- similar to the JavaVM
 421 table
 422 \end{mylist}
 423
 424 The exception handler first checks if the current executing method has
 425 an associated handler and may dispatch to this handler. If there is no
 426 handler, it unwinds the stack and searches the parent methods for a
 427 handler. The information in the constant area is used to restore the
 428 registers and update the stack pointer. The return address and the
 429 offset in the immediately following {\tt LDA} instruction is used to
 430 recompute the method pointer.
 431
 432 \begin{table*}
 433 \begin{center}
 434 \begin{tabular}[b]{|l|c|c|c|c|c|}
 435 \hline
 436           & JavaLex & javac  & espresso & Toba  & java\_cup \\ \hline
 437 CACAO old &  61629  & 156907 &  122951  & 67602 &   87489   \\ \hline
 438 CACAO new &  37523  &  86346 &   69212  & 41315 &   52386   \\ \hline
 439 \end{tabular}
 440 \caption{Number of generated native instructions}
 441 \label{CodeSize}
 442 \end{center}
 443 \end{table*}
 444
 445 The change to the new scheme allowed us to implement the null pointer
 446 check for free. We protect the first 64 Kbyte of memory against read
 447 and write access. If an bus error is raised, we catch the signal and
 448 check if the faulting address is within the first 64K. If this is the
 449 case, we dispatch to our exception handler, otherwise we propagate the
 450 signal. This gives faster programs and reduces the work of the
 451 compiler in generating pointer checking code. As shown in table
 452 \ref{MethodTry}, the numbers of null pointer checks are quite high.
 453
 454
 455 \section{Garbage collector}
 456
 457