1 {-# LANGUAGE TypeFamilies, StandaloneDeriving, FlexibleInstances, FlexibleContexts, UndecidableInstances, RecordWildCards, OverloadedStrings, TypeSynonymInstances, MultiParamTypeClasses #-}
2 module JVM.Assembler where
5 import Control.Applicative
6 import Data.Ix (inRange)
10 import qualified Data.Binary as Binary
11 import qualified Data.Binary.Get as Get
14 import qualified Data.ByteString.Lazy as B
16 import qualified Data.Set as S
17 import qualified Data.Map as M
19 import Data.BinaryState
28 deriving (Eq, Ord, Enum, Show)
37 deriving (Eq, Ord, Enum, Show)
40 codeStackSize :: Word16,
41 codeMaxLocals :: Word16,
43 codeInstructions :: [Instruction],
44 codeExceptionsN :: Word16,
45 codeExceptions :: [CodeException],
47 codeAttributes :: [AttributeInfo] }
50 data CodeException = CodeException {
54 eCatchType :: Word16 }
57 instance BinaryState Integer CodeException where
58 put (CodeException {..}) = do
64 get = CodeException <$> get <*> get <*> get <*> get
66 instance BinaryState Integer AttributeInfo where
68 let sz = 6 + attributeLength a -- full size of AttributeInfo structure
69 liftOffset (fromIntegral sz) Binary.put a
73 instance BinaryState Integer Code where
78 forM_ codeInstructions put
80 forM_ codeExceptions put
82 forM_ codeAttributes put
88 bytes <- replicateM (fromIntegral len) get
89 let bytecode = B.pack bytes
90 code = decodeWith readInstructions 0 bytecode
92 excs <- replicateM (fromIntegral excn) get
94 attrs <- replicateM (fromIntegral nAttrs) get
95 return $ Code stackSz locals len code excn excs nAttrs attrs
97 readInstructions :: GetState Integer [Instruction]
104 next <- readInstructions
125 | SIPUSH Word16 -- 17
134 | ILOAD_ IMM -- 26, 27, 28, 29
135 | LLOAD_ IMM -- 30, 31, 32, 33
136 | FLOAD_ IMM -- 34, 35, 36, 37
137 | DLOAD_ IMM -- 38, 39, 40, 41
138 | ALOAD_ IMM -- 42, 43, 44, 45
152 | ISTORE_ IMM -- 59, 60, 61, 62
153 | LSTORE_ IMM -- 63, 64, 65, 66
154 | FSTORE_ IMM -- 67, 68, 69, 70
155 | DSTORE_ IMM -- 71, 72, 73, 74
156 | ASTORE_ IMM -- 75, 76, 77, 78
210 | IINC Word8 Word8 -- 132
227 | FCMP CMP -- 149, 150
228 | DCMP CMP -- 151, 152
229 | IF CMP -- 153, 154, 155, 156, 157, 158
230 | IF_ICMP CMP Word16 -- 159, 160, 161, 162, 163, 164
231 | IF_ACMP CMP Word16 -- 165, 166
235 | TABLESWITCH Word32 Word32 Word32 [Word32] -- 170
236 | LOOKUPSWITCH Word32 Word32 [(Word32, Word32)] -- 171
242 | GETSTATIC Word16 -- 178
243 | PUTSTATIC Word16 -- 179
244 | GETFIELD Word16 -- 180
245 | PUTFIELD Word16 -- 181
246 | INVOKEVIRTUAL Word16 -- 182
247 | INVOKESPECIAL Word16 -- 183
248 | INVOKESTATIC Word16 -- 184
249 | INVOKEINTERFACE Word16 Word8 -- 185
251 | NEWARRAY Word8 -- 188, see ArrayType
252 | ANEWARRAY Word16 -- 189
255 | CHECKCAST Word16 -- 192
256 | INSTANCEOF Word16 -- 193
257 | MONITORENTER -- 194
259 | WIDE Word8 Instruction -- 196
260 | MULTINANEWARRAY Word16 Word8 -- 197
261 | IFNULL Word16 -- 198
262 | IFNONNULL Word16 -- 199
263 | GOTO_W Word32 -- 200
264 | JSR_W Word32 -- 201
276 deriving (Eq, Show, Enum)
278 imm :: Word8 -> (IMM -> Instruction) -> Word8 -> GetState s Instruction
279 imm base constr x = return $ constr $ toEnum $ fromIntegral (x-base)
281 putImm :: Word8 -> IMM -> PutState Integer ()
282 putImm base i = putByte $ base + (fromIntegral $ fromEnum i)
284 atype2byte :: ArrayType -> Word8
285 atype2byte T_BOOLEAN = 4
286 atype2byte T_CHAR = 5
287 atype2byte T_FLOAT = 6
288 atype2byte T_DOUBLE = 7
289 atype2byte T_BYTE = 8
290 atype2byte T_SHORT = 9
291 atype2byte T_INT = 10
292 atype2byte T_LONG = 11
294 byte2atype :: Word8 -> GetState s ArrayType
295 byte2atype 4 = return T_BOOLEAN
296 byte2atype 5 = return T_CHAR
297 byte2atype 6 = return T_FLOAT
298 byte2atype 7 = return T_DOUBLE
299 byte2atype 8 = return T_BYTE
300 byte2atype 9 = return T_SHORT
301 byte2atype 10 = return T_INT
302 byte2atype 11 = return T_LONG
303 byte2atype x = fail $ "Unknown array type byte: " ++ show x
305 instance BinaryState Integer ArrayType where
310 put t = putByte (atype2byte t)
312 put1 :: (BinaryState Integer a) => Word8 -> a -> PutState Integer ()
317 put2 :: (BinaryState Integer a, BinaryState Integer b) => Word8 -> a -> b -> PutState Integer ()
323 instance BinaryState Integer Instruction where
325 put ACONST_NULL = putByte 1
326 put ICONST_M1 = putByte 2
327 put ICONST_0 = putByte 3
328 put ICONST_1 = putByte 4
329 put ICONST_2 = putByte 5
330 put ICONST_3 = putByte 6
331 put ICONST_4 = putByte 7
332 put ICONST_5 = putByte 8
333 put LCONST_0 = putByte 9
334 put LCONST_1 = putByte 10
335 put FCONST_0 = putByte 11
336 put FCONST_1 = putByte 12
337 put FCONST_2 = putByte 13
338 put DCONST_0 = putByte 14
339 put DCONST_1 = putByte 15
340 put (BIPUSH x) = put1 16 x
341 put (SIPUSH x) = put1 17 x
342 put (LDC1 x) = put1 18 x
343 put (LDC2 x) = put1 19 x
344 put (LDC2W x) = put1 20 x
345 put (ILOAD x) = put1 21 x
346 put (LLOAD x) = put1 22 x
347 put (FLOAD x) = put1 23 x
348 put (DLOAD x) = put1 24 x
349 put (ALOAD x) = put1 25 x
350 put (ILOAD_ i) = putImm 26 i
351 put (LLOAD_ i) = putImm 30 i
352 put (FLOAD_ i) = putImm 34 i
353 put (DLOAD_ i) = putImm 38 i
354 put (ALOAD_ i) = putImm 42 i
355 put IALOAD = putByte 46
356 put LALOAD = putByte 47
357 put FALOAD = putByte 48
358 put DALOAD = putByte 49
359 put AALOAD = putByte 50
360 put BALOAD = putByte 51
361 put CALOAD = putByte 52
362 put SALOAD = putByte 53
363 put (ISTORE x) = put1 54 x
364 put (LSTORE x) = put1 55 x
365 put (FSTORE x) = put1 56 x
366 put (DSTORE x) = put1 57 x
367 put (ASTORE x) = put1 58 x
368 put (ISTORE_ i) = putImm 59 i
369 put (LSTORE_ i) = putImm 63 i
370 put (FSTORE_ i) = putImm 67 i
371 put (DSTORE_ i) = putImm 71 i
372 put (ASTORE_ i) = putImm 75 i
373 put IASTORE = putByte 79
374 put LASTORE = putByte 80
375 put FASTORE = putByte 81
376 put DASTORE = putByte 82
377 put AASTORE = putByte 83
378 put BASTORE = putByte 84
379 put CASTORE = putByte 85
380 put SASTORE = putByte 86
382 put POP2 = putByte 88
384 put DUP_X1 = putByte 90
385 put DUP_X2 = putByte 91
386 put DUP2 = putByte 92
387 put DUP2_X1 = putByte 93
388 put DUP2_X2 = putByte 94
389 put SWAP = putByte 95
390 put IADD = putByte 96
391 put LADD = putByte 97
392 put FADD = putByte 98
393 put DADD = putByte 99
394 put ISUB = putByte 100
395 put LSUB = putByte 101
396 put FSUB = putByte 102
397 put DSUB = putByte 103
398 put IMUL = putByte 104
399 put LMUL = putByte 105
400 put FMUL = putByte 106
401 put DMUL = putByte 107
402 put IDIV = putByte 108
403 put LDIV = putByte 109
404 put FDIV = putByte 110
405 put DDIV = putByte 111
406 put IREM = putByte 112
407 put LREM = putByte 113
408 put FREM = putByte 114
409 put DREM = putByte 115
410 put INEG = putByte 116
411 put LNEG = putByte 117
412 put FNEG = putByte 118
413 put DNEG = putByte 119
414 put ISHL = putByte 120
415 put LSHL = putByte 121
416 put ISHR = putByte 122
417 put LSHR = putByte 123
418 put IUSHR = putByte 124
419 put LUSHR = putByte 125
420 put IAND = putByte 126
421 put LAND = putByte 127
422 put IOR = putByte 128
423 put LOR = putByte 129
424 put IXOR = putByte 130
425 put LXOR = putByte 131
426 put (IINC x y) = put2 132 x y
427 put I2L = putByte 133
428 put I2F = putByte 134
429 put I2D = putByte 135
430 put L2I = putByte 136
431 put L2F = putByte 137
432 put L2D = putByte 138
433 put F2I = putByte 139
434 put F2L = putByte 140
435 put F2D = putByte 141
436 put D2I = putByte 142
437 put D2L = putByte 143
438 put D2F = putByte 144
439 put I2B = putByte 145
440 put I2C = putByte 146
441 put I2S = putByte 147
442 put LCMP = putByte 148
443 put (FCMP C_LT) = putByte 149
444 put (FCMP C_GT) = putByte 150
445 put (FCMP c) = fail $ "No such instruction: FCMP " ++ show c
446 put (DCMP C_LT) = putByte 151
447 put (DCMP C_GT) = putByte 152
448 put (DCMP c) = fail $ "No such instruction: DCMP " ++ show c
449 put (IF c) = putByte (fromIntegral $ 153 + fromEnum c)
450 put (IF_ACMP C_EQ x) = put1 165 x
451 put (IF_ACMP C_NE x) = put1 166 x
452 put (IF_ACMP c _) = fail $ "No such instruction: IF_ACMP " ++ show c
453 put (IF_ICMP c x) = putByte (fromIntegral $ 159 + fromEnum c) >> put x
454 put GOTO = putByte 167
455 put (JSR x) = put1 168 x
456 put RET = putByte 169
457 put (TABLESWITCH def low high offs) = do
460 let pads = 4 - (offset `mod` 4)
461 replicateM (fromIntegral pads) (putByte 0)
465 put (LOOKUPSWITCH def n pairs) = do
468 let pads = 4 - (offset `mod` 4)
469 replicateM (fromIntegral pads) (putByte 0)
473 put IRETURN = putByte 172
474 put LRETURN = putByte 173
475 put FRETURN = putByte 174
476 put DRETURN = putByte 175
477 put RETURN = putByte 177
478 put (GETSTATIC x) = put1 178 x
479 put (PUTSTATIC x) = put1 179 x
480 put (GETFIELD x) = put1 180 x
481 put (PUTFIELD x) = put1 181 x
482 put (INVOKEVIRTUAL x) = put1 182 x
483 put (INVOKESPECIAL x) = put1 183 x
484 put (INVOKESTATIC x) = put1 184 x
485 put (INVOKEINTERFACE x c) = put2 185 x c >> putByte 0
486 put (NEW x) = put1 187 x
487 put (NEWARRAY x) = put1 188 x
488 put (ANEWARRAY x) = put1 189 x
489 put ARRAYLENGTH = putByte 190
490 put ATHROW = putByte 191
491 put (CHECKCAST x) = put1 192 x
492 put (INSTANCEOF x) = put1 193 x
493 put MONITORENTER = putByte 194
494 put MONITOREXIT = putByte 195
495 put (WIDE x inst) = put2 196 x inst
496 put (MULTINANEWARRAY x y) = put2 197 x y
497 put (IFNULL x) = put1 198 x
498 put (IFNONNULL x) = put1 199 x
499 put (GOTO_W x) = put1 200 x
500 put (JSR_W x) = put1 201 x
506 1 -> return ACONST_NULL
507 2 -> return ICONST_M1
515 10 -> return LCONST_1
516 11 -> return FCONST_0
517 12 -> return FCONST_1
518 13 -> return FCONST_2
519 14 -> return DCONST_0
520 15 -> return DCONST_1
597 132 -> IINC <$> get <*> get
614 149 -> return $ FCMP C_LT
615 150 -> return $ FCMP C_GT
616 151 -> return $ DCMP C_LT
617 152 -> return $ DCMP C_GT
618 165 -> IF_ACMP C_EQ <$> get
619 166 -> IF_ACMP C_NE <$> get
625 let pads = 4 - (offset `mod` 4)
626 skip (fromIntegral pads)
630 offs <- replicateM (fromIntegral $ high - low + 1) get
631 return $ TABLESWITCH def low high offs
634 let pads = 4 - (offset `mod` 4)
635 skip (fromIntegral pads)
638 pairs <- replicateM (fromIntegral n) get
639 return $ LOOKUPSWITCH def n pairs
640 172 -> return IRETURN
641 173 -> return LRETURN
642 174 -> return FRETURN
643 175 -> return DRETURN
645 178 -> GETSTATIC <$> get
646 179 -> PUTSTATIC <$> get
647 180 -> GETFIELD <$> get
648 181 -> PUTFIELD <$> get
649 182 -> INVOKEVIRTUAL <$> get
650 183 -> INVOKESPECIAL <$> get
651 184 -> INVOKESTATIC <$> get
652 185 -> (INVOKEINTERFACE <$> get <*> get) <* skip 1
654 188 -> NEWARRAY <$> get
655 189 -> ANEWARRAY <$> get
656 190 -> return ARRAYLENGTH
658 192 -> CHECKCAST <$> get
659 193 -> INSTANCEOF <$> get
660 194 -> return MONITORENTER
661 195 -> return MONITOREXIT
662 196 -> WIDE <$> get <*> get
663 197 -> MULTINANEWARRAY <$> get <*> get
664 198 -> IFNULL <$> get
665 199 -> IFNONNULL <$> get
666 200 -> GOTO_W <$> get
668 _ | inRange (59, 62) c -> imm 59 ISTORE_ c
669 | inRange (63, 66) c -> imm 63 LSTORE_ c
670 | inRange (67, 70) c -> imm 67 FSTORE_ c
671 | inRange (71, 74) c -> imm 71 DSTORE_ c
672 | inRange (75, 78) c -> imm 75 ASTORE_ c
673 | inRange (26, 29) c -> imm 26 ILOAD_ c
674 | inRange (30, 33) c -> imm 30 LLOAD_ c
675 | inRange (34, 37) c -> imm 34 FLOAD_ c
676 | inRange (38, 41) c -> imm 38 DLOAD_ c
677 | inRange (42, 45) c -> imm 42 ALOAD_ c
678 | inRange (153, 158) c -> return $ IF (toEnum $ fromIntegral $ c-153)
679 | inRange (159, 164) c -> IF_ICMP (toEnum $ fromIntegral $ c-159) <$> get
680 | otherwise -> fail $ "Unknown instruction byte code: " ++ show c