1 {-# LANGUAGE TypeFamilies, StandaloneDeriving, FlexibleInstances,
2 FlexibleContexts, UndecidableInstances, RecordWildCards, OverloadedStrings,
3 TypeSynonymInstances, MultiParamTypeClasses #-}
4 -- | This module declares data type for JVM instructions, and BinaryState
5 -- instances to read/write them.
21 import Control.Applicative
22 import Data.Ix (inRange)
24 import qualified Data.Binary as Binary
25 import qualified Data.ByteString.Lazy as B
27 import Data.BinaryState
30 -- | Immediate constant. Corresponding value will be added to base opcode.
36 deriving (Eq, Ord, Enum, Show)
38 -- | Comparation operation type. Not all CMP instructions support all operations.
46 deriving (Eq, Ord, Enum, Show)
48 -- | Format of Code method attribute.
50 codeStackSize :: Word16,
51 codeMaxLocals :: Word16,
53 codeInstructions :: [Instruction],
54 codeExceptionsN :: Word16,
55 codeExceptions :: [CodeException],
57 codeAttributes :: Attributes File }
60 -- | Exception descriptor
61 data CodeException = CodeException {
65 eCatchType :: Word16 }
68 instance BinaryState Integer CodeException where
69 put (CodeException {..}) = do
75 get = CodeException <$> get <*> get <*> get <*> get
77 instance BinaryState Integer Attribute where
79 let sz = 6 + attributeLength a -- full size of AttributeInfo structure
80 liftOffset (fromIntegral sz) Binary.put a
84 instance BinaryState Integer Code where
89 forM_ codeInstructions put
91 forM_ codeExceptions put
93 forM_ (attributesList codeAttributes) put
99 bytes <- replicateM (fromIntegral len) get
100 let bytecode = B.pack bytes
101 code = decodeWith readInstructions 0 bytecode
103 excs <- replicateM (fromIntegral excn) get
105 attrs <- replicateM (fromIntegral nAttrs) get
106 return $ Code stackSz locals len code excn excs nAttrs (AP attrs)
108 -- | Read sequence of instructions (to end of stream)
109 readInstructions :: GetState Integer [Instruction]
110 readInstructions = do
116 next <- readInstructions
119 -- | JVM instruction set. For comments, see JVM specification.
137 | BIPUSH Word8 -- ^ 16
138 | SIPUSH Word16 -- ^ 17
140 | LDC2 Word16 -- ^ 19
141 | LDC2W Word16 -- ^ 20
142 | ILOAD Word8 -- ^ 21
143 | LLOAD Word8 -- ^ 22
144 | FLOAD Word8 -- ^ 23
145 | DLOAD Word8 -- ^ 24
146 | ALOAD Word8 -- ^ 25
147 | ILOAD_ IMM -- ^ 26, 27, 28, 29
148 | LLOAD_ IMM -- ^ 30, 31, 32, 33
149 | FLOAD_ IMM -- ^ 34, 35, 36, 37
150 | DLOAD_ IMM -- ^ 38, 39, 40, 41
151 | ALOAD_ IMM -- ^ 42, 43, 44, 45
160 | ISTORE Word8 -- ^ 54
161 | LSTORE Word8 -- ^ 55
162 | FSTORE Word8 -- ^ 56
163 | DSTORE Word8 -- ^ 57
164 | ASTORE Word8 -- ^ 58
165 | ISTORE_ IMM -- ^ 59, 60, 61, 62
166 | LSTORE_ IMM -- ^ 63, 64, 65, 66
167 | FSTORE_ IMM -- ^ 67, 68, 69, 70
168 | DSTORE_ IMM -- ^ 71, 72, 73, 74
169 | ASTORE_ IMM -- ^ 75, 76, 77, 78
223 | IINC Word8 Word8 -- ^ 132
240 | FCMP CMP -- ^ 149, 150
241 | DCMP CMP -- ^ 151, 152
242 | IF CMP Word16 -- ^ 153, 154, 155, 156, 157, 158
243 | IF_ICMP CMP Word16 -- ^ 159, 160, 161, 162, 163, 164
244 | IF_ACMP CMP Word16 -- ^ 165, 166
245 | GOTO Word16 -- ^ 167
246 | JSR Word16 -- ^ 168
248 | TABLESWITCH Word32 Word32 Word32 [Word32] -- ^ 170
249 | LOOKUPSWITCH Word32 Word32 [(Word32, Word32)] -- ^ 171
256 | GETSTATIC Word16 -- ^ 178
257 | PUTSTATIC Word16 -- ^ 179
258 | GETFIELD Word16 -- ^ 180
259 | PUTFIELD Word16 -- ^ 181
260 | INVOKEVIRTUAL Word16 -- ^ 182
261 | INVOKESPECIAL Word16 -- ^ 183
262 | INVOKESTATIC Word16 -- ^ 184
263 | INVOKEINTERFACE Word16 Word8 -- ^ 185
264 | NEW Word16 -- ^ 187
265 | NEWARRAY Word8 -- ^ 188, see @ArrayType@
266 | ANEWARRAY Word16 -- ^ 189
267 | ARRAYLENGTH -- ^ 190
269 | CHECKCAST Word16 -- ^ 192
270 | INSTANCEOF Word16 -- ^ 193
271 | MONITORENTER -- ^ 194
272 | MONITOREXIT -- ^ 195
273 | WIDE Word8 Instruction -- ^ 196
274 | MULTINANEWARRAY Word16 Word8 -- ^ 197
275 | IFNULL Word16 -- ^ 198
276 | IFNONNULL Word16 -- ^ 199
277 | GOTO_W Word32 -- ^ 200
278 | JSR_W Word32 -- ^ 201
281 -- | JVM array type (primitive types)
291 deriving (Eq, Show, Enum)
293 -- | Parse opcode with immediate constant
294 imm :: Word8 -- ^ Base opcode
295 -> (IMM -> Instruction) -- ^ Instruction constructor
296 -> Word8 -- ^ Opcode to parse
297 -> GetState s Instruction
298 imm base constr x = return $ constr $ toEnum $ fromIntegral (x-base)
300 -- | Put opcode with immediate constant
301 putImm :: Word8 -- ^ Base opcode
302 -> IMM -- ^ Constant to add to opcode
303 -> PutState Integer ()
304 putImm base i = putByte $ base + (fromIntegral $ fromEnum i)
306 atype2byte :: ArrayType -> Word8
307 atype2byte T_BOOLEAN = 4
308 atype2byte T_CHAR = 5
309 atype2byte T_FLOAT = 6
310 atype2byte T_DOUBLE = 7
311 atype2byte T_BYTE = 8
312 atype2byte T_SHORT = 9
313 atype2byte T_INT = 10
314 atype2byte T_LONG = 11
316 byte2atype :: Word8 -> GetState s ArrayType
317 byte2atype 4 = return T_BOOLEAN
318 byte2atype 5 = return T_CHAR
319 byte2atype 6 = return T_FLOAT
320 byte2atype 7 = return T_DOUBLE
321 byte2atype 8 = return T_BYTE
322 byte2atype 9 = return T_SHORT
323 byte2atype 10 = return T_INT
324 byte2atype 11 = return T_LONG
325 byte2atype x = fail $ "Unknown array type byte: " ++ show x
327 instance BinaryState Integer ArrayType where
332 put t = putByte (atype2byte t)
334 -- | Put opcode with one argument
335 put1 :: (BinaryState Integer a)
337 -> a -- ^ First argument
338 -> PutState Integer ()
343 put2 :: (BinaryState Integer a, BinaryState Integer b)
345 -> a -- ^ First argument
346 -> b -- ^ Second argument
347 -> PutState Integer ()
353 instance BinaryState Integer Instruction where
355 put ACONST_NULL = putByte 1
356 put ICONST_M1 = putByte 2
357 put ICONST_0 = putByte 3
358 put ICONST_1 = putByte 4
359 put ICONST_2 = putByte 5
360 put ICONST_3 = putByte 6
361 put ICONST_4 = putByte 7
362 put ICONST_5 = putByte 8
363 put LCONST_0 = putByte 9
364 put LCONST_1 = putByte 10
365 put FCONST_0 = putByte 11
366 put FCONST_1 = putByte 12
367 put FCONST_2 = putByte 13
368 put DCONST_0 = putByte 14
369 put DCONST_1 = putByte 15
370 put (BIPUSH x) = put1 16 x
371 put (SIPUSH x) = put1 17 x
372 put (LDC1 x) = put1 18 x
373 put (LDC2 x) = put1 19 x
374 put (LDC2W x) = put1 20 x
375 put (ILOAD x) = put1 21 x
376 put (LLOAD x) = put1 22 x
377 put (FLOAD x) = put1 23 x
378 put (DLOAD x) = put1 24 x
379 put (ALOAD x) = put1 25 x
380 put (ILOAD_ i) = putImm 26 i
381 put (LLOAD_ i) = putImm 30 i
382 put (FLOAD_ i) = putImm 34 i
383 put (DLOAD_ i) = putImm 38 i
384 put (ALOAD_ i) = putImm 42 i
385 put IALOAD = putByte 46
386 put LALOAD = putByte 47
387 put FALOAD = putByte 48
388 put DALOAD = putByte 49
389 put AALOAD = putByte 50
390 put BALOAD = putByte 51
391 put CALOAD = putByte 52
392 put SALOAD = putByte 53
393 put (ISTORE x) = put1 54 x
394 put (LSTORE x) = put1 55 x
395 put (FSTORE x) = put1 56 x
396 put (DSTORE x) = put1 57 x
397 put (ASTORE x) = put1 58 x
398 put (ISTORE_ i) = putImm 59 i
399 put (LSTORE_ i) = putImm 63 i
400 put (FSTORE_ i) = putImm 67 i
401 put (DSTORE_ i) = putImm 71 i
402 put (ASTORE_ i) = putImm 75 i
403 put IASTORE = putByte 79
404 put LASTORE = putByte 80
405 put FASTORE = putByte 81
406 put DASTORE = putByte 82
407 put AASTORE = putByte 83
408 put BASTORE = putByte 84
409 put CASTORE = putByte 85
410 put SASTORE = putByte 86
412 put POP2 = putByte 88
414 put DUP_X1 = putByte 90
415 put DUP_X2 = putByte 91
416 put DUP2 = putByte 92
417 put DUP2_X1 = putByte 93
418 put DUP2_X2 = putByte 94
419 put SWAP = putByte 95
420 put IADD = putByte 96
421 put LADD = putByte 97
422 put FADD = putByte 98
423 put DADD = putByte 99
424 put ISUB = putByte 100
425 put LSUB = putByte 101
426 put FSUB = putByte 102
427 put DSUB = putByte 103
428 put IMUL = putByte 104
429 put LMUL = putByte 105
430 put FMUL = putByte 106
431 put DMUL = putByte 107
432 put IDIV = putByte 108
433 put LDIV = putByte 109
434 put FDIV = putByte 110
435 put DDIV = putByte 111
436 put IREM = putByte 112
437 put LREM = putByte 113
438 put FREM = putByte 114
439 put DREM = putByte 115
440 put INEG = putByte 116
441 put LNEG = putByte 117
442 put FNEG = putByte 118
443 put DNEG = putByte 119
444 put ISHL = putByte 120
445 put LSHL = putByte 121
446 put ISHR = putByte 122
447 put LSHR = putByte 123
448 put IUSHR = putByte 124
449 put LUSHR = putByte 125
450 put IAND = putByte 126
451 put LAND = putByte 127
452 put IOR = putByte 128
453 put LOR = putByte 129
454 put IXOR = putByte 130
455 put LXOR = putByte 131
456 put (IINC x y) = put2 132 x y
457 put I2L = putByte 133
458 put I2F = putByte 134
459 put I2D = putByte 135
460 put L2I = putByte 136
461 put L2F = putByte 137
462 put L2D = putByte 138
463 put F2I = putByte 139
464 put F2L = putByte 140
465 put F2D = putByte 141
466 put D2I = putByte 142
467 put D2L = putByte 143
468 put D2F = putByte 144
469 put I2B = putByte 145
470 put I2C = putByte 146
471 put I2S = putByte 147
472 put LCMP = putByte 148
473 put (FCMP C_LT) = putByte 149
474 put (FCMP C_GT) = putByte 150
475 put (FCMP c) = fail $ "No such instruction: FCMP " ++ show c
476 put (DCMP C_LT) = putByte 151
477 put (DCMP C_GT) = putByte 152
478 put (DCMP c) = fail $ "No such instruction: DCMP " ++ show c
479 put (IF c x) = putByte (fromIntegral $ 153 + fromEnum c) >> put x
480 put (IF_ACMP C_EQ x) = put1 165 x
481 put (IF_ACMP C_NE x) = put1 166 x
482 put (IF_ACMP c _) = fail $ "No such instruction: IF_ACMP " ++ show c
483 put (IF_ICMP c x) = putByte (fromIntegral $ 159 + fromEnum c) >> put x
484 put (GOTO x) = put1 167 x
485 put (JSR x) = put1 168 x
486 put RET = putByte 169
487 put (TABLESWITCH def low high offs) = do
490 let pads = 4 - (offset `mod` 4)
491 replicateM (fromIntegral pads) (putByte 0)
495 put (LOOKUPSWITCH def n pairs) = do
498 let pads = 4 - (offset `mod` 4)
499 replicateM (fromIntegral pads) (putByte 0)
503 put IRETURN = putByte 172
504 put LRETURN = putByte 173
505 put FRETURN = putByte 174
506 put DRETURN = putByte 175
507 put ARETURN = putByte 176
508 put RETURN = putByte 177
509 put (GETSTATIC x) = put1 178 x
510 put (PUTSTATIC x) = put1 179 x
511 put (GETFIELD x) = put1 180 x
512 put (PUTFIELD x) = put1 181 x
513 put (INVOKEVIRTUAL x) = put1 182 x
514 put (INVOKESPECIAL x) = put1 183 x
515 put (INVOKESTATIC x) = put1 184 x
516 put (INVOKEINTERFACE x c) = put2 185 x c >> putByte 0
517 put (NEW x) = put1 187 x
518 put (NEWARRAY x) = put1 188 x
519 put (ANEWARRAY x) = put1 189 x
520 put ARRAYLENGTH = putByte 190
521 put ATHROW = putByte 191
522 put (CHECKCAST x) = put1 192 x
523 put (INSTANCEOF x) = put1 193 x
524 put MONITORENTER = putByte 194
525 put MONITOREXIT = putByte 195
526 put (WIDE x inst) = put2 196 x inst
527 put (MULTINANEWARRAY x y) = put2 197 x y
528 put (IFNULL x) = put1 198 x
529 put (IFNONNULL x) = put1 199 x
530 put (GOTO_W x) = put1 200 x
531 put (JSR_W x) = put1 201 x
537 1 -> return ACONST_NULL
538 2 -> return ICONST_M1
546 10 -> return LCONST_1
547 11 -> return FCONST_0
548 12 -> return FCONST_1
549 13 -> return FCONST_2
550 14 -> return DCONST_0
551 15 -> return DCONST_1
628 132 -> IINC <$> get <*> get
645 149 -> return $ FCMP C_LT
646 150 -> return $ FCMP C_GT
647 151 -> return $ DCMP C_LT
648 152 -> return $ DCMP C_GT
649 165 -> IF_ACMP C_EQ <$> get
650 166 -> IF_ACMP C_NE <$> get
656 let pads = 4 - (offset `mod` 4)
657 skip (fromIntegral pads)
661 offs <- replicateM (fromIntegral $ high - low + 1) get
662 return $ TABLESWITCH def low high offs
665 let pads = 4 - (offset `mod` 4)
666 skip (fromIntegral pads)
669 pairs <- replicateM (fromIntegral n) get
670 return $ LOOKUPSWITCH def n pairs
671 172 -> return IRETURN
672 173 -> return LRETURN
673 174 -> return FRETURN
674 175 -> return DRETURN
675 176 -> return ARETURN
677 178 -> GETSTATIC <$> get
678 179 -> PUTSTATIC <$> get
679 180 -> GETFIELD <$> get
680 181 -> PUTFIELD <$> get
681 182 -> INVOKEVIRTUAL <$> get
682 183 -> INVOKESPECIAL <$> get
683 184 -> INVOKESTATIC <$> get
684 185 -> (INVOKEINTERFACE <$> get <*> get) <* skip 1
686 188 -> NEWARRAY <$> get
687 189 -> ANEWARRAY <$> get
688 190 -> return ARRAYLENGTH
690 192 -> CHECKCAST <$> get
691 193 -> INSTANCEOF <$> get
692 194 -> return MONITORENTER
693 195 -> return MONITOREXIT
694 196 -> WIDE <$> get <*> get
695 197 -> MULTINANEWARRAY <$> get <*> get
696 198 -> IFNULL <$> get
697 199 -> IFNONNULL <$> get
698 200 -> GOTO_W <$> get
700 _ | inRange (59, 62) c -> imm 59 ISTORE_ c
701 | inRange (63, 66) c -> imm 63 LSTORE_ c
702 | inRange (67, 70) c -> imm 67 FSTORE_ c
703 | inRange (71, 74) c -> imm 71 DSTORE_ c
704 | inRange (75, 78) c -> imm 75 ASTORE_ c
705 | inRange (26, 29) c -> imm 26 ILOAD_ c
706 | inRange (30, 33) c -> imm 30 LLOAD_ c
707 | inRange (34, 37) c -> imm 34 FLOAD_ c
708 | inRange (38, 41) c -> imm 38 DLOAD_ c
709 | inRange (42, 45) c -> imm 42 ALOAD_ c
710 | inRange (153, 158) c -> IF (toEnum $ fromIntegral $ c-153) <$> get
711 | inRange (159, 164) c -> IF_ICMP (toEnum $ fromIntegral $ c-159) <$> get
712 | otherwise -> fail $ "Unknown instruction byte code: " ++ show c
714 -- | Encode list of instructions
715 encodeInstructions :: [Instruction] -> B.ByteString
716 encodeInstructions code =
717 let p list = forM_ list put
718 in encodeWith p (0 :: Integer) code
720 -- | Decode Java method
721 decodeMethod :: B.ByteString -> Code
722 decodeMethod str = decodeS (0 :: Integer) str
724 -- | Encode Java method
725 encodeMethod :: Code -> B.ByteString
726 encodeMethod code = encodeS (0 :: Integer) code