1 {-# LANGUAGE TypeFamilies, StandaloneDeriving, FlexibleInstances,
2 FlexibleContexts, UndecidableInstances, RecordWildCards, OverloadedStrings,
3 TypeSynonymInstances, MultiParamTypeClasses #-}
4 -- | This module declares data type for JVM instructions, and BinaryState
5 -- instances to read/write them.
16 import Control.Applicative
18 import qualified Data.Binary as Binary
19 import qualified Data.ByteString.Lazy as B
22 import Data.BinaryState
25 -- | Immediate constant. Corresponding value will be added to base opcode.
31 deriving (Eq, Ord, Enum, Show)
33 -- | Comparation operation type. Not all CMP instructions support all operations.
41 deriving (Eq, Ord, Enum, Show)
43 -- | Format of Code method attribute.
45 codeStackSize :: Word16,
46 codeMaxLocals :: Word16,
48 codeInstructions :: [Instruction],
49 codeExceptionsN :: Word16,
50 codeExceptions :: [CodeException],
52 codeAttributes :: [AttributeInfo] }
55 -- | Exception descriptor
56 data CodeException = CodeException {
60 eCatchType :: Word16 }
63 instance BinaryState Integer CodeException where
64 put (CodeException {..}) = do
70 get = CodeException <$> get <*> get <*> get <*> get
72 instance BinaryState Integer AttributeInfo where
74 let sz = 6 + attributeLength a -- full size of AttributeInfo structure
75 liftOffset (fromIntegral sz) Binary.put a
79 instance BinaryState Integer Code where
84 forM_ codeInstructions put
86 forM_ codeExceptions put
88 forM_ codeAttributes put
94 bytes <- replicateM (fromIntegral len) get
95 let bytecode = B.pack bytes
96 code = decodeWith readInstructions 0 bytecode
98 excs <- replicateM (fromIntegral excn) get
100 attrs <- replicateM (fromIntegral nAttrs) get
101 return $ Code stackSz locals len code excn excs nAttrs attrs
103 -- | Read sequence of instructions (to end of stream)
104 readInstructions :: GetState Integer [Instruction]
105 readInstructions = do
111 next <- readInstructions
114 -- | JVM instruction set
132 | BIPUSH Word8 -- ^ 16
133 | SIPUSH Word16 -- ^ 17
135 | LDC2 Word16 -- ^ 19
136 | LDC2W Word16 -- ^ 20
137 | ILOAD Word8 -- ^ 21
138 | LLOAD Word8 -- ^ 22
139 | FLOAD Word8 -- ^ 23
140 | DLOAD Word8 -- ^ 24
141 | ALOAD Word8 -- ^ 25
142 | ILOAD_ IMM -- ^ 26, 27, 28, 29
143 | LLOAD_ IMM -- ^ 30, 31, 32, 33
144 | FLOAD_ IMM -- ^ 34, 35, 36, 37
145 | DLOAD_ IMM -- ^ 38, 39, 40, 41
146 | ALOAD_ IMM -- ^ 42, 43, 44, 45
155 | ISTORE Word8 -- ^ 54
156 | LSTORE Word8 -- ^ 55
157 | FSTORE Word8 -- ^ 56
158 | DSTORE Word8 -- ^ 57
159 | ASTORE Word8 -- ^ 58
160 | ISTORE_ IMM -- ^ 59, 60, 61, 62
161 | LSTORE_ IMM -- ^ 63, 64, 65, 66
162 | FSTORE_ IMM -- ^ 67, 68, 69, 70
163 | DSTORE_ IMM -- ^ 71, 72, 73, 74
164 | ASTORE_ IMM -- ^ 75, 76, 77, 78
218 | IINC Word8 Word8 -- ^ 132
235 | FCMP CMP -- ^ 149, 150
236 | DCMP CMP -- ^ 151, 152
237 | IF CMP -- ^ 153, 154, 155, 156, 157, 158
238 | IF_ICMP CMP Word16 -- ^ 159, 160, 161, 162, 163, 164
239 | IF_ACMP CMP Word16 -- ^ 165, 166
241 | JSR Word16 -- ^ 168
243 | TABLESWITCH Word32 Word32 Word32 [Word32] -- ^ 170
244 | LOOKUPSWITCH Word32 Word32 [(Word32, Word32)] -- ^ 171
250 | GETSTATIC Word16 -- ^ 178
251 | PUTSTATIC Word16 -- ^ 179
252 | GETFIELD Word16 -- ^ 180
253 | PUTFIELD Word16 -- ^ 181
254 | INVOKEVIRTUAL Word16 -- ^ 182
255 | INVOKESPECIAL Word16 -- ^ 183
256 | INVOKESTATIC Word16 -- ^ 184
257 | INVOKEINTERFACE Word16 Word8 -- ^ 185
258 | NEW Word16 -- ^ 187
259 | NEWARRAY Word8 -- ^ 188, see @ArrayType@
260 | ANEWARRAY Word16 -- ^ 189
261 | ARRAYLENGTH -- ^ 190
263 | CHECKCAST Word16 -- ^ 192
264 | INSTANCEOF Word16 -- ^ 193
265 | MONITORENTER -- ^ 194
266 | MONITOREXIT -- ^ 195
267 | WIDE Word8 Instruction -- ^ 196
268 | MULTINANEWARRAY Word16 Word8 -- ^ 197
269 | IFNULL Word16 -- ^ 198
270 | IFNONNULL Word16 -- ^ 199
271 | GOTO_W Word32 -- ^ 200
272 | JSR_W Word32 -- ^ 201
275 -- ^ JVM array type (primitive types)
285 deriving (Eq, Show, Enum)
287 -- ^ Parse opcode with immediate constant
288 imm :: Word8 -- ^ Base opcode
289 -> (IMM -> Instruction) -- ^ Instruction constructor
290 -> Word8 -- ^ Opcode to parse
291 -> GetState s Instruction
292 imm base constr x = return $ constr $ toEnum $ fromIntegral (x-base)
294 -- ^ Put opcode with immediate constant
295 putImm :: Word8 -- ^ Base opcode
296 -> IMM -- ^ Constant to add to opcode
297 -> PutState Integer ()
298 putImm base i = putByte $ base + (fromIntegral $ fromEnum i)
300 atype2byte :: ArrayType -> Word8
301 atype2byte T_BOOLEAN = 4
302 atype2byte T_CHAR = 5
303 atype2byte T_FLOAT = 6
304 atype2byte T_DOUBLE = 7
305 atype2byte T_BYTE = 8
306 atype2byte T_SHORT = 9
307 atype2byte T_INT = 10
308 atype2byte T_LONG = 11
310 byte2atype :: Word8 -> GetState s ArrayType
311 byte2atype 4 = return T_BOOLEAN
312 byte2atype 5 = return T_CHAR
313 byte2atype 6 = return T_FLOAT
314 byte2atype 7 = return T_DOUBLE
315 byte2atype 8 = return T_BYTE
316 byte2atype 9 = return T_SHORT
317 byte2atype 10 = return T_INT
318 byte2atype 11 = return T_LONG
319 byte2atype x = fail $ "Unknown array type byte: " ++ show x
321 instance BinaryState Integer ArrayType where
326 put t = putByte (atype2byte t)
328 -- ^ Put opcode with one argument
329 put1 :: (BinaryState Integer a)
331 -> a -- ^ First argument
332 -> PutState Integer ()
337 put2 :: (BinaryState Integer a, BinaryState Integer b)
339 -> a -- ^ First argument
340 -> b -- ^ Second argument
341 -> PutState Integer ()
347 instance BinaryState Integer Instruction where
349 put ACONST_NULL = putByte 1
350 put ICONST_M1 = putByte 2
351 put ICONST_0 = putByte 3
352 put ICONST_1 = putByte 4
353 put ICONST_2 = putByte 5
354 put ICONST_3 = putByte 6
355 put ICONST_4 = putByte 7
356 put ICONST_5 = putByte 8
357 put LCONST_0 = putByte 9
358 put LCONST_1 = putByte 10
359 put FCONST_0 = putByte 11
360 put FCONST_1 = putByte 12
361 put FCONST_2 = putByte 13
362 put DCONST_0 = putByte 14
363 put DCONST_1 = putByte 15
364 put (BIPUSH x) = put1 16 x
365 put (SIPUSH x) = put1 17 x
366 put (LDC1 x) = put1 18 x
367 put (LDC2 x) = put1 19 x
368 put (LDC2W x) = put1 20 x
369 put (ILOAD x) = put1 21 x
370 put (LLOAD x) = put1 22 x
371 put (FLOAD x) = put1 23 x
372 put (DLOAD x) = put1 24 x
373 put (ALOAD x) = put1 25 x
374 put (ILOAD_ i) = putImm 26 i
375 put (LLOAD_ i) = putImm 30 i
376 put (FLOAD_ i) = putImm 34 i
377 put (DLOAD_ i) = putImm 38 i
378 put (ALOAD_ i) = putImm 42 i
379 put IALOAD = putByte 46
380 put LALOAD = putByte 47
381 put FALOAD = putByte 48
382 put DALOAD = putByte 49
383 put AALOAD = putByte 50
384 put BALOAD = putByte 51
385 put CALOAD = putByte 52
386 put SALOAD = putByte 53
387 put (ISTORE x) = put1 54 x
388 put (LSTORE x) = put1 55 x
389 put (FSTORE x) = put1 56 x
390 put (DSTORE x) = put1 57 x
391 put (ASTORE x) = put1 58 x
392 put (ISTORE_ i) = putImm 59 i
393 put (LSTORE_ i) = putImm 63 i
394 put (FSTORE_ i) = putImm 67 i
395 put (DSTORE_ i) = putImm 71 i
396 put (ASTORE_ i) = putImm 75 i
397 put IASTORE = putByte 79
398 put LASTORE = putByte 80
399 put FASTORE = putByte 81
400 put DASTORE = putByte 82
401 put AASTORE = putByte 83
402 put BASTORE = putByte 84
403 put CASTORE = putByte 85
404 put SASTORE = putByte 86
406 put POP2 = putByte 88
408 put DUP_X1 = putByte 90
409 put DUP_X2 = putByte 91
410 put DUP2 = putByte 92
411 put DUP2_X1 = putByte 93
412 put DUP2_X2 = putByte 94
413 put SWAP = putByte 95
414 put IADD = putByte 96
415 put LADD = putByte 97
416 put FADD = putByte 98
417 put DADD = putByte 99
418 put ISUB = putByte 100
419 put LSUB = putByte 101
420 put FSUB = putByte 102
421 put DSUB = putByte 103
422 put IMUL = putByte 104
423 put LMUL = putByte 105
424 put FMUL = putByte 106
425 put DMUL = putByte 107
426 put IDIV = putByte 108
427 put LDIV = putByte 109
428 put FDIV = putByte 110
429 put DDIV = putByte 111
430 put IREM = putByte 112
431 put LREM = putByte 113
432 put FREM = putByte 114
433 put DREM = putByte 115
434 put INEG = putByte 116
435 put LNEG = putByte 117
436 put FNEG = putByte 118
437 put DNEG = putByte 119
438 put ISHL = putByte 120
439 put LSHL = putByte 121
440 put ISHR = putByte 122
441 put LSHR = putByte 123
442 put IUSHR = putByte 124
443 put LUSHR = putByte 125
444 put IAND = putByte 126
445 put LAND = putByte 127
446 put IOR = putByte 128
447 put LOR = putByte 129
448 put IXOR = putByte 130
449 put LXOR = putByte 131
450 put (IINC x y) = put2 132 x y
451 put I2L = putByte 133
452 put I2F = putByte 134
453 put I2D = putByte 135
454 put L2I = putByte 136
455 put L2F = putByte 137
456 put L2D = putByte 138
457 put F2I = putByte 139
458 put F2L = putByte 140
459 put F2D = putByte 141
460 put D2I = putByte 142
461 put D2L = putByte 143
462 put D2F = putByte 144
463 put I2B = putByte 145
464 put I2C = putByte 146
465 put I2S = putByte 147
466 put LCMP = putByte 148
467 put (FCMP C_LT) = putByte 149
468 put (FCMP C_GT) = putByte 150
469 put (FCMP c) = fail $ "No such instruction: FCMP " ++ show c
470 put (DCMP C_LT) = putByte 151
471 put (DCMP C_GT) = putByte 152
472 put (DCMP c) = fail $ "No such instruction: DCMP " ++ show c
473 put (IF c) = putByte (fromIntegral $ 153 + fromEnum c)
474 put (IF_ACMP C_EQ x) = put1 165 x
475 put (IF_ACMP C_NE x) = put1 166 x
476 put (IF_ACMP c _) = fail $ "No such instruction: IF_ACMP " ++ show c
477 put (IF_ICMP c x) = putByte (fromIntegral $ 159 + fromEnum c) >> put x
478 put GOTO = putByte 167
479 put (JSR x) = put1 168 x
480 put RET = putByte 169
481 put (TABLESWITCH def low high offs) = do
484 let pads = 4 - (offset `mod` 4)
485 replicateM (fromIntegral pads) (putByte 0)
489 put (LOOKUPSWITCH def n pairs) = do
492 let pads = 4 - (offset `mod` 4)
493 replicateM (fromIntegral pads) (putByte 0)
497 put IRETURN = putByte 172
498 put LRETURN = putByte 173
499 put FRETURN = putByte 174
500 put DRETURN = putByte 175
501 put RETURN = putByte 177
502 put (GETSTATIC x) = put1 178 x
503 put (PUTSTATIC x) = put1 179 x
504 put (GETFIELD x) = put1 180 x
505 put (PUTFIELD x) = put1 181 x
506 put (INVOKEVIRTUAL x) = put1 182 x
507 put (INVOKESPECIAL x) = put1 183 x
508 put (INVOKESTATIC x) = put1 184 x
509 put (INVOKEINTERFACE x c) = put2 185 x c >> putByte 0
510 put (NEW x) = put1 187 x
511 put (NEWARRAY x) = put1 188 x
512 put (ANEWARRAY x) = put1 189 x
513 put ARRAYLENGTH = putByte 190
514 put ATHROW = putByte 191
515 put (CHECKCAST x) = put1 192 x
516 put (INSTANCEOF x) = put1 193 x
517 put MONITORENTER = putByte 194
518 put MONITOREXIT = putByte 195
519 put (WIDE x inst) = put2 196 x inst
520 put (MULTINANEWARRAY x y) = put2 197 x y
521 put (IFNULL x) = put1 198 x
522 put (IFNONNULL x) = put1 199 x
523 put (GOTO_W x) = put1 200 x
524 put (JSR_W x) = put1 201 x
530 1 -> return ACONST_NULL
531 2 -> return ICONST_M1
539 10 -> return LCONST_1
540 11 -> return FCONST_0
541 12 -> return FCONST_1
542 13 -> return FCONST_2
543 14 -> return DCONST_0
544 15 -> return DCONST_1
621 132 -> IINC <$> get <*> get
638 149 -> return $ FCMP C_LT
639 150 -> return $ FCMP C_GT
640 151 -> return $ DCMP C_LT
641 152 -> return $ DCMP C_GT
642 165 -> IF_ACMP C_EQ <$> get
643 166 -> IF_ACMP C_NE <$> get
649 let pads = 4 - (offset `mod` 4)
650 skip (fromIntegral pads)
654 offs <- replicateM (fromIntegral $ high - low + 1) get
655 return $ TABLESWITCH def low high offs
658 let pads = 4 - (offset `mod` 4)
659 skip (fromIntegral pads)
662 pairs <- replicateM (fromIntegral n) get
663 return $ LOOKUPSWITCH def n pairs
664 172 -> return IRETURN
665 173 -> return LRETURN
666 174 -> return FRETURN
667 175 -> return DRETURN
669 178 -> GETSTATIC <$> get
670 179 -> PUTSTATIC <$> get
671 180 -> GETFIELD <$> get
672 181 -> PUTFIELD <$> get
673 182 -> INVOKEVIRTUAL <$> get
674 183 -> INVOKESPECIAL <$> get
675 184 -> INVOKESTATIC <$> get
676 185 -> (INVOKEINTERFACE <$> get <*> get) <* skip 1
678 188 -> NEWARRAY <$> get
679 189 -> ANEWARRAY <$> get
680 190 -> return ARRAYLENGTH
682 192 -> CHECKCAST <$> get
683 193 -> INSTANCEOF <$> get
684 194 -> return MONITORENTER
685 195 -> return MONITOREXIT
686 196 -> WIDE <$> get <*> get
687 197 -> MULTINANEWARRAY <$> get <*> get
688 198 -> IFNULL <$> get
689 199 -> IFNONNULL <$> get
690 200 -> GOTO_W <$> get
692 _ | inRange (59, 62) c -> imm 59 ISTORE_ c
693 | inRange (63, 66) c -> imm 63 LSTORE_ c
694 | inRange (67, 70) c -> imm 67 FSTORE_ c
695 | inRange (71, 74) c -> imm 71 DSTORE_ c
696 | inRange (75, 78) c -> imm 75 ASTORE_ c
697 | inRange (26, 29) c -> imm 26 ILOAD_ c
698 | inRange (30, 33) c -> imm 30 LLOAD_ c
699 | inRange (34, 37) c -> imm 34 FLOAD_ c
700 | inRange (38, 41) c -> imm 38 DLOAD_ c
701 | inRange (42, 45) c -> imm 42 ALOAD_ c
702 | inRange (153, 158) c -> return $ IF (toEnum $ fromIntegral $ c-153)
703 | inRange (159, 164) c -> IF_ICMP (toEnum $ fromIntegral $ c-159) <$> get
704 | otherwise -> fail $ "Unknown instruction byte code: " ++ show c