1 {-# LANGUAGE TypeFamilies, StandaloneDeriving, FlexibleInstances, FlexibleContexts, UndecidableInstances, RecordWildCards, OverloadedStrings, TypeSynonymInstances, MultiParamTypeClasses #-}
2 module JVM.Assembler where
5 import Control.Applicative
6 import Data.Ix (inRange)
10 import qualified Data.Binary as Binary
11 import qualified Data.Binary.Get as Get
14 import qualified Data.ByteString.Lazy as B
16 import qualified Data.Set as S
17 import qualified Data.Map as M
19 import Data.BinaryState
27 deriving (Eq, Ord, Enum, Show)
36 deriving (Eq, Ord, Enum, Show)
38 newtype Code = Code [Instruction]
41 instance BinaryState Integer Code where
42 put (Code list) = forM_ list put
51 return $ Code (x: next)
80 | ILOAD_ IMM -- 26, 27, 28, 29
81 | LLOAD_ IMM -- 30, 31, 32, 33
82 | FLOAD_ IMM -- 34, 35, 36, 37
83 | DLOAD_ IMM -- 38, 39, 40, 41
84 | ALOAD_ IMM -- 42, 43, 44, 45
98 | ISTORE_ IMM -- 59, 60, 61, 62
99 | LSTORE_ IMM -- 63, 64, 65, 66
100 | FSTORE_ IMM -- 67, 68, 69, 70
101 | DSTORE_ IMM -- 71, 72, 73, 74
102 | ASTORE_ IMM -- 75, 76, 77, 78
156 | IINC Word8 Word8 -- 132
173 | FCMP CMP -- 149, 150
174 | DCMP CMP -- 151, 152
175 | IF CMP -- 153, 154, 155, 156, 157, 158
176 | IF_ACMP CMP Word16 -- 165, 166
177 | IF_ICMP CMP Word16 -- 159, 160, 161, 162, 163, 164
181 | TABLESWITCH Word32 Word32 Word32 [Word32] -- 170
182 | LOOKUPSWITCH Word32 Word32 [(Word32, Word32)] -- 171
188 | GETSTATIC Word16 -- 178
189 | PUTSTATIC Word16 -- 179
190 | GETFIELD Word16 -- 180
191 | PUTFIELD Word16 -- 181
192 | INVOKEVIRTUAL Word16 -- 182
193 | INVOKESPECIAL Word16 -- 183
194 | INVOKESTATIC Word16 -- 184
195 | INVOKEINTERFACE Word16 Word8 -- 185
197 | NEWARRAY Word8 -- 188, see ArrayType
198 | ANEWARRAY Word16 -- 189
201 | CHECKCAST Word16 -- 192
202 | INSTANCEOF Word16 -- 193
203 | MONITORENTER -- 194
205 | WIDE Word8 Instruction -- 196
206 | MULTINANEWARRAY Word16 Word8 -- 197
207 | IFNULL Word16 -- 198
208 | IFNONNULL Word16 -- 199
209 | GOTO_W Word32 -- 200
210 | JSR_W Word32 -- 201
222 deriving (Eq, Show, Enum)
224 imm :: Word8 -> (IMM -> Instruction) -> Word8 -> GetState s Instruction
225 imm base constr x = return $ constr $ toEnum $ fromIntegral (x-base)
227 putImm :: Word8 -> IMM -> PutState Integer ()
228 putImm base i = putByte $ base + (fromIntegral $ fromEnum i)
230 atype2byte :: ArrayType -> Word8
231 atype2byte T_BOOLEAN = 4
232 atype2byte T_CHAR = 5
233 atype2byte T_FLOAT = 6
234 atype2byte T_DOUBLE = 7
235 atype2byte T_BYTE = 8
236 atype2byte T_SHORT = 9
237 atype2byte T_INT = 10
238 atype2byte T_LONG = 11
240 byte2atype :: Word8 -> GetState s ArrayType
241 byte2atype 4 = return T_BOOLEAN
242 byte2atype 5 = return T_CHAR
243 byte2atype 6 = return T_FLOAT
244 byte2atype 7 = return T_DOUBLE
245 byte2atype 8 = return T_BYTE
246 byte2atype 9 = return T_SHORT
247 byte2atype 10 = return T_INT
248 byte2atype 11 = return T_LONG
249 byte2atype x = fail $ "Unknown array type byte: " ++ show x
251 instance BinaryState Integer ArrayType where
256 put t = putByte (atype2byte t)
258 put1 :: (BinaryState Integer a) => Word8 -> a -> PutState Integer ()
263 put2 :: (BinaryState Integer a, BinaryState Integer b) => Word8 -> a -> b -> PutState Integer ()
269 instance BinaryState Integer Instruction where
271 put ACONST_NULL = putByte 1
272 put ICONST_M1 = putByte 2
273 put ICONST_0 = putByte 3
274 put ICONST_1 = putByte 4
275 put ICONST_2 = putByte 5
276 put ICONST_3 = putByte 6
277 put ICONST_4 = putByte 7
278 put ICONST_5 = putByte 8
279 put LCONST_0 = putByte 9
280 put LCONST_1 = putByte 10
281 put FCONST_0 = putByte 11
282 put FCONST_1 = putByte 12
283 put FCONST_2 = putByte 13
284 put DCONST_0 = putByte 14
285 put DCONST_1 = putByte 15
286 put (BIPUSH x) = put1 16 x
287 put (SIPUSH x) = put1 17 x
288 put (LDC1 x) = put1 18 x
289 put (LDC2 x) = put1 19 x
290 put (LDC2W x) = put1 20 x
291 put (ILOAD x) = put1 21 x
292 put (LLOAD x) = put1 22 x
293 put (FLOAD x) = put1 23 x
294 put (DLOAD x) = put1 24 x
295 put (ALOAD x) = put1 25 x
296 put (ILOAD_ i) = putImm 26 i
297 put (LLOAD_ i) = putImm 30 i
298 put (FLOAD_ i) = putImm 34 i
299 put (DLOAD_ i) = putImm 38 i
300 put (ALOAD_ i) = putImm 42 i
301 put IALOAD = putByte 46
302 put LALOAD = putByte 47
303 put FALOAD = putByte 48
304 put DALOAD = putByte 49
305 put AALOAD = putByte 50
306 put BALOAD = putByte 51
307 put CALOAD = putByte 52
308 put SALOAD = putByte 53
309 put (ISTORE x) = put1 54 x
310 put (LSTORE x) = put1 55 x
311 put (FSTORE x) = put1 56 x
312 put (DSTORE x) = put1 57 x
313 put (ASTORE x) = put1 58 x
314 put (ISTORE_ i) = putImm 59 i
315 put (LSTORE_ i) = putImm 63 i
316 put (FSTORE_ i) = putImm 67 i
317 put (DSTORE_ i) = putImm 71 i
318 put (ASTORE_ i) = putImm 75 i
319 put IASTORE = putByte 79
320 put LASTORE = putByte 80
321 put FASTORE = putByte 81
322 put DASTORE = putByte 82
323 put AASTORE = putByte 83
324 put BASTORE = putByte 84
325 put CASTORE = putByte 85
326 put SASTORE = putByte 86
328 put POP2 = putByte 88
330 put DUP_X1 = putByte 90
331 put DUP_X2 = putByte 91
332 put DUP2 = putByte 92
333 put DUP2_X1 = putByte 93
334 put DUP2_X2 = putByte 94
335 put SWAP = putByte 95
336 put IADD = putByte 96
337 put LADD = putByte 97
338 put FADD = putByte 98
339 put DADD = putByte 99
340 put ISUB = putByte 100
341 put LSUB = putByte 101
342 put FSUB = putByte 102
343 put DSUB = putByte 103
344 put IMUL = putByte 104
345 put LMUL = putByte 105
346 put FMUL = putByte 106
347 put DMUL = putByte 107
348 put IDIV = putByte 108
349 put LDIV = putByte 109
350 put FDIV = putByte 110
351 put DDIV = putByte 111
352 put IREM = putByte 112
353 put LREM = putByte 113
354 put FREM = putByte 114
355 put DREM = putByte 115
356 put INEG = putByte 116
357 put LNEG = putByte 117
358 put FNEG = putByte 118
359 put DNEG = putByte 119
360 put ISHL = putByte 120
361 put LSHL = putByte 121
362 put ISHR = putByte 122
363 put LSHR = putByte 123
364 put IUSHR = putByte 124
365 put LUSHR = putByte 125
366 put IAND = putByte 126
367 put LAND = putByte 127
368 put IOR = putByte 128
369 put LOR = putByte 129
370 put IXOR = putByte 130
371 put LXOR = putByte 131
372 put (IINC x y) = put2 132 x y
373 put I2L = putByte 133
374 put I2F = putByte 134
375 put I2D = putByte 135
376 put L2I = putByte 136
377 put L2F = putByte 137
378 put L2D = putByte 138
379 put F2I = putByte 139
380 put F2L = putByte 140
381 put F2D = putByte 141
382 put D2I = putByte 142
383 put D2L = putByte 143
384 put D2F = putByte 144
385 put I2B = putByte 145
386 put I2C = putByte 146
387 put I2S = putByte 147
388 put LCMP = putByte 148
389 put (FCMP C_LT) = putByte 149
390 put (FCMP C_GT) = putByte 150
391 put (FCMP c) = fail $ "No such instruction: FCMP " ++ show c
392 put (DCMP C_LT) = putByte 151
393 put (DCMP C_GT) = putByte 152
394 put (DCMP c) = fail $ "No such instruction: DCMP " ++ show c
395 put (IF c) = putByte (fromIntegral $ 153 + fromEnum c)
396 put (IF_ACMP C_EQ x) = put1 165 x
397 put (IF_ACMP C_NE x) = put1 166 x
398 put (IF_ACMP c _) = fail $ "No such instruction: IF_ACMP " ++ show c
399 put (IF_ICMP c x) = putByte (fromIntegral $ 159 + fromEnum c) >> put x
400 put GOTO = putByte 167
401 put (JSR x) = put1 168 x
402 put RET = putByte 169
403 put (TABLESWITCH def low high offs) = do
406 let pads = 4 - (offset `mod` 4)
407 replicateM (fromIntegral pads) (putByte 0)
411 put (LOOKUPSWITCH def n pairs) = do
414 let pads = 4 - (offset `mod` 4)
415 replicateM (fromIntegral pads) (putByte 0)
419 put IRETURN = putByte 172
420 put LRETURN = putByte 173
421 put FRETURN = putByte 174
422 put DRETURN = putByte 175
423 put RETURN = putByte 177
424 put (GETSTATIC x) = put1 178 x
425 put (PUTSTATIC x) = put1 179 x
426 put (GETFIELD x) = put1 180 x
427 put (PUTFIELD x) = put1 181 x
428 put (INVOKEVIRTUAL x) = put1 182 x
429 put (INVOKESPECIAL x) = put1 183 x
430 put (INVOKESTATIC x) = put1 184 x
431 put (INVOKEINTERFACE x c) = put2 185 x c >> putByte 0
432 put (NEW x) = put1 187 x
433 put (NEWARRAY x) = put1 188 x
434 put (ANEWARRAY x) = put1 189 x
435 put ARRAYLENGTH = putByte 190
436 put ATHROW = putByte 191
437 put (CHECKCAST x) = put1 192 x
438 put (INSTANCEOF x) = put1 193 x
439 put MONITORENTER = putByte 194
440 put MONITOREXIT = putByte 195
441 put (WIDE x inst) = put2 196 x inst
442 put (MULTINANEWARRAY x y) = put2 197 x y
443 put (IFNULL x) = put1 198 x
444 put (IFNONNULL x) = put1 199 x
445 put (GOTO_W x) = put1 200 x
446 put (JSR_W x) = put1 201 x
452 1 -> return ACONST_NULL
453 2 -> return ICONST_M1
461 10 -> return LCONST_1
462 11 -> return FCONST_0
463 12 -> return FCONST_1
464 13 -> return FCONST_2
465 14 -> return DCONST_0
466 15 -> return DCONST_1
543 132 -> IINC <$> get <*> get
560 149 -> return $ FCMP C_LT
561 150 -> return $ FCMP C_GT
562 151 -> return $ DCMP C_LT
563 152 -> return $ DCMP C_GT
564 165 -> IF_ACMP C_EQ <$> get
565 166 -> IF_ACMP C_NE <$> get
571 let pads = 4 - (offset `mod` 4)
572 skip (fromIntegral pads)
576 offs <- replicateM (fromIntegral $ high - low + 1) get
577 return $ TABLESWITCH def low high offs
580 let pads = 4 - (offset `mod` 4)
581 skip (fromIntegral pads)
584 pairs <- replicateM (fromIntegral n) get
585 return $ LOOKUPSWITCH def n pairs
586 172 -> return IRETURN
587 173 -> return LRETURN
588 174 -> return FRETURN
589 175 -> return DRETURN
591 178 -> GETSTATIC <$> get
592 179 -> PUTSTATIC <$> get
593 180 -> GETFIELD <$> get
594 181 -> PUTFIELD <$> get
595 182 -> INVOKEVIRTUAL <$> get
596 183 -> INVOKESPECIAL <$> get
597 184 -> INVOKESTATIC <$> get
598 185 -> (INVOKEINTERFACE <$> get <*> get) <* skip 1
600 188 -> NEWARRAY <$> get
601 189 -> ANEWARRAY <$> get
602 190 -> return ARRAYLENGTH
604 192 -> CHECKCAST <$> get
605 193 -> INSTANCEOF <$> get
606 194 -> return MONITORENTER
607 195 -> return MONITOREXIT
608 196 -> WIDE <$> get <*> get
609 197 -> MULTINANEWARRAY <$> get <*> get
610 198 -> IFNULL <$> get
611 199 -> IFNONNULL <$> get
612 200 -> GOTO_W <$> get
614 _ | inRange (59, 62) c -> imm 59 ISTORE_ c
615 | inRange (63, 66) c -> imm 63 LSTORE_ c
616 | inRange (67, 70) c -> imm 67 FSTORE_ c
617 | inRange (71, 74) c -> imm 71 DSTORE_ c
618 | inRange (75, 78) c -> imm 75 ASTORE_ c
619 | inRange (26, 29) c -> imm 26 ILOAD_ c
620 | inRange (30, 33) c -> imm 30 LLOAD_ c
621 | inRange (34, 37) c -> imm 34 FLOAD_ c
622 | inRange (38, 41) c -> imm 38 DLOAD_ c
623 | inRange (42, 45) c -> imm 42 ALOAD_ c
624 | inRange (153, 158) c -> return $ IF (toEnum $ fromIntegral $ c-153)
625 | inRange (159, 164) c -> IF_ICMP (toEnum $ fromIntegral $ c-159) <$> get
626 | otherwise -> fail $ "Unknown instruction byte code: " ++ show c