From a937c158a55c8d80c9c35777fcb104767450e8dd Mon Sep 17 00:00:00 2001 From: Ilya Portnov Date: Mon, 13 Jun 2011 14:06:53 +0600 Subject: [PATCH] Implement [dis]assembler. --- .gitignore | 2 + Data/BinaryState.hs | 93 +++++++ JVM/.Converter.hs.swp | Bin 16384 -> 0 bytes JVM/Assembler.hs | 627 ++++++++++++++++++++++++++++++++++++++++++ JVM/Converter.hs | 4 - JVM/Types.hs | 6 + disassemble.hs | 27 ++ 7 files changed, 755 insertions(+), 4 deletions(-) create mode 100644 Data/BinaryState.hs delete mode 100644 JVM/.Converter.hs.swp create mode 100644 JVM/Assembler.hs create mode 100644 disassemble.hs diff --git a/.gitignore b/.gitignore index a9781ac..d7b78e2 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ *.o *.class *.bytecode +*.swp +.*.swp diff --git a/Data/BinaryState.hs b/Data/BinaryState.hs new file mode 100644 index 0000000..c2ab55d --- /dev/null +++ b/Data/BinaryState.hs @@ -0,0 +1,93 @@ +{-# LANGUAGE MultiParamTypeClasses, FlexibleInstances #-} +module Data.BinaryState where + +import Control.Monad +import qualified Control.Monad.State as State +import qualified Data.Binary as Binary +import qualified Data.Binary.Put as Put +import qualified Data.Binary.Get as Get +import qualified Data.ByteString.Lazy as B +import Foreign.Storable +import Data.Word +import Data.Int + +type PutState s a = State.StateT s Put.PutM a +type GetState s a = State.StateT s Binary.Get a + +class BinaryState s a where + put :: a -> PutState s () + get :: GetState s a + +instance (Binary.Binary a) => BinaryState () a where + put x = putZ x + get = getZ + +putZ :: (Binary.Binary a) => a -> PutState s () +putZ x = State.lift (Binary.put x) + +getZ :: (Binary.Binary a) => GetState s a +getZ = State.lift Binary.get + +------------------------------------------------ + +encodeS :: (BinaryState s a) => s -> a -> B.ByteString +encodeS s a = Put.runPut $ State.evalStateT (put a) s + +decodeS :: (BinaryState s a) => s -> B.ByteString -> a +decodeS s str = Get.runGet (State.evalStateT get s) str + +encodeFile :: BinaryState s a => FilePath -> s -> a -> IO () +encodeFile f s v = B.writeFile f (encodeS s v) + +decodeFile :: BinaryState s a => FilePath -> s -> IO a +decodeFile f s = liftM (decodeS s) (B.readFile f) + +------------------------------------------------ + +getByte :: GetState s Word8 +getByte = State.lift Binary.getWord8 + +liftOffset :: (Binary.Binary a) => Integer -> (a -> Binary.Put) -> a -> PutState Integer () +liftOffset d fn x = State.modify (+d) >> State.lift (fn x) + +putByte :: Word8 -> PutState Integer () +putByte x = liftOffset 1 Put.putWord8 x + +isEmpty :: GetState s Bool +isEmpty = State.lift Get.isEmpty + +skip :: Int -> GetState s () +skip n = State.lift (Get.skip n) + +getOffset :: PutState Integer Integer +getOffset = State.get + +bytesRead :: GetState s Int64 +bytesRead = State.lift Get.bytesRead + +-------------------------------------------------- + +instance BinaryState Integer Word8 where + put x = putByte x + get = getZ + +instance BinaryState Integer Word16 where + put x = liftOffset 2 Binary.put x + get = getZ + +instance BinaryState Integer Word32 where + put x = liftOffset 4 Binary.put x + get = getZ + +instance (BinaryState s a, BinaryState s b) => BinaryState s (a,b) where + put (x,y) = put x >> put y + get = do + x <- get + y <- get + return (x,y) + +-------------------------------------------------- + +-- instance (Binary.Binary a, Storable a) => BinaryState Integer a where +-- put x = liftOffset (fromIntegral $ sizeOf x) Binary.put x +-- get = getZ diff --git a/JVM/.Converter.hs.swp b/JVM/.Converter.hs.swp deleted file mode 100644 index 048d029c0d8bda00f4969a49ca9b7644f2c8f092..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16384 zcmeHNO^h5z6|R_nlOHRD3kgwqZHXCWx7Q}b7>iw)>D{r%nweeg%C<$*dhb=$S5>cmdiKt3)tmg2&a({9DaL;F#J`#^zjlJX@&seUC{DxZ7UkUcc~HH3 zMsYETyNkm(+D{hyBDpDpVDUi1nH{h0afP34=U{jm&K1}p=X0n318z%pPNunbrRECZGS%YbFz9ms(27<(M` zdX*0R`2L@r|05jqJ3t$F5;zIG@hD?|0bU1w0sIK~3XlSuzy;trU=BD9JPf?~2xI>S zUI$(RUIYd}1^6uR5#SFGGxmGnC&0IW1aN^1z>~n=Kgih6fP26)a1!|SNyfejd;=H( z+rS#I44eY~@d4NYUkC01*MLs}7lAk4&)9c>%fRp6$Jn=lA+Q3R1b+Ko#=Zos15X1V z1^)aV#{LAn3VZ=r13m*R08ay_fH&XG*k6I)0KWu&415pxE^r7mfTw`J;WFj>Knf(l zD)3?8kGO>SA#ekD4)_G{AN0#>0QJv5sgI64oZs%tShBAE-$W%%?-)bLkRaC$oiRzC&L8y-_ObVK3SxQ7*xsGB_W|v>$o8tR;T5 z=yAUClJL8^QQO}SMLLQlU*ewZMxI2i_GQ{eiQzupRozdUB696|=*ffm+)GOg$iYlI z%AYc+bh|Q1V3bu?r%hE=q85n#WIm&%sT!&4Q_=~dgWqwSyJh$>`FE!DCwQDN0);w7 z^!0oh+=sn9dxAVA%h`Af?uyDe4@x|nxjegQFoQRxt@Dbs)u?fXHT7N~P3PvE>Er}u zV+$~*5=0oUxbF2zwcXjMxE;QmN$Rc6>PD^NuF}9zI;~2lzBNulztyg;E9 zjj>AmTFbq%vDT`8ZgnNss?}SSMxJVSVB+S<^^LX8s@s`RU*D*2RXSsz>zmaE85Bp_ zZS-1uaxNmfD`--_;19R`w9iYQ_j!+Io8RN9Owy{K^4r|!XTjo3G z9A_7nJ5yzV$?Yii&Ri3WF1JognMOX6DoJ(RJLVk(Q@n@dlNk6*H z56Zmi$iXl@RK#=B23~U4S|Fm;xxN|tYT91sdkM`0Hxuk7)D6d3q`ixLtlp_zgy{@j zs%ItiI)_7vmfuhXkW2|m$4q#?%#Rb_D9sVZfaDp548 zhpF6;MS#2+mbD;4-zrlSV^TA8l{XoaHgxmU3~duB(QYW`Ve>d%VY1#jj=b#}>nWj`)$hlyO}?HQockf? zIS(N(nFTG;gM#xwrkFSr8fwCr70#dRo?OT0OUKGFvrAV*GF}42Jg@qph!1&bLRiMA zSe}RhxnH5{9AeNE2Jl%_GTcY-39vy>C9CQ!sdOcd1^l0%56c(>w0FqDK~dWTaG2+t zkt7viN*fC8gY;1}`KFLHl?(eFcbIQB$Sk8+4Z_(wjXZl|t>kzHYVoG!o@$+qSu_5q z7GK=pH05pJ{b)$_m?Q> zt5;XW<)tNlP6z&0l=f+Bq=Ns|5qMN;DMvZXR8_5Jtm$HM7o-W9 z>?_lMieri5Kx7mQv zI2z|;2#qzszDh7gGjcG5m9iWP4D@^ls2xAw!w7la>_u~}DMNUttaY-7^ZMEm|5(vd z>cv8(&SqO&${f4oa94Gzhp9B-dt6@(K`bvuOnq8)hN64Z%)a8<<8UnXG%;mJ3vy<} zd^NqW#@7=gD-@=uD8sPDAG|sKbM4UDnoJ6ECRrw3k$ahNeJ-#AR z;WRPPM8cby$y9b5u@KqqEL+*A;E+kC495I84e`Pu8CNq^Wvm-l5V?I37X&&)PKnaI zvC0qam}k}nC&{+jkAfZi(@$L`yS28k|uxN^mG~L+sGSXloGLS_Gfkz7w;^b z;f+e`@@D1oDks)!V&Dh9Oy+qT@zN7P6v`DD`?uhFp4S3-;A68=HysMKOa#xVJV+tg z#L1}Zd!neffpH&u+kW7=BKDxRhN*%ri6=c>|Ae1exXbAL{{x)C8P4i-{>L$2JwL_E zi@;ZbYd{5f40sgyIds1Uyaa3m%fM5>Ljd{tFZr{NWxz6E8L$jk1}p=X0n318z%pPN zunfGj7~twtVD(U4*XT<#K1XNXIb773^rsihn^Efq`m+Jk`J|0IA1%X;g%?d`WcM<3 zZBn>~K)NXo_hdoJn82q;Q_g|b6-`bA*ERuVmX3ewz0f4Eb2><{9 diff --git a/JVM/Assembler.hs b/JVM/Assembler.hs new file mode 100644 index 0000000..bc45d80 --- /dev/null +++ b/JVM/Assembler.hs @@ -0,0 +1,627 @@ +{-# LANGUAGE TypeFamilies, StandaloneDeriving, FlexibleInstances, FlexibleContexts, UndecidableInstances, RecordWildCards, OverloadedStrings, TypeSynonymInstances, MultiParamTypeClasses #-} +module JVM.Assembler where + +import Control.Monad +import Control.Applicative +import Data.Ix (inRange) +import Data.List +import Data.Word +import Data.Bits +import qualified Data.Binary as Binary +import qualified Data.Binary.Get as Get +import Data.Char +import Data.String +import qualified Data.ByteString.Lazy as B +import Data.Array +import qualified Data.Set as S +import qualified Data.Map as M + +import Data.BinaryState +import JVM.Types + +data IMM = + I0 + | I1 + | I2 + | I3 + deriving (Eq, Ord, Enum, Show) + +data CMP = + C_EQ + | C_NE + | C_LT + | C_GE + | C_GT + | C_LE + deriving (Eq, Ord, Enum, Show) + +newtype Code = Code [Instruction] + deriving (Eq, Show) + +instance BinaryState Integer Code where + put (Code list) = forM_ list put + + get = do + end <- isEmpty + if end + then return $ Code [] + else do + x <- get + (Code next) <- get + return $ Code (x: next) + +data Instruction = + NOP -- 0 + | ACONST_NULL -- 1 + | ICONST_M1 -- 2 + | ICONST_0 -- 3 + | ICONST_1 -- 4 + | ICONST_2 -- 5 + | ICONST_3 -- 6 + | ICONST_4 -- 7 + | ICONST_5 -- 8 + | LCONST_0 -- 9 + | LCONST_1 -- 10 + | FCONST_0 -- 11 + | FCONST_1 -- 12 + | FCONST_2 -- 13 + | DCONST_0 -- 14 + | DCONST_1 -- 15 + | BIPUSH Word8 -- 16 + | SIPUSH Word16 -- 17 + | LDC1 Word8 -- 18 + | LDC2 Word16 -- 19 + | LDC2W Word16 -- 20 + | ILOAD Word8 -- 21 + | LLOAD Word8 -- 22 + | FLOAD Word8 -- 23 + | DLOAD Word8 -- 24 + | ALOAD Word8 -- 25 + | ILOAD_ IMM -- 26, 27, 28, 29 + | LLOAD_ IMM -- 30, 31, 32, 33 + | FLOAD_ IMM -- 34, 35, 36, 37 + | DLOAD_ IMM -- 38, 39, 40, 41 + | ALOAD_ IMM -- 42, 43, 44, 45 + | IALOAD -- 46 + | LALOAD -- 47 + | FALOAD -- 48 + | DALOAD -- 49 + | AALOAD -- 50 + | BALOAD -- 51 + | CALOAD -- 52 + | SALOAD -- 53 + | ISTORE Word8 -- 54 + | LSTORE Word8 -- 55 + | FSTORE Word8 -- 56 + | DSTORE Word8 -- 57 + | ASTORE Word8 -- 58 + | ISTORE_ IMM -- 59, 60, 61, 62 + | LSTORE_ IMM -- 63, 64, 65, 66 + | FSTORE_ IMM -- 67, 68, 69, 70 + | DSTORE_ IMM -- 71, 72, 73, 74 + | ASTORE_ IMM -- 75, 76, 77, 78 + | IASTORE -- 79 + | LASTORE -- 80 + | FASTORE -- 81 + | DASTORE -- 82 + | AASTORE -- 83 + | BASTORE -- 84 + | CASTORE -- 85 + | SASTORE -- 86 + | POP -- 87 + | POP2 -- 88 + | DUP -- 89 + | DUP_X1 -- 90 + | DUP_X2 -- 91 + | DUP2 -- 92 + | DUP2_X1 -- 93 + | DUP2_X2 -- 94 + | SWAP -- 95 + | IADD -- 96 + | LADD -- 97 + | FADD -- 98 + | DADD -- 99 + | ISUB -- 100 + | LSUB -- 101 + | FSUB -- 102 + | DSUB -- 103 + | IMUL -- 104 + | LMUL -- 105 + | FMUL -- 106 + | DMUL -- 107 + | IDIV -- 108 + | LDIV -- 109 + | FDIV -- 110 + | DDIV -- 111 + | IREM -- 112 + | LREM -- 113 + | FREM -- 114 + | DREM -- 115 + | INEG -- 116 + | LNEG -- 117 + | FNEG -- 118 + | DNEG -- 119 + | ISHL -- 120 + | LSHL -- 121 + | ISHR -- 122 + | LSHR -- 123 + | IUSHR -- 124 + | LUSHR -- 125 + | IAND -- 126 + | LAND -- 127 + | IOR -- 128 + | LOR -- 129 + | IXOR -- 130 + | LXOR -- 131 + | IINC Word8 Word8 -- 132 + | I2L -- 133 + | I2F -- 134 + | I2D -- 135 + | L2I -- 136 + | L2F -- 137 + | L2D -- 138 + | F2I -- 139 + | F2L -- 140 + | F2D -- 141 + | D2I -- 142 + | D2L -- 143 + | D2F -- 144 + | I2B -- 145 + | I2C -- 146 + | I2S -- 147 + | LCMP -- 148 + | FCMP CMP -- 149, 150 + | DCMP CMP -- 151, 152 + | IF CMP -- 153, 154, 155, 156, 157, 158 + | IF_ACMP CMP Word16 -- 165, 166 + | IF_ICMP CMP Word16 -- 159, 160, 161, 162, 163, 164 + | GOTO -- 167 + | JSR Word16 -- 168 + | RET -- 169 + | TABLESWITCH Word32 Word32 Word32 [Word32] -- 170 + | LOOKUPSWITCH Word32 Word32 [(Word32, Word32)] -- 171 + | IRETURN -- 172 + | LRETURN -- 173 + | FRETURN -- 174 + | DRETURN -- 175 + | RETURN -- 177 + | GETSTATIC Word16 -- 178 + | PUTSTATIC Word16 -- 179 + | GETFIELD Word16 -- 180 + | PUTFIELD Word16 -- 181 + | INVOKEVIRTUAL Word16 -- 182 + | INVOKESPECIAL Word16 -- 183 + | INVOKESTATIC Word16 -- 184 + | INVOKEINTERFACE Word16 Word8 -- 185 + | NEW Word16 -- 187 + | NEWARRAY Word8 -- 188, see ArrayType + | ANEWARRAY Word16 -- 189 + | ARRAYLENGTH -- 190 + | ATHROW -- 191 + | CHECKCAST Word16 -- 192 + | INSTANCEOF Word16 -- 193 + | MONITORENTER -- 194 + | MONITOREXIT -- 195 + | WIDE Word8 Instruction -- 196 + | MULTINANEWARRAY Word16 Word8 -- 197 + | IFNULL Word16 -- 198 + | IFNONNULL Word16 -- 199 + | GOTO_W Word32 -- 200 + | JSR_W Word32 -- 201 + deriving (Eq, Show) + +data ArrayType = + T_BOOLEAN -- 4 + | T_CHAR -- 5 + | T_FLOAT -- 6 + | T_DOUBLE -- 7 + | T_BYTE -- 8 + | T_SHORT -- 9 + | T_INT -- 10 + | T_LONG -- 11 + deriving (Eq, Show, Enum) + +imm :: Word8 -> (IMM -> Instruction) -> Word8 -> GetState s Instruction +imm base constr x = return $ constr $ toEnum $ fromIntegral (x-base) + +putImm :: Word8 -> IMM -> PutState Integer () +putImm base i = putByte $ base + (fromIntegral $ fromEnum i) + +atype2byte :: ArrayType -> Word8 +atype2byte T_BOOLEAN = 4 +atype2byte T_CHAR = 5 +atype2byte T_FLOAT = 6 +atype2byte T_DOUBLE = 7 +atype2byte T_BYTE = 8 +atype2byte T_SHORT = 9 +atype2byte T_INT = 10 +atype2byte T_LONG = 11 + +byte2atype :: Word8 -> GetState s ArrayType +byte2atype 4 = return T_BOOLEAN +byte2atype 5 = return T_CHAR +byte2atype 6 = return T_FLOAT +byte2atype 7 = return T_DOUBLE +byte2atype 8 = return T_BYTE +byte2atype 9 = return T_SHORT +byte2atype 10 = return T_INT +byte2atype 11 = return T_LONG +byte2atype x = fail $ "Unknown array type byte: " ++ show x + +instance BinaryState Integer ArrayType where + get = do + x <- getByte + byte2atype x + + put t = putByte (atype2byte t) + +put1 :: (BinaryState Integer a) => Word8 -> a -> PutState Integer () +put1 code x = do + putByte code + put x + +put2 :: (BinaryState Integer a, BinaryState Integer b) => Word8 -> a -> b -> PutState Integer () +put2 code x y = do + putByte code + put x + put y + +instance BinaryState Integer Instruction where + put NOP = putByte 0 + put ACONST_NULL = putByte 1 + put ICONST_M1 = putByte 2 + put ICONST_0 = putByte 3 + put ICONST_1 = putByte 4 + put ICONST_2 = putByte 5 + put ICONST_3 = putByte 6 + put ICONST_4 = putByte 7 + put ICONST_5 = putByte 8 + put LCONST_0 = putByte 9 + put LCONST_1 = putByte 10 + put FCONST_0 = putByte 11 + put FCONST_1 = putByte 12 + put FCONST_2 = putByte 13 + put DCONST_0 = putByte 14 + put DCONST_1 = putByte 15 + put (BIPUSH x) = put1 16 x + put (SIPUSH x) = put1 17 x + put (LDC1 x) = put1 18 x + put (LDC2 x) = put1 19 x + put (LDC2W x) = put1 20 x + put (ILOAD x) = put1 21 x + put (LLOAD x) = put1 22 x + put (FLOAD x) = put1 23 x + put (DLOAD x) = put1 24 x + put (ALOAD x) = put1 25 x + put (ILOAD_ i) = putImm 26 i + put (LLOAD_ i) = putImm 30 i + put (FLOAD_ i) = putImm 34 i + put (DLOAD_ i) = putImm 38 i + put (ALOAD_ i) = putImm 42 i + put IALOAD = putByte 46 + put LALOAD = putByte 47 + put FALOAD = putByte 48 + put DALOAD = putByte 49 + put AALOAD = putByte 50 + put BALOAD = putByte 51 + put CALOAD = putByte 52 + put SALOAD = putByte 53 + put (ISTORE x) = put1 54 x + put (LSTORE x) = put1 55 x + put (FSTORE x) = put1 56 x + put (DSTORE x) = put1 57 x + put (ASTORE x) = put1 58 x + put (ISTORE_ i) = putImm 59 i + put (LSTORE_ i) = putImm 63 i + put (FSTORE_ i) = putImm 67 i + put (DSTORE_ i) = putImm 71 i + put (ASTORE_ i) = putImm 75 i + put IASTORE = putByte 79 + put LASTORE = putByte 80 + put FASTORE = putByte 81 + put DASTORE = putByte 82 + put AASTORE = putByte 83 + put BASTORE = putByte 84 + put CASTORE = putByte 85 + put SASTORE = putByte 86 + put POP = putByte 87 + put POP2 = putByte 88 + put DUP = putByte 89 + put DUP_X1 = putByte 90 + put DUP_X2 = putByte 91 + put DUP2 = putByte 92 + put DUP2_X1 = putByte 93 + put DUP2_X2 = putByte 94 + put SWAP = putByte 95 + put IADD = putByte 96 + put LADD = putByte 97 + put FADD = putByte 98 + put DADD = putByte 99 + put ISUB = putByte 100 + put LSUB = putByte 101 + put FSUB = putByte 102 + put DSUB = putByte 103 + put IMUL = putByte 104 + put LMUL = putByte 105 + put FMUL = putByte 106 + put DMUL = putByte 107 + put IDIV = putByte 108 + put LDIV = putByte 109 + put FDIV = putByte 110 + put DDIV = putByte 111 + put IREM = putByte 112 + put LREM = putByte 113 + put FREM = putByte 114 + put DREM = putByte 115 + put INEG = putByte 116 + put LNEG = putByte 117 + put FNEG = putByte 118 + put DNEG = putByte 119 + put ISHL = putByte 120 + put LSHL = putByte 121 + put ISHR = putByte 122 + put LSHR = putByte 123 + put IUSHR = putByte 124 + put LUSHR = putByte 125 + put IAND = putByte 126 + put LAND = putByte 127 + put IOR = putByte 128 + put LOR = putByte 129 + put IXOR = putByte 130 + put LXOR = putByte 131 + put (IINC x y) = put2 132 x y + put I2L = putByte 133 + put I2F = putByte 134 + put I2D = putByte 135 + put L2I = putByte 136 + put L2F = putByte 137 + put L2D = putByte 138 + put F2I = putByte 139 + put F2L = putByte 140 + put F2D = putByte 141 + put D2I = putByte 142 + put D2L = putByte 143 + put D2F = putByte 144 + put I2B = putByte 145 + put I2C = putByte 146 + put I2S = putByte 147 + put LCMP = putByte 148 + put (FCMP C_LT) = putByte 149 + put (FCMP C_GT) = putByte 150 + put (FCMP c) = fail $ "No such instruction: FCMP " ++ show c + put (DCMP C_LT) = putByte 151 + put (DCMP C_GT) = putByte 152 + put (DCMP c) = fail $ "No such instruction: DCMP " ++ show c + put (IF c) = putByte (fromIntegral $ 153 + fromEnum c) + put (IF_ACMP C_EQ x) = put1 165 x + put (IF_ACMP C_NE x) = put1 166 x + put (IF_ACMP c _) = fail $ "No such instruction: IF_ACMP " ++ show c + put (IF_ICMP c x) = putByte (fromIntegral $ 159 + fromEnum c) >> put x + put GOTO = putByte 167 + put (JSR x) = put1 168 x + put RET = putByte 169 + put (TABLESWITCH def low high offs) = do + putByte 170 + offset <- getOffset + let pads = 4 - (offset `mod` 4) + replicateM (fromIntegral pads) (putByte 0) + put low + put high + forM_ offs put + put (LOOKUPSWITCH def n pairs) = do + putByte 171 + offset <- getOffset + let pads = 4 - (offset `mod` 4) + replicateM (fromIntegral pads) (putByte 0) + put def + put n + forM_ pairs put + put IRETURN = putByte 172 + put LRETURN = putByte 173 + put FRETURN = putByte 174 + put DRETURN = putByte 175 + put RETURN = putByte 177 + put (GETSTATIC x) = put1 178 x + put (PUTSTATIC x) = put1 179 x + put (GETFIELD x) = put1 180 x + put (PUTFIELD x) = put1 181 x + put (INVOKEVIRTUAL x) = put1 182 x + put (INVOKESPECIAL x) = put1 183 x + put (INVOKESTATIC x) = put1 184 x + put (INVOKEINTERFACE x c) = put2 185 x c >> putByte 0 + put (NEW x) = put1 187 x + put (NEWARRAY x) = put1 188 x + put (ANEWARRAY x) = put1 189 x + put ARRAYLENGTH = putByte 190 + put ATHROW = putByte 191 + put (CHECKCAST x) = put1 192 x + put (INSTANCEOF x) = put1 193 x + put MONITORENTER = putByte 194 + put MONITOREXIT = putByte 195 + put (WIDE x inst) = put2 196 x inst + put (MULTINANEWARRAY x y) = put2 197 x y + put (IFNULL x) = put1 198 x + put (IFNONNULL x) = put1 199 x + put (GOTO_W x) = put1 200 x + put (JSR_W x) = put1 201 x + + get = do + c <- getByte + case c of + 0 -> return NOP + 1 -> return ACONST_NULL + 2 -> return ICONST_M1 + 3 -> return ICONST_0 + 4 -> return ICONST_1 + 5 -> return ICONST_2 + 6 -> return ICONST_3 + 7 -> return ICONST_4 + 8 -> return ICONST_5 + 9 -> return LCONST_0 + 10 -> return LCONST_1 + 11 -> return FCONST_0 + 12 -> return FCONST_1 + 13 -> return FCONST_2 + 14 -> return DCONST_0 + 15 -> return DCONST_1 + 16 -> BIPUSH <$> get + 17 -> SIPUSH <$> get + 18 -> LDC1 <$> get + 19 -> LDC2 <$> get + 20 -> LDC2W <$> get + 21 -> ILOAD <$> get + 22 -> LLOAD <$> get + 23 -> FLOAD <$> get + 24 -> DLOAD <$> get + 25 -> ALOAD <$> get + 46 -> return IALOAD + 47 -> return LALOAD + 48 -> return FALOAD + 49 -> return DALOAD + 50 -> return AALOAD + 51 -> return BALOAD + 52 -> return CALOAD + 53 -> return SALOAD + 54 -> ISTORE <$> get + 55 -> LSTORE <$> get + 56 -> FSTORE <$> get + 57 -> DSTORE <$> get + 58 -> ASTORE <$> get + 79 -> return IASTORE + 80 -> return LASTORE + 81 -> return FASTORE + 82 -> return DASTORE + 83 -> return AASTORE + 84 -> return BASTORE + 85 -> return CASTORE + 86 -> return SASTORE + 87 -> return POP + 88 -> return POP2 + 89 -> return DUP + 90 -> return DUP_X1 + 91 -> return DUP_X2 + 92 -> return DUP2 + 93 -> return DUP2_X1 + 94 -> return DUP2_X2 + 95 -> return SWAP + 96 -> return IADD + 97 -> return LADD + 98 -> return FADD + 99 -> return DADD + 100 -> return ISUB + 101 -> return LSUB + 102 -> return FSUB + 103 -> return DSUB + 104 -> return IMUL + 105 -> return LMUL + 106 -> return FMUL + 107 -> return DMUL + 108 -> return IDIV + 109 -> return LDIV + 110 -> return FDIV + 111 -> return DDIV + 112 -> return IREM + 113 -> return LREM + 114 -> return FREM + 115 -> return DREM + 116 -> return INEG + 117 -> return LNEG + 118 -> return FNEG + 119 -> return DNEG + 120 -> return ISHL + 121 -> return LSHL + 122 -> return ISHR + 123 -> return LSHR + 124 -> return IUSHR + 125 -> return LUSHR + 126 -> return IAND + 127 -> return LAND + 128 -> return IOR + 129 -> return LOR + 130 -> return IXOR + 131 -> return LXOR + 132 -> IINC <$> get <*> get + 133 -> return I2L + 134 -> return I2F + 135 -> return I2D + 136 -> return L2I + 137 -> return L2F + 138 -> return L2D + 139 -> return F2I + 140 -> return F2L + 141 -> return F2D + 142 -> return D2I + 143 -> return D2L + 144 -> return D2F + 145 -> return I2B + 146 -> return I2C + 147 -> return I2S + 148 -> return LCMP + 149 -> return $ FCMP C_LT + 150 -> return $ FCMP C_GT + 151 -> return $ DCMP C_LT + 152 -> return $ DCMP C_GT + 165 -> IF_ACMP C_EQ <$> get + 166 -> IF_ACMP C_NE <$> get + 167 -> return GOTO + 168 -> JSR <$> get + 169 -> return RET + 170 -> do + offset <- bytesRead + let pads = 4 - (offset `mod` 4) + skip (fromIntegral pads) + def <- get + low <- get + high <- get + offs <- replicateM (fromIntegral $ high - low + 1) get + return $ TABLESWITCH def low high offs + 171 -> do + offset <- bytesRead + let pads = 4 - (offset `mod` 4) + skip (fromIntegral pads) + def <- get + n <- get + pairs <- replicateM (fromIntegral n) get + return $ LOOKUPSWITCH def n pairs + 172 -> return IRETURN + 173 -> return LRETURN + 174 -> return FRETURN + 175 -> return DRETURN + 177 -> return RETURN + 178 -> GETSTATIC <$> get + 179 -> PUTSTATIC <$> get + 180 -> GETFIELD <$> get + 181 -> PUTFIELD <$> get + 182 -> INVOKEVIRTUAL <$> get + 183 -> INVOKESPECIAL <$> get + 184 -> INVOKESTATIC <$> get + 185 -> (INVOKEINTERFACE <$> get <*> get) <* skip 1 + 187 -> NEW <$> get + 188 -> NEWARRAY <$> get + 189 -> ANEWARRAY <$> get + 190 -> return ARRAYLENGTH + 191 -> return ATHROW + 192 -> CHECKCAST <$> get + 193 -> INSTANCEOF <$> get + 194 -> return MONITORENTER + 195 -> return MONITOREXIT + 196 -> WIDE <$> get <*> get + 197 -> MULTINANEWARRAY <$> get <*> get + 198 -> IFNULL <$> get + 199 -> IFNONNULL <$> get + 200 -> GOTO_W <$> get + 201 -> JSR_W <$> get + _ | inRange (59, 62) c -> imm 59 ISTORE_ c + | inRange (63, 66) c -> imm 63 LSTORE_ c + | inRange (67, 70) c -> imm 67 FSTORE_ c + | inRange (71, 74) c -> imm 71 DSTORE_ c + | inRange (75, 78) c -> imm 75 ASTORE_ c + | inRange (26, 29) c -> imm 26 ILOAD_ c + | inRange (30, 33) c -> imm 30 LLOAD_ c + | inRange (34, 37) c -> imm 34 FLOAD_ c + | inRange (38, 41) c -> imm 38 DLOAD_ c + | inRange (42, 45) c -> imm 42 ALOAD_ c + | inRange (153, 158) c -> return $ IF (toEnum $ fromIntegral $ c-153) + | inRange (159, 164) c -> IF_ICMP (toEnum $ fromIntegral $ c-159) <$> get + | otherwise -> fail $ "Unknown instruction byte code: " ++ show c + diff --git a/JVM/Converter.hs b/JVM/Converter.hs index 82c88e9..6f16e31 100644 --- a/JVM/Converter.hs +++ b/JVM/Converter.hs @@ -1,7 +1,6 @@ {-# LANGUAGE TypeFamilies, StandaloneDeriving, FlexibleInstances, FlexibleContexts, UndecidableInstances, RecordWildCards, OverloadedStrings #-} module JVM.Converter where -import Codec.Binary.UTF8.String hiding (encode, decode) import Data.List import Data.Word import Data.Bits @@ -18,9 +17,6 @@ import Debug.Trace import JVM.ClassFile import JVM.Types -instance IsString B.ByteString where - fromString s = B.pack $ map (fromIntegral . ord) $ encodeString s - decompile :: B.ByteString -> Class decompile bstr = convertClass $ decode bstr diff --git a/JVM/Types.hs b/JVM/Types.hs index 08ceeae..2f2f3a1 100644 --- a/JVM/Types.hs +++ b/JVM/Types.hs @@ -1,14 +1,20 @@ {-# LANGUAGE TypeFamilies, StandaloneDeriving, FlexibleInstances, FlexibleContexts, UndecidableInstances #-} module JVM.Types where +import Codec.Binary.UTF8.String hiding (encode, decode) import Data.Array import qualified Data.ByteString.Lazy as B import Data.Word +import Data.Char +import Data.String import qualified Data.Set as S import qualified Data.Map as M import JVM.ClassFile +instance IsString B.ByteString where + fromString s = B.pack $ map (fromIntegral . ord) $ encodeString s + type Pool = Array Word16 Constant class HasAttributes a where diff --git a/disassemble.hs b/disassemble.hs new file mode 100644 index 0000000..a11a2ad --- /dev/null +++ b/disassemble.hs @@ -0,0 +1,27 @@ +{-# LANGUAGE OverloadedStrings #-} +import Control.Monad +import System.Environment +import qualified Data.ByteString.Lazy as B + +import Data.BinaryState +import JVM.Types +import JVM.Converter +import JVM.Assembler + +main = do + args <- getArgs + case args of + [clspath] -> do + cls <- decompileFile clspath + putStr "Class: " + B.putStrLn (this cls) + putStrLn "Methods:" + forM_ (methods cls) $ \m -> do + putStr ">> Method " + B.putStrLn (methodName m) + case attrByName m "Code" of + Nothing -> putStrLn "(no code)\n" + Just bytecode -> let (Code code) = decodeS (0 :: Integer) bytecode + in forM_ code print + + _ -> error "Synopsis: disassemble File.class" -- 2.25.1