From 281875bae2de5eec6e4e5de8e5733118533258ea Mon Sep 17 00:00:00 2001 From: Ilya Portnov Date: Mon, 13 Jun 2011 21:49:59 +0600 Subject: [PATCH] Add some documentation. --- JVM/Assembler.hs | 386 +++++++++++++++++++++++++---------------------- JVM/ClassFile.hs | 105 ++++++------- JVM/Converter.hs | 22 ++- JVM/Types.hs | 37 +++-- 4 files changed, 303 insertions(+), 247 deletions(-) diff --git a/JVM/Assembler.hs b/JVM/Assembler.hs index 9c8d3d1..21ecdd6 100644 --- a/JVM/Assembler.hs +++ b/JVM/Assembler.hs @@ -1,16 +1,26 @@ -{-# LANGUAGE TypeFamilies, StandaloneDeriving, FlexibleInstances, FlexibleContexts, UndecidableInstances, RecordWildCards, OverloadedStrings, TypeSynonymInstances, MultiParamTypeClasses #-} -module JVM.Assembler where +{-# LANGUAGE TypeFamilies, StandaloneDeriving, FlexibleInstances, + FlexibleContexts, UndecidableInstances, RecordWildCards, OverloadedStrings, + TypeSynonymInstances, MultiParamTypeClasses #-} +-- | This module declares data type for JVM instructions, and BinaryState +-- instances to read/write them. +module JVM.Assembler + (Instruction (..), + ArrayType (..), + Code (..), + IMM (..), + CMP (..) + ) + where import Control.Monad import Control.Applicative import Data.Ix (inRange) -import Data.List +import Data.List (intercalate) import Data.Word import Data.Bits import qualified Data.Binary as Binary import qualified Data.Binary.Get as Get import Data.Char -import Data.String import qualified Data.ByteString.Lazy as B import Data.Array import qualified Data.Set as S @@ -20,13 +30,15 @@ import Data.BinaryState import JVM.ClassFile import JVM.Types +-- | Immediate constant. Corresponding value will be added to base opcode. data IMM = - I0 - | I1 - | I2 - | I3 + I0 -- ^ 0 + | I1 -- ^ 1 + | I2 -- ^ 2 + | I3 -- ^ 3 deriving (Eq, Ord, Enum, Show) +-- | Comparation operation type. Not all CMP instructions support all operations. data CMP = C_EQ | C_NE @@ -36,6 +48,7 @@ data CMP = | C_LE deriving (Eq, Ord, Enum, Show) +-- | Format of Code method attribute. data Code = Code { codeStackSize :: Word16, codeMaxLocals :: Word16, @@ -47,6 +60,7 @@ data Code = Code { codeAttributes :: [AttributeInfo] } deriving (Eq, Show) +-- | Exception descriptor data CodeException = CodeException { eStartPC :: Word16, eEndPC :: Word16, @@ -94,6 +108,7 @@ instance BinaryState Integer Code where attrs <- replicateM (fromIntegral nAttrs) get return $ Code stackSz locals len code excn excs nAttrs attrs +-- | Read sequence of instructions (to end of stream) readInstructions :: GetState Integer [Instruction] readInstructions = do end <- isEmpty @@ -104,181 +119,190 @@ readInstructions = do next <- readInstructions return (x: next) +-- | JVM instruction set data Instruction = - NOP -- 0 - | ACONST_NULL -- 1 - | ICONST_M1 -- 2 - | ICONST_0 -- 3 - | ICONST_1 -- 4 - | ICONST_2 -- 5 - | ICONST_3 -- 6 - | ICONST_4 -- 7 - | ICONST_5 -- 8 - | LCONST_0 -- 9 - | LCONST_1 -- 10 - | FCONST_0 -- 11 - | FCONST_1 -- 12 - | FCONST_2 -- 13 - | DCONST_0 -- 14 - | DCONST_1 -- 15 - | BIPUSH Word8 -- 16 - | SIPUSH Word16 -- 17 - | LDC1 Word8 -- 18 - | LDC2 Word16 -- 19 - | LDC2W Word16 -- 20 - | ILOAD Word8 -- 21 - | LLOAD Word8 -- 22 - | FLOAD Word8 -- 23 - | DLOAD Word8 -- 24 - | ALOAD Word8 -- 25 - | ILOAD_ IMM -- 26, 27, 28, 29 - | LLOAD_ IMM -- 30, 31, 32, 33 - | FLOAD_ IMM -- 34, 35, 36, 37 - | DLOAD_ IMM -- 38, 39, 40, 41 - | ALOAD_ IMM -- 42, 43, 44, 45 - | IALOAD -- 46 - | LALOAD -- 47 - | FALOAD -- 48 - | DALOAD -- 49 - | AALOAD -- 50 - | BALOAD -- 51 - | CALOAD -- 52 - | SALOAD -- 53 - | ISTORE Word8 -- 54 - | LSTORE Word8 -- 55 - | FSTORE Word8 -- 56 - | DSTORE Word8 -- 57 - | ASTORE Word8 -- 58 - | ISTORE_ IMM -- 59, 60, 61, 62 - | LSTORE_ IMM -- 63, 64, 65, 66 - | FSTORE_ IMM -- 67, 68, 69, 70 - | DSTORE_ IMM -- 71, 72, 73, 74 - | ASTORE_ IMM -- 75, 76, 77, 78 - | IASTORE -- 79 - | LASTORE -- 80 - | FASTORE -- 81 - | DASTORE -- 82 - | AASTORE -- 83 - | BASTORE -- 84 - | CASTORE -- 85 - | SASTORE -- 86 - | POP -- 87 - | POP2 -- 88 - | DUP -- 89 - | DUP_X1 -- 90 - | DUP_X2 -- 91 - | DUP2 -- 92 - | DUP2_X1 -- 93 - | DUP2_X2 -- 94 - | SWAP -- 95 - | IADD -- 96 - | LADD -- 97 - | FADD -- 98 - | DADD -- 99 - | ISUB -- 100 - | LSUB -- 101 - | FSUB -- 102 - | DSUB -- 103 - | IMUL -- 104 - | LMUL -- 105 - | FMUL -- 106 - | DMUL -- 107 - | IDIV -- 108 - | LDIV -- 109 - | FDIV -- 110 - | DDIV -- 111 - | IREM -- 112 - | LREM -- 113 - | FREM -- 114 - | DREM -- 115 - | INEG -- 116 - | LNEG -- 117 - | FNEG -- 118 - | DNEG -- 119 - | ISHL -- 120 - | LSHL -- 121 - | ISHR -- 122 - | LSHR -- 123 - | IUSHR -- 124 - | LUSHR -- 125 - | IAND -- 126 - | LAND -- 127 - | IOR -- 128 - | LOR -- 129 - | IXOR -- 130 - | LXOR -- 131 - | IINC Word8 Word8 -- 132 - | I2L -- 133 - | I2F -- 134 - | I2D -- 135 - | L2I -- 136 - | L2F -- 137 - | L2D -- 138 - | F2I -- 139 - | F2L -- 140 - | F2D -- 141 - | D2I -- 142 - | D2L -- 143 - | D2F -- 144 - | I2B -- 145 - | I2C -- 146 - | I2S -- 147 - | LCMP -- 148 - | FCMP CMP -- 149, 150 - | DCMP CMP -- 151, 152 - | IF CMP -- 153, 154, 155, 156, 157, 158 - | IF_ICMP CMP Word16 -- 159, 160, 161, 162, 163, 164 - | IF_ACMP CMP Word16 -- 165, 166 - | GOTO -- 167 - | JSR Word16 -- 168 - | RET -- 169 - | TABLESWITCH Word32 Word32 Word32 [Word32] -- 170 - | LOOKUPSWITCH Word32 Word32 [(Word32, Word32)] -- 171 - | IRETURN -- 172 - | LRETURN -- 173 - | FRETURN -- 174 - | DRETURN -- 175 - | RETURN -- 177 - | GETSTATIC Word16 -- 178 - | PUTSTATIC Word16 -- 179 - | GETFIELD Word16 -- 180 - | PUTFIELD Word16 -- 181 - | INVOKEVIRTUAL Word16 -- 182 - | INVOKESPECIAL Word16 -- 183 - | INVOKESTATIC Word16 -- 184 - | INVOKEINTERFACE Word16 Word8 -- 185 - | NEW Word16 -- 187 - | NEWARRAY Word8 -- 188, see ArrayType - | ANEWARRAY Word16 -- 189 - | ARRAYLENGTH -- 190 - | ATHROW -- 191 - | CHECKCAST Word16 -- 192 - | INSTANCEOF Word16 -- 193 - | MONITORENTER -- 194 - | MONITOREXIT -- 195 - | WIDE Word8 Instruction -- 196 - | MULTINANEWARRAY Word16 Word8 -- 197 - | IFNULL Word16 -- 198 - | IFNONNULL Word16 -- 199 - | GOTO_W Word32 -- 200 - | JSR_W Word32 -- 201 + NOP -- ^ 0 + | ACONST_NULL -- ^ 1 + | ICONST_M1 -- ^ 2 + | ICONST_0 -- ^ 3 + | ICONST_1 -- ^ 4 + | ICONST_2 -- ^ 5 + | ICONST_3 -- ^ 6 + | ICONST_4 -- ^ 7 + | ICONST_5 -- ^ 8 + | LCONST_0 -- ^ 9 + | LCONST_1 -- ^ 10 + | FCONST_0 -- ^ 11 + | FCONST_1 -- ^ 12 + | FCONST_2 -- ^ 13 + | DCONST_0 -- ^ 14 + | DCONST_1 -- ^ 15 + | BIPUSH Word8 -- ^ 16 + | SIPUSH Word16 -- ^ 17 + | LDC1 Word8 -- ^ 18 + | LDC2 Word16 -- ^ 19 + | LDC2W Word16 -- ^ 20 + | ILOAD Word8 -- ^ 21 + | LLOAD Word8 -- ^ 22 + | FLOAD Word8 -- ^ 23 + | DLOAD Word8 -- ^ 24 + | ALOAD Word8 -- ^ 25 + | ILOAD_ IMM -- ^ 26, 27, 28, 29 + | LLOAD_ IMM -- ^ 30, 31, 32, 33 + | FLOAD_ IMM -- ^ 34, 35, 36, 37 + | DLOAD_ IMM -- ^ 38, 39, 40, 41 + | ALOAD_ IMM -- ^ 42, 43, 44, 45 + | IALOAD -- ^ 46 + | LALOAD -- ^ 47 + | FALOAD -- ^ 48 + | DALOAD -- ^ 49 + | AALOAD -- ^ 50 + | BALOAD -- ^ 51 + | CALOAD -- ^ 52 + | SALOAD -- ^ 53 + | ISTORE Word8 -- ^ 54 + | LSTORE Word8 -- ^ 55 + | FSTORE Word8 -- ^ 56 + | DSTORE Word8 -- ^ 57 + | ASTORE Word8 -- ^ 58 + | ISTORE_ IMM -- ^ 59, 60, 61, 62 + | LSTORE_ IMM -- ^ 63, 64, 65, 66 + | FSTORE_ IMM -- ^ 67, 68, 69, 70 + | DSTORE_ IMM -- ^ 71, 72, 73, 74 + | ASTORE_ IMM -- ^ 75, 76, 77, 78 + | IASTORE -- ^ 79 + | LASTORE -- ^ 80 + | FASTORE -- ^ 81 + | DASTORE -- ^ 82 + | AASTORE -- ^ 83 + | BASTORE -- ^ 84 + | CASTORE -- ^ 85 + | SASTORE -- ^ 86 + | POP -- ^ 87 + | POP2 -- ^ 88 + | DUP -- ^ 89 + | DUP_X1 -- ^ 90 + | DUP_X2 -- ^ 91 + | DUP2 -- ^ 92 + | DUP2_X1 -- ^ 93 + | DUP2_X2 -- ^ 94 + | SWAP -- ^ 95 + | IADD -- ^ 96 + | LADD -- ^ 97 + | FADD -- ^ 98 + | DADD -- ^ 99 + | ISUB -- ^ 100 + | LSUB -- ^ 101 + | FSUB -- ^ 102 + | DSUB -- ^ 103 + | IMUL -- ^ 104 + | LMUL -- ^ 105 + | FMUL -- ^ 106 + | DMUL -- ^ 107 + | IDIV -- ^ 108 + | LDIV -- ^ 109 + | FDIV -- ^ 110 + | DDIV -- ^ 111 + | IREM -- ^ 112 + | LREM -- ^ 113 + | FREM -- ^ 114 + | DREM -- ^ 115 + | INEG -- ^ 116 + | LNEG -- ^ 117 + | FNEG -- ^ 118 + | DNEG -- ^ 119 + | ISHL -- ^ 120 + | LSHL -- ^ 121 + | ISHR -- ^ 122 + | LSHR -- ^ 123 + | IUSHR -- ^ 124 + | LUSHR -- ^ 125 + | IAND -- ^ 126 + | LAND -- ^ 127 + | IOR -- ^ 128 + | LOR -- ^ 129 + | IXOR -- ^ 130 + | LXOR -- ^ 131 + | IINC Word8 Word8 -- ^ 132 + | I2L -- ^ 133 + | I2F -- ^ 134 + | I2D -- ^ 135 + | L2I -- ^ 136 + | L2F -- ^ 137 + | L2D -- ^ 138 + | F2I -- ^ 139 + | F2L -- ^ 140 + | F2D -- ^ 141 + | D2I -- ^ 142 + | D2L -- ^ 143 + | D2F -- ^ 144 + | I2B -- ^ 145 + | I2C -- ^ 146 + | I2S -- ^ 147 + | LCMP -- ^ 148 + | FCMP CMP -- ^ 149, 150 + | DCMP CMP -- ^ 151, 152 + | IF CMP -- ^ 153, 154, 155, 156, 157, 158 + | IF_ICMP CMP Word16 -- ^ 159, 160, 161, 162, 163, 164 + | IF_ACMP CMP Word16 -- ^ 165, 166 + | GOTO -- ^ 167 + | JSR Word16 -- ^ 168 + | RET -- ^ 169 + | TABLESWITCH Word32 Word32 Word32 [Word32] -- ^ 170 + | LOOKUPSWITCH Word32 Word32 [(Word32, Word32)] -- ^ 171 + | IRETURN -- ^ 172 + | LRETURN -- ^ 173 + | FRETURN -- ^ 174 + | DRETURN -- ^ 175 + | RETURN -- ^ 177 + | GETSTATIC Word16 -- ^ 178 + | PUTSTATIC Word16 -- ^ 179 + | GETFIELD Word16 -- ^ 180 + | PUTFIELD Word16 -- ^ 181 + | INVOKEVIRTUAL Word16 -- ^ 182 + | INVOKESPECIAL Word16 -- ^ 183 + | INVOKESTATIC Word16 -- ^ 184 + | INVOKEINTERFACE Word16 Word8 -- ^ 185 + | NEW Word16 -- ^ 187 + | NEWARRAY Word8 -- ^ 188, see @ArrayType@ + | ANEWARRAY Word16 -- ^ 189 + | ARRAYLENGTH -- ^ 190 + | ATHROW -- ^ 191 + | CHECKCAST Word16 -- ^ 192 + | INSTANCEOF Word16 -- ^ 193 + | MONITORENTER -- ^ 194 + | MONITOREXIT -- ^ 195 + | WIDE Word8 Instruction -- ^ 196 + | MULTINANEWARRAY Word16 Word8 -- ^ 197 + | IFNULL Word16 -- ^ 198 + | IFNONNULL Word16 -- ^ 199 + | GOTO_W Word32 -- ^ 200 + | JSR_W Word32 -- ^ 201 deriving (Eq, Show) +-- ^ JVM array type (primitive types) data ArrayType = - T_BOOLEAN -- 4 - | T_CHAR -- 5 - | T_FLOAT -- 6 - | T_DOUBLE -- 7 - | T_BYTE -- 8 - | T_SHORT -- 9 - | T_INT -- 10 - | T_LONG -- 11 + T_BOOLEAN -- ^ 4 + | T_CHAR -- ^ 5 + | T_FLOAT -- ^ 6 + | T_DOUBLE -- ^ 7 + | T_BYTE -- ^ 8 + | T_SHORT -- ^ 9 + | T_INT -- ^ 10 + | T_LONG -- ^ 11 deriving (Eq, Show, Enum) -imm :: Word8 -> (IMM -> Instruction) -> Word8 -> GetState s Instruction +-- ^ Parse opcode with immediate constant +imm :: Word8 -- ^ Base opcode + -> (IMM -> Instruction) -- ^ Instruction constructor + -> Word8 -- ^ Opcode to parse + -> GetState s Instruction imm base constr x = return $ constr $ toEnum $ fromIntegral (x-base) -putImm :: Word8 -> IMM -> PutState Integer () +-- ^ Put opcode with immediate constant +putImm :: Word8 -- ^ Base opcode + -> IMM -- ^ Constant to add to opcode + -> PutState Integer () putImm base i = putByte $ base + (fromIntegral $ fromEnum i) atype2byte :: ArrayType -> Word8 @@ -309,12 +333,20 @@ instance BinaryState Integer ArrayType where put t = putByte (atype2byte t) -put1 :: (BinaryState Integer a) => Word8 -> a -> PutState Integer () +-- ^ Put opcode with one argument +put1 :: (BinaryState Integer a) + => Word8 -- ^ Opcode + -> a -- ^ First argument + -> PutState Integer () put1 code x = do putByte code put x -put2 :: (BinaryState Integer a, BinaryState Integer b) => Word8 -> a -> b -> PutState Integer () +put2 :: (BinaryState Integer a, BinaryState Integer b) + => Word8 -- ^ Opcode + -> a -- ^ First argument + -> b -- ^ Second argument + -> PutState Integer () put2 code x y = do putByte code put x diff --git a/JVM/ClassFile.hs b/JVM/ClassFile.hs index 3a9f3bf..9546292 100644 --- a/JVM/ClassFile.hs +++ b/JVM/ClassFile.hs @@ -1,4 +1,6 @@ {-# LANGUAGE RecordWildCards, BangPatterns #-} +-- | This module declares (low-level) data types for Java .class files +-- structures, and Binary instances to read/write them. module JVM.ClassFile where import Control.Monad @@ -12,45 +14,33 @@ import Data.Char import Data.List import qualified Data.ByteString.Lazy as B -import Debug.Trace - -traceS :: (Show a) => String -> a -> a -traceS msg x = trace (msg ++ ": " ++ show x) x - -char :: Word8 -> Char -char n = chr (fromIntegral n) - +-- | Read one-byte Char getChar8 :: Get Char getChar8 = do x <- getWord8 - return (char x) + return $ chr (fromIntegral x) +-- | Generic .class file format data ClassFile = ClassFile { - magic :: Word32, + magic :: Word32, -- ^ Magic value: 0xCAFEBABE minorVersion :: Word16, majorVersion :: Word16, - constsPoolSize :: Word16, - constsPool :: [CpInfo], - accessFlags :: Word16, - thisClass :: Word16, - superClass :: Word16, - interfacesCount :: Word16, - interfaces :: [Word16], - classFieldsCount :: Word16, - classFields :: [FieldInfo], - classMethodsCount :: Word16, - classMethods :: [MethodInfo], - classAttributesCount :: Word16, - classAttributes :: [AttributeInfo] + constsPoolSize :: Word16, -- ^ Number of items in constants pool + constsPool :: [CpInfo], -- ^ Constants pool itself + accessFlags :: Word16, -- ^ See @JVM.Types.AccessFlag@ + thisClass :: Word16, -- ^ Constants pool item index for this class + superClass :: Word16, -- ^ --/-- for super class, zero for java.lang.Object + interfacesCount :: Word16, -- ^ Number of implemented interfaces + interfaces :: [Word16], -- ^ Constants pool item indexes for implemented interfaces + classFieldsCount :: Word16, -- ^ Number of class fileds + classFields :: [FieldInfo], -- ^ Class fields + classMethodsCount :: Word16, -- ^ Number of class methods + classMethods :: [MethodInfo], -- ^ Class methods + classAttributesCount :: Word16, -- ^ Number of class attributes + classAttributes :: [AttributeInfo] -- ^ Class attributes } deriving (Eq, Show) -traceM msg x = do - r <- x - return $ traceS msg r - -replicateMT n m = replicateM n (traceM ">" m) - instance Binary ClassFile where put (ClassFile {..}) = do put magic @@ -90,17 +80,18 @@ instance Binary ClassFile where return $ ClassFile magic minor major poolsize pool af this super interfacesCount ifaces classFieldsCount classFields classMethodsCount classMethods asCount as +-- | Field signature format data FieldType = - SignedByte -- B - | CharByte -- C - | DoubleType -- D - | FloatType -- F - | IntType -- I - | LongInt -- J - | ShortInt -- S - | BoolType -- Z - | ObjectType String -- L - | Array (Maybe Int) FieldType + SignedByte -- ^ B + | CharByte -- ^ C + | DoubleType -- ^ D + | FloatType -- ^ F + | IntType -- ^ I + | LongInt -- ^ J + | ShortInt -- ^ S + | BoolType -- ^ Z + | ObjectType String -- ^ L + | Array (Maybe Int) FieldType -- ^ [ deriving (Eq) instance Show FieldType where @@ -116,8 +107,10 @@ instance Show FieldType where show (Array Nothing t) = show t ++ "[]" show (Array (Just n) t) = show t ++ "[" ++ show n ++ "]" +-- | Class field signature type FieldSignature = FieldType +-- | Try to read integer value from decimal representation getInt :: Get (Maybe Int) getInt = do s <- getDigits @@ -168,6 +161,7 @@ instance Binary FieldType where return (Array mbSize sig) _ -> fail $ "Unknown signature opening symbol: " ++ [b] +-- | Read string up to `;' getToSemicolon :: Get String getToSemicolon = do x <- get @@ -177,6 +171,7 @@ getToSemicolon = do next <- getToSemicolon return (x: next) +-- | Return value signature data ReturnSignature = Returns FieldType | ReturnsVoid @@ -196,8 +191,10 @@ instance Binary ReturnSignature where 'V' -> skip 1 >> return ReturnsVoid _ -> Returns <$> get +-- | Method argument signature type ArgumentSignature = FieldType +-- | Class method argument signature data MethodSignature = MethodSignature [ArgumentSignature] ReturnSignature deriving (Eq) @@ -223,6 +220,7 @@ instance Binary MethodSignature where ret <- get return (MethodSignature args ret) +-- | Read arguments signatures (up to `)') getArgs :: Get [ArgumentSignature] getArgs = whileJust getArg where @@ -242,19 +240,20 @@ whileJust m = do return (x: next) Nothing -> return [] +-- | Constant pool item format data CpInfo = - CONSTANT_Class {nameIndex :: Word16} -- 7 - | CONSTANT_Fieldref {classIndex :: Word16, nameAndTypeIndex :: Word16} -- 9 - | CONSTANT_Methodref {classIndex :: Word16, nameAndTypeIndex :: Word16} -- 10 - | CONSTANT_InterfaceMethodref {classIndex :: Word16, nameAndTypeIndex :: Word16}-- 11 - | CONSTANT_String {stringIndex :: Word16} -- 8 - | CONSTANT_Integer {fourBytes :: Word32} -- 3 - | CONSTANT_Float Float -- 4 - | CONSTANT_Long Word64 -- 5 - | CONSTANT_Double Double -- 6 - | CONSTANT_NameAndType {nameIndex :: Word16, signatureIndex :: Word16} -- 12 - | CONSTANT_Utf8 {stringLength :: Word16, stringBytes :: B.ByteString} -- 1 - | CONSTANT_Unicode {stringLength :: Word16, stringBytes :: B.ByteString} -- 2 + CONSTANT_Class {nameIndex :: Word16} -- ^ 7 + | CONSTANT_Fieldref {classIndex :: Word16, nameAndTypeIndex :: Word16} -- ^ 9 + | CONSTANT_Methodref {classIndex :: Word16, nameAndTypeIndex :: Word16} -- ^ 10 + | CONSTANT_InterfaceMethodref {classIndex :: Word16, nameAndTypeIndex :: Word16}-- ^ 11 + | CONSTANT_String {stringIndex :: Word16} -- ^ 8 + | CONSTANT_Integer {fourBytes :: Word32} -- ^ 3 + | CONSTANT_Float Float -- ^ 4 + | CONSTANT_Long Word64 -- ^ 5 + | CONSTANT_Double Double -- ^ 6 + | CONSTANT_NameAndType {nameIndex :: Word16, signatureIndex :: Word16} -- ^ 12 + | CONSTANT_Utf8 {stringLength :: Word16, stringBytes :: B.ByteString} -- ^ 1 + | CONSTANT_Unicode {stringLength :: Word16, stringBytes :: B.ByteString} -- ^ 2 deriving (Eq, Show) instance Binary CpInfo where @@ -295,6 +294,7 @@ instance Binary CpInfo where 12 -> CONSTANT_NameAndType <$> get <*> get _ -> fail $ "Unknown constants pool entry tag: " ++ show tag +-- | Class field format data FieldInfo = FieldInfo { fieldAccessFlags :: Word16, fieldNameIndex :: Word16, @@ -319,6 +319,7 @@ instance Binary FieldInfo where as <- replicateM (fromIntegral n) get return $ FieldInfo af ni si n as +-- | Class method format data MethodInfo = MethodInfo { methodAccessFlags :: Word16, methodNameIndex :: Word16, @@ -344,6 +345,8 @@ instance Binary MethodInfo where as <- replicateM (fromIntegral n) get return $ MethodInfo af ni si n as +-- | Any (class/ field/ method/ ...) attribute format. +-- Some formats specify special formats for @attributeValue@. data AttributeInfo = AttributeInfo { attributeName :: Word16, attributeLength :: Word32, diff --git a/JVM/Converter.hs b/JVM/Converter.hs index 6f16e31..1bc8e85 100644 --- a/JVM/Converter.hs +++ b/JVM/Converter.hs @@ -1,5 +1,14 @@ {-# LANGUAGE TypeFamilies, StandaloneDeriving, FlexibleInstances, FlexibleContexts, UndecidableInstances, RecordWildCards, OverloadedStrings #-} -module JVM.Converter where +-- | Functions to convert from low-level .class format representation and +-- high-level Java classes, methods etc representation +module JVM.Converter + (decompile, decompileFile, + convertClass, + methodByName, + attrByName, + methodCode + ) + where import Data.List import Data.Word @@ -17,9 +26,11 @@ import Debug.Trace import JVM.ClassFile import JVM.Types +-- | Parse .class file data decompile :: B.ByteString -> Class decompile bstr = convertClass $ decode bstr +-- | Parse class data from file decompileFile :: FilePath -> IO Class decompileFile path = convertClass `fmap` decodeFile path @@ -62,8 +73,6 @@ constantPoolArray list = pool convert (CONSTANT_Utf8 _ bs) = CUTF8 bs convert (CONSTANT_Unicode _ bs) = CUnicode bs -className' x = trace ("Class name: " ++ show x) B.empty - convertAccess :: Word16 -> Access convertAccess w = S.fromList $ concat $ zipWith (\i f -> if testBit w i then [f] else []) [0..] $ [ ACC_PUBLIC, @@ -98,14 +107,19 @@ convertAttrs pool attrs = M.fromList $ map go attrs go (AttributeInfo {..}) = (getString $ pool ! attributeName, attributeValue) +-- | Try to get class method by name methodByName :: Class -> B.ByteString -> Maybe Method methodByName cls name = find (\m -> methodName m == name) (methods cls) +-- | Try to get object attribute by name attrByName :: (HasAttributes a) => a -> B.ByteString -> Maybe B.ByteString attrByName x name = M.lookup name (attributes x) -methodCode :: Class -> B.ByteString -> Maybe B.ByteString +-- | Try to get Code for class method (no Code for interface methods) +methodCode :: Class + -> B.ByteString -- ^ Method name + -> Maybe B.ByteString methodCode cls name = do method <- methodByName cls name attrByName method "Code" diff --git a/JVM/Types.hs b/JVM/Types.hs index 834af21..f54a8ba 100644 --- a/JVM/Types.hs +++ b/JVM/Types.hs @@ -1,4 +1,5 @@ {-# LANGUAGE TypeFamilies, StandaloneDeriving, FlexibleInstances, FlexibleContexts, UndecidableInstances #-} +-- | This module declares `high-level' data types for Java classes, methods etc. module JVM.Types where import Codec.Binary.UTF8.String hiding (encode, decode) @@ -18,11 +19,13 @@ instance IsString B.ByteString where toString :: B.ByteString -> String toString bstr = decodeString $ map (chr . fromIntegral) $ B.unpack bstr +-- | Constant pool type Pool = Array Word16 Constant class HasAttributes a where attributes :: a -> Attributes +-- | Java class data Class = Class { constantPool :: Pool, classAccess :: Access, @@ -41,6 +44,7 @@ instance HasAttributes Class where class HasSignature a where type Signature a +-- | Name and signature pair. Used for methods and fields. data NameType a = NameType { ntName :: B.ByteString, ntSignature :: Signature a } @@ -50,6 +54,7 @@ instance Show (Signature a) => Show (NameType a) where deriving instance Eq (Signature a) => Eq (NameType a) +-- | Constant pool item data Constant = CClass {className :: B.ByteString} | CField {refClass :: B.ByteString, fieldNameType :: NameType Field} @@ -79,6 +84,7 @@ instance Show Constant where show (CUTF8 s) = "UTF8 \"" ++ toString s ++ "\"" show (CUnicode s) = "Unicode \"" ++ toString s ++ "\"" +-- | Class field data Field = Field { fieldAccess :: Access, fieldName :: B.ByteString, @@ -92,6 +98,7 @@ instance HasSignature Field where instance HasAttributes Field where attributes = fieldAttrs +-- | Class method data Method = Method { methodAccess :: Access, methodName :: B.ByteString, @@ -105,30 +112,30 @@ instance HasSignature Method where instance HasAttributes Method where attributes = methodAttrs +-- | Set of access flags type Access = S.Set AccessFlag +-- | Access flags. Used for classess, methods, variables. data AccessFlag = - ACC_PUBLIC -- 0x0001 Видимый для всех Класс, Метод, Переменная - | ACC_PRIVATE -- 0x0002 Видимый только для определяемого класса Метод, Переменная - | ACC_PROTECTED -- 0x0004 Видимый для подклассов Метод, Переменная - | ACC_STATIC -- 0x0008 Переменная или метод статические Метод, Переменная - | ACC_FINAL -- 0x0010 Нет дальнейшей подкласификации, обхода или присваивания после инициализации Класс, Метод, Переменная - | ACC_SYNCHRONIZED -- 0x0020 Использует возврат в блокировке монитора Метод - | ACC_VOLATILE -- 0x0040 Не может помещать в кеш Переменная - | ACC_TRANSIENT -- 0x0080 Не может боть написан или прочитан постоянным объектом управления Перемення - | ACC_NATIVE -- 0x0100 Реализован в других языках Метод - | ACC_INTERFACE -- 0x0200 интерфейс Класс - | ACC_ABSTRACT -- 0x0400 Ничего не предусматривает Класс, Метод + ACC_PUBLIC -- 0x0001 Visible for all + | ACC_PRIVATE -- 0x0002 Visible only for defined class + | ACC_PROTECTED -- 0x0004 Visible only for subclasses + | ACC_STATIC -- 0x0008 Static method or variable + | ACC_FINAL -- 0x0010 No further subclassing or assignments + | ACC_SYNCHRONIZED -- 0x0020 Uses monitors + | ACC_VOLATILE -- 0x0040 Could not be cached + | ACC_TRANSIENT -- 0x0080 + | ACC_NATIVE -- 0x0100 Implemented in other language + | ACC_INTERFACE -- 0x0200 Class is interface + | ACC_ABSTRACT -- 0x0400 deriving (Eq, Show, Ord) +-- | Generic attribute data Attribute = Attribute { attrName :: B.ByteString, attrValue :: B.ByteString } deriving (Eq, Show) -class AttributeValue a where - decodeAttribute :: B.ByteString -> a - encodeAttribute :: a -> B.ByteString - +-- | Set of attributes type Attributes = M.Map B.ByteString B.ByteString -- 2.25.1