-{-# LANGUAGE TypeFamilies, StandaloneDeriving, FlexibleInstances, FlexibleContexts, UndecidableInstances, RecordWildCards, OverloadedStrings, TypeSynonymInstances, MultiParamTypeClasses #-}
-module JVM.Assembler where
+{-# LANGUAGE TypeFamilies, StandaloneDeriving, FlexibleInstances,
+ FlexibleContexts, UndecidableInstances, RecordWildCards, OverloadedStrings,
+ TypeSynonymInstances, MultiParamTypeClasses #-}
+-- | This module declares data type for JVM instructions, and BinaryState
+-- instances to read/write them.
+module JVM.Assembler
+ (Instruction (..),
+ ArrayType (..),
+ Code (..),
+ IMM (..),
+ CMP (..)
+ )
+ where
import Control.Monad
import Control.Applicative
import Data.Ix (inRange)
-import Data.List
+import Data.List (intercalate)
import Data.Word
import Data.Bits
import qualified Data.Binary as Binary
import qualified Data.Binary.Get as Get
import Data.Char
-import Data.String
import qualified Data.ByteString.Lazy as B
import Data.Array
import qualified Data.Set as S
import JVM.ClassFile
import JVM.Types
+-- | Immediate constant. Corresponding value will be added to base opcode.
data IMM =
- I0
- | I1
- | I2
- | I3
+ I0 -- ^ 0
+ | I1 -- ^ 1
+ | I2 -- ^ 2
+ | I3 -- ^ 3
deriving (Eq, Ord, Enum, Show)
+-- | Comparation operation type. Not all CMP instructions support all operations.
data CMP =
C_EQ
| C_NE
| C_LE
deriving (Eq, Ord, Enum, Show)
+-- | Format of Code method attribute.
data Code = Code {
codeStackSize :: Word16,
codeMaxLocals :: Word16,
codeAttributes :: [AttributeInfo] }
deriving (Eq, Show)
+-- | Exception descriptor
data CodeException = CodeException {
eStartPC :: Word16,
eEndPC :: Word16,
attrs <- replicateM (fromIntegral nAttrs) get
return $ Code stackSz locals len code excn excs nAttrs attrs
+-- | Read sequence of instructions (to end of stream)
readInstructions :: GetState Integer [Instruction]
readInstructions = do
end <- isEmpty
next <- readInstructions
return (x: next)
+-- | JVM instruction set
data Instruction =
- NOP -- 0
- | ACONST_NULL -- 1
- | ICONST_M1 -- 2
- | ICONST_0 -- 3
- | ICONST_1 -- 4
- | ICONST_2 -- 5
- | ICONST_3 -- 6
- | ICONST_4 -- 7
- | ICONST_5 -- 8
- | LCONST_0 -- 9
- | LCONST_1 -- 10
- | FCONST_0 -- 11
- | FCONST_1 -- 12
- | FCONST_2 -- 13
- | DCONST_0 -- 14
- | DCONST_1 -- 15
- | BIPUSH Word8 -- 16
- | SIPUSH Word16 -- 17
- | LDC1 Word8 -- 18
- | LDC2 Word16 -- 19
- | LDC2W Word16 -- 20
- | ILOAD Word8 -- 21
- | LLOAD Word8 -- 22
- | FLOAD Word8 -- 23
- | DLOAD Word8 -- 24
- | ALOAD Word8 -- 25
- | ILOAD_ IMM -- 26, 27, 28, 29
- | LLOAD_ IMM -- 30, 31, 32, 33
- | FLOAD_ IMM -- 34, 35, 36, 37
- | DLOAD_ IMM -- 38, 39, 40, 41
- | ALOAD_ IMM -- 42, 43, 44, 45
- | IALOAD -- 46
- | LALOAD -- 47
- | FALOAD -- 48
- | DALOAD -- 49
- | AALOAD -- 50
- | BALOAD -- 51
- | CALOAD -- 52
- | SALOAD -- 53
- | ISTORE Word8 -- 54
- | LSTORE Word8 -- 55
- | FSTORE Word8 -- 56
- | DSTORE Word8 -- 57
- | ASTORE Word8 -- 58
- | ISTORE_ IMM -- 59, 60, 61, 62
- | LSTORE_ IMM -- 63, 64, 65, 66
- | FSTORE_ IMM -- 67, 68, 69, 70
- | DSTORE_ IMM -- 71, 72, 73, 74
- | ASTORE_ IMM -- 75, 76, 77, 78
- | IASTORE -- 79
- | LASTORE -- 80
- | FASTORE -- 81
- | DASTORE -- 82
- | AASTORE -- 83
- | BASTORE -- 84
- | CASTORE -- 85
- | SASTORE -- 86
- | POP -- 87
- | POP2 -- 88
- | DUP -- 89
- | DUP_X1 -- 90
- | DUP_X2 -- 91
- | DUP2 -- 92
- | DUP2_X1 -- 93
- | DUP2_X2 -- 94
- | SWAP -- 95
- | IADD -- 96
- | LADD -- 97
- | FADD -- 98
- | DADD -- 99
- | ISUB -- 100
- | LSUB -- 101
- | FSUB -- 102
- | DSUB -- 103
- | IMUL -- 104
- | LMUL -- 105
- | FMUL -- 106
- | DMUL -- 107
- | IDIV -- 108
- | LDIV -- 109
- | FDIV -- 110
- | DDIV -- 111
- | IREM -- 112
- | LREM -- 113
- | FREM -- 114
- | DREM -- 115
- | INEG -- 116
- | LNEG -- 117
- | FNEG -- 118
- | DNEG -- 119
- | ISHL -- 120
- | LSHL -- 121
- | ISHR -- 122
- | LSHR -- 123
- | IUSHR -- 124
- | LUSHR -- 125
- | IAND -- 126
- | LAND -- 127
- | IOR -- 128
- | LOR -- 129
- | IXOR -- 130
- | LXOR -- 131
- | IINC Word8 Word8 -- 132
- | I2L -- 133
- | I2F -- 134
- | I2D -- 135
- | L2I -- 136
- | L2F -- 137
- | L2D -- 138
- | F2I -- 139
- | F2L -- 140
- | F2D -- 141
- | D2I -- 142
- | D2L -- 143
- | D2F -- 144
- | I2B -- 145
- | I2C -- 146
- | I2S -- 147
- | LCMP -- 148
- | FCMP CMP -- 149, 150
- | DCMP CMP -- 151, 152
- | IF CMP -- 153, 154, 155, 156, 157, 158
- | IF_ICMP CMP Word16 -- 159, 160, 161, 162, 163, 164
- | IF_ACMP CMP Word16 -- 165, 166
- | GOTO -- 167
- | JSR Word16 -- 168
- | RET -- 169
- | TABLESWITCH Word32 Word32 Word32 [Word32] -- 170
- | LOOKUPSWITCH Word32 Word32 [(Word32, Word32)] -- 171
- | IRETURN -- 172
- | LRETURN -- 173
- | FRETURN -- 174
- | DRETURN -- 175
- | RETURN -- 177
- | GETSTATIC Word16 -- 178
- | PUTSTATIC Word16 -- 179
- | GETFIELD Word16 -- 180
- | PUTFIELD Word16 -- 181
- | INVOKEVIRTUAL Word16 -- 182
- | INVOKESPECIAL Word16 -- 183
- | INVOKESTATIC Word16 -- 184
- | INVOKEINTERFACE Word16 Word8 -- 185
- | NEW Word16 -- 187
- | NEWARRAY Word8 -- 188, see ArrayType
- | ANEWARRAY Word16 -- 189
- | ARRAYLENGTH -- 190
- | ATHROW -- 191
- | CHECKCAST Word16 -- 192
- | INSTANCEOF Word16 -- 193
- | MONITORENTER -- 194
- | MONITOREXIT -- 195
- | WIDE Word8 Instruction -- 196
- | MULTINANEWARRAY Word16 Word8 -- 197
- | IFNULL Word16 -- 198
- | IFNONNULL Word16 -- 199
- | GOTO_W Word32 -- 200
- | JSR_W Word32 -- 201
+ NOP -- ^ 0
+ | ACONST_NULL -- ^ 1
+ | ICONST_M1 -- ^ 2
+ | ICONST_0 -- ^ 3
+ | ICONST_1 -- ^ 4
+ | ICONST_2 -- ^ 5
+ | ICONST_3 -- ^ 6
+ | ICONST_4 -- ^ 7
+ | ICONST_5 -- ^ 8
+ | LCONST_0 -- ^ 9
+ | LCONST_1 -- ^ 10
+ | FCONST_0 -- ^ 11
+ | FCONST_1 -- ^ 12
+ | FCONST_2 -- ^ 13
+ | DCONST_0 -- ^ 14
+ | DCONST_1 -- ^ 15
+ | BIPUSH Word8 -- ^ 16
+ | SIPUSH Word16 -- ^ 17
+ | LDC1 Word8 -- ^ 18
+ | LDC2 Word16 -- ^ 19
+ | LDC2W Word16 -- ^ 20
+ | ILOAD Word8 -- ^ 21
+ | LLOAD Word8 -- ^ 22
+ | FLOAD Word8 -- ^ 23
+ | DLOAD Word8 -- ^ 24
+ | ALOAD Word8 -- ^ 25
+ | ILOAD_ IMM -- ^ 26, 27, 28, 29
+ | LLOAD_ IMM -- ^ 30, 31, 32, 33
+ | FLOAD_ IMM -- ^ 34, 35, 36, 37
+ | DLOAD_ IMM -- ^ 38, 39, 40, 41
+ | ALOAD_ IMM -- ^ 42, 43, 44, 45
+ | IALOAD -- ^ 46
+ | LALOAD -- ^ 47
+ | FALOAD -- ^ 48
+ | DALOAD -- ^ 49
+ | AALOAD -- ^ 50
+ | BALOAD -- ^ 51
+ | CALOAD -- ^ 52
+ | SALOAD -- ^ 53
+ | ISTORE Word8 -- ^ 54
+ | LSTORE Word8 -- ^ 55
+ | FSTORE Word8 -- ^ 56
+ | DSTORE Word8 -- ^ 57
+ | ASTORE Word8 -- ^ 58
+ | ISTORE_ IMM -- ^ 59, 60, 61, 62
+ | LSTORE_ IMM -- ^ 63, 64, 65, 66
+ | FSTORE_ IMM -- ^ 67, 68, 69, 70
+ | DSTORE_ IMM -- ^ 71, 72, 73, 74
+ | ASTORE_ IMM -- ^ 75, 76, 77, 78
+ | IASTORE -- ^ 79
+ | LASTORE -- ^ 80
+ | FASTORE -- ^ 81
+ | DASTORE -- ^ 82
+ | AASTORE -- ^ 83
+ | BASTORE -- ^ 84
+ | CASTORE -- ^ 85
+ | SASTORE -- ^ 86
+ | POP -- ^ 87
+ | POP2 -- ^ 88
+ | DUP -- ^ 89
+ | DUP_X1 -- ^ 90
+ | DUP_X2 -- ^ 91
+ | DUP2 -- ^ 92
+ | DUP2_X1 -- ^ 93
+ | DUP2_X2 -- ^ 94
+ | SWAP -- ^ 95
+ | IADD -- ^ 96
+ | LADD -- ^ 97
+ | FADD -- ^ 98
+ | DADD -- ^ 99
+ | ISUB -- ^ 100
+ | LSUB -- ^ 101
+ | FSUB -- ^ 102
+ | DSUB -- ^ 103
+ | IMUL -- ^ 104
+ | LMUL -- ^ 105
+ | FMUL -- ^ 106
+ | DMUL -- ^ 107
+ | IDIV -- ^ 108
+ | LDIV -- ^ 109
+ | FDIV -- ^ 110
+ | DDIV -- ^ 111
+ | IREM -- ^ 112
+ | LREM -- ^ 113
+ | FREM -- ^ 114
+ | DREM -- ^ 115
+ | INEG -- ^ 116
+ | LNEG -- ^ 117
+ | FNEG -- ^ 118
+ | DNEG -- ^ 119
+ | ISHL -- ^ 120
+ | LSHL -- ^ 121
+ | ISHR -- ^ 122
+ | LSHR -- ^ 123
+ | IUSHR -- ^ 124
+ | LUSHR -- ^ 125
+ | IAND -- ^ 126
+ | LAND -- ^ 127
+ | IOR -- ^ 128
+ | LOR -- ^ 129
+ | IXOR -- ^ 130
+ | LXOR -- ^ 131
+ | IINC Word8 Word8 -- ^ 132
+ | I2L -- ^ 133
+ | I2F -- ^ 134
+ | I2D -- ^ 135
+ | L2I -- ^ 136
+ | L2F -- ^ 137
+ | L2D -- ^ 138
+ | F2I -- ^ 139
+ | F2L -- ^ 140
+ | F2D -- ^ 141
+ | D2I -- ^ 142
+ | D2L -- ^ 143
+ | D2F -- ^ 144
+ | I2B -- ^ 145
+ | I2C -- ^ 146
+ | I2S -- ^ 147
+ | LCMP -- ^ 148
+ | FCMP CMP -- ^ 149, 150
+ | DCMP CMP -- ^ 151, 152
+ | IF CMP -- ^ 153, 154, 155, 156, 157, 158
+ | IF_ICMP CMP Word16 -- ^ 159, 160, 161, 162, 163, 164
+ | IF_ACMP CMP Word16 -- ^ 165, 166
+ | GOTO -- ^ 167
+ | JSR Word16 -- ^ 168
+ | RET -- ^ 169
+ | TABLESWITCH Word32 Word32 Word32 [Word32] -- ^ 170
+ | LOOKUPSWITCH Word32 Word32 [(Word32, Word32)] -- ^ 171
+ | IRETURN -- ^ 172
+ | LRETURN -- ^ 173
+ | FRETURN -- ^ 174
+ | DRETURN -- ^ 175
+ | RETURN -- ^ 177
+ | GETSTATIC Word16 -- ^ 178
+ | PUTSTATIC Word16 -- ^ 179
+ | GETFIELD Word16 -- ^ 180
+ | PUTFIELD Word16 -- ^ 181
+ | INVOKEVIRTUAL Word16 -- ^ 182
+ | INVOKESPECIAL Word16 -- ^ 183
+ | INVOKESTATIC Word16 -- ^ 184
+ | INVOKEINTERFACE Word16 Word8 -- ^ 185
+ | NEW Word16 -- ^ 187
+ | NEWARRAY Word8 -- ^ 188, see @ArrayType@
+ | ANEWARRAY Word16 -- ^ 189
+ | ARRAYLENGTH -- ^ 190
+ | ATHROW -- ^ 191
+ | CHECKCAST Word16 -- ^ 192
+ | INSTANCEOF Word16 -- ^ 193
+ | MONITORENTER -- ^ 194
+ | MONITOREXIT -- ^ 195
+ | WIDE Word8 Instruction -- ^ 196
+ | MULTINANEWARRAY Word16 Word8 -- ^ 197
+ | IFNULL Word16 -- ^ 198
+ | IFNONNULL Word16 -- ^ 199
+ | GOTO_W Word32 -- ^ 200
+ | JSR_W Word32 -- ^ 201
deriving (Eq, Show)
+-- ^ JVM array type (primitive types)
data ArrayType =
- T_BOOLEAN -- 4
- | T_CHAR -- 5
- | T_FLOAT -- 6
- | T_DOUBLE -- 7
- | T_BYTE -- 8
- | T_SHORT -- 9
- | T_INT -- 10
- | T_LONG -- 11
+ T_BOOLEAN -- ^ 4
+ | T_CHAR -- ^ 5
+ | T_FLOAT -- ^ 6
+ | T_DOUBLE -- ^ 7
+ | T_BYTE -- ^ 8
+ | T_SHORT -- ^ 9
+ | T_INT -- ^ 10
+ | T_LONG -- ^ 11
deriving (Eq, Show, Enum)
-imm :: Word8 -> (IMM -> Instruction) -> Word8 -> GetState s Instruction
+-- ^ Parse opcode with immediate constant
+imm :: Word8 -- ^ Base opcode
+ -> (IMM -> Instruction) -- ^ Instruction constructor
+ -> Word8 -- ^ Opcode to parse
+ -> GetState s Instruction
imm base constr x = return $ constr $ toEnum $ fromIntegral (x-base)
-putImm :: Word8 -> IMM -> PutState Integer ()
+-- ^ Put opcode with immediate constant
+putImm :: Word8 -- ^ Base opcode
+ -> IMM -- ^ Constant to add to opcode
+ -> PutState Integer ()
putImm base i = putByte $ base + (fromIntegral $ fromEnum i)
atype2byte :: ArrayType -> Word8
put t = putByte (atype2byte t)
-put1 :: (BinaryState Integer a) => Word8 -> a -> PutState Integer ()
+-- ^ Put opcode with one argument
+put1 :: (BinaryState Integer a)
+ => Word8 -- ^ Opcode
+ -> a -- ^ First argument
+ -> PutState Integer ()
put1 code x = do
putByte code
put x
-put2 :: (BinaryState Integer a, BinaryState Integer b) => Word8 -> a -> b -> PutState Integer ()
+put2 :: (BinaryState Integer a, BinaryState Integer b)
+ => Word8 -- ^ Opcode
+ -> a -- ^ First argument
+ -> b -- ^ Second argument
+ -> PutState Integer ()
put2 code x y = do
putByte code
put x
{-# LANGUAGE RecordWildCards, BangPatterns #-}
+-- | This module declares (low-level) data types for Java .class files
+-- structures, and Binary instances to read/write them.
module JVM.ClassFile where
import Control.Monad
import Data.List
import qualified Data.ByteString.Lazy as B
-import Debug.Trace
-
-traceS :: (Show a) => String -> a -> a
-traceS msg x = trace (msg ++ ": " ++ show x) x
-
-char :: Word8 -> Char
-char n = chr (fromIntegral n)
-
+-- | Read one-byte Char
getChar8 :: Get Char
getChar8 = do
x <- getWord8
- return (char x)
+ return $ chr (fromIntegral x)
+-- | Generic .class file format
data ClassFile = ClassFile {
- magic :: Word32,
+ magic :: Word32, -- ^ Magic value: 0xCAFEBABE
minorVersion :: Word16,
majorVersion :: Word16,
- constsPoolSize :: Word16,
- constsPool :: [CpInfo],
- accessFlags :: Word16,
- thisClass :: Word16,
- superClass :: Word16,
- interfacesCount :: Word16,
- interfaces :: [Word16],
- classFieldsCount :: Word16,
- classFields :: [FieldInfo],
- classMethodsCount :: Word16,
- classMethods :: [MethodInfo],
- classAttributesCount :: Word16,
- classAttributes :: [AttributeInfo]
+ constsPoolSize :: Word16, -- ^ Number of items in constants pool
+ constsPool :: [CpInfo], -- ^ Constants pool itself
+ accessFlags :: Word16, -- ^ See @JVM.Types.AccessFlag@
+ thisClass :: Word16, -- ^ Constants pool item index for this class
+ superClass :: Word16, -- ^ --/-- for super class, zero for java.lang.Object
+ interfacesCount :: Word16, -- ^ Number of implemented interfaces
+ interfaces :: [Word16], -- ^ Constants pool item indexes for implemented interfaces
+ classFieldsCount :: Word16, -- ^ Number of class fileds
+ classFields :: [FieldInfo], -- ^ Class fields
+ classMethodsCount :: Word16, -- ^ Number of class methods
+ classMethods :: [MethodInfo], -- ^ Class methods
+ classAttributesCount :: Word16, -- ^ Number of class attributes
+ classAttributes :: [AttributeInfo] -- ^ Class attributes
}
deriving (Eq, Show)
-traceM msg x = do
- r <- x
- return $ traceS msg r
-
-replicateMT n m = replicateM n (traceM ">" m)
-
instance Binary ClassFile where
put (ClassFile {..}) = do
put magic
return $ ClassFile magic minor major poolsize pool af this super
interfacesCount ifaces classFieldsCount classFields classMethodsCount classMethods asCount as
+-- | Field signature format
data FieldType =
- SignedByte -- B
- | CharByte -- C
- | DoubleType -- D
- | FloatType -- F
- | IntType -- I
- | LongInt -- J
- | ShortInt -- S
- | BoolType -- Z
- | ObjectType String -- L <class name>
- | Array (Maybe Int) FieldType
+ SignedByte -- ^ B
+ | CharByte -- ^ C
+ | DoubleType -- ^ D
+ | FloatType -- ^ F
+ | IntType -- ^ I
+ | LongInt -- ^ J
+ | ShortInt -- ^ S
+ | BoolType -- ^ Z
+ | ObjectType String -- ^ L <class name>
+ | Array (Maybe Int) FieldType -- ^ [<type>
deriving (Eq)
instance Show FieldType where
show (Array Nothing t) = show t ++ "[]"
show (Array (Just n) t) = show t ++ "[" ++ show n ++ "]"
+-- | Class field signature
type FieldSignature = FieldType
+-- | Try to read integer value from decimal representation
getInt :: Get (Maybe Int)
getInt = do
s <- getDigits
return (Array mbSize sig)
_ -> fail $ "Unknown signature opening symbol: " ++ [b]
+-- | Read string up to `;'
getToSemicolon :: Get String
getToSemicolon = do
x <- get
next <- getToSemicolon
return (x: next)
+-- | Return value signature
data ReturnSignature =
Returns FieldType
| ReturnsVoid
'V' -> skip 1 >> return ReturnsVoid
_ -> Returns <$> get
+-- | Method argument signature
type ArgumentSignature = FieldType
+-- | Class method argument signature
data MethodSignature =
MethodSignature [ArgumentSignature] ReturnSignature
deriving (Eq)
ret <- get
return (MethodSignature args ret)
+-- | Read arguments signatures (up to `)')
getArgs :: Get [ArgumentSignature]
getArgs = whileJust getArg
where
return (x: next)
Nothing -> return []
+-- | Constant pool item format
data CpInfo =
- CONSTANT_Class {nameIndex :: Word16} -- 7
- | CONSTANT_Fieldref {classIndex :: Word16, nameAndTypeIndex :: Word16} -- 9
- | CONSTANT_Methodref {classIndex :: Word16, nameAndTypeIndex :: Word16} -- 10
- | CONSTANT_InterfaceMethodref {classIndex :: Word16, nameAndTypeIndex :: Word16}-- 11
- | CONSTANT_String {stringIndex :: Word16} -- 8
- | CONSTANT_Integer {fourBytes :: Word32} -- 3
- | CONSTANT_Float Float -- 4
- | CONSTANT_Long Word64 -- 5
- | CONSTANT_Double Double -- 6
- | CONSTANT_NameAndType {nameIndex :: Word16, signatureIndex :: Word16} -- 12
- | CONSTANT_Utf8 {stringLength :: Word16, stringBytes :: B.ByteString} -- 1
- | CONSTANT_Unicode {stringLength :: Word16, stringBytes :: B.ByteString} -- 2
+ CONSTANT_Class {nameIndex :: Word16} -- ^ 7
+ | CONSTANT_Fieldref {classIndex :: Word16, nameAndTypeIndex :: Word16} -- ^ 9
+ | CONSTANT_Methodref {classIndex :: Word16, nameAndTypeIndex :: Word16} -- ^ 10
+ | CONSTANT_InterfaceMethodref {classIndex :: Word16, nameAndTypeIndex :: Word16}-- ^ 11
+ | CONSTANT_String {stringIndex :: Word16} -- ^ 8
+ | CONSTANT_Integer {fourBytes :: Word32} -- ^ 3
+ | CONSTANT_Float Float -- ^ 4
+ | CONSTANT_Long Word64 -- ^ 5
+ | CONSTANT_Double Double -- ^ 6
+ | CONSTANT_NameAndType {nameIndex :: Word16, signatureIndex :: Word16} -- ^ 12
+ | CONSTANT_Utf8 {stringLength :: Word16, stringBytes :: B.ByteString} -- ^ 1
+ | CONSTANT_Unicode {stringLength :: Word16, stringBytes :: B.ByteString} -- ^ 2
deriving (Eq, Show)
instance Binary CpInfo where
12 -> CONSTANT_NameAndType <$> get <*> get
_ -> fail $ "Unknown constants pool entry tag: " ++ show tag
+-- | Class field format
data FieldInfo = FieldInfo {
fieldAccessFlags :: Word16,
fieldNameIndex :: Word16,
as <- replicateM (fromIntegral n) get
return $ FieldInfo af ni si n as
+-- | Class method format
data MethodInfo = MethodInfo {
methodAccessFlags :: Word16,
methodNameIndex :: Word16,
as <- replicateM (fromIntegral n) get
return $ MethodInfo af ni si n as
+-- | Any (class/ field/ method/ ...) attribute format.
+-- Some formats specify special formats for @attributeValue@.
data AttributeInfo = AttributeInfo {
attributeName :: Word16,
attributeLength :: Word32,
{-# LANGUAGE TypeFamilies, StandaloneDeriving, FlexibleInstances, FlexibleContexts, UndecidableInstances, RecordWildCards, OverloadedStrings #-}
-module JVM.Converter where
+-- | Functions to convert from low-level .class format representation and
+-- high-level Java classes, methods etc representation
+module JVM.Converter
+ (decompile, decompileFile,
+ convertClass,
+ methodByName,
+ attrByName,
+ methodCode
+ )
+ where
import Data.List
import Data.Word
import JVM.ClassFile
import JVM.Types
+-- | Parse .class file data
decompile :: B.ByteString -> Class
decompile bstr = convertClass $ decode bstr
+-- | Parse class data from file
decompileFile :: FilePath -> IO Class
decompileFile path = convertClass `fmap` decodeFile path
convert (CONSTANT_Utf8 _ bs) = CUTF8 bs
convert (CONSTANT_Unicode _ bs) = CUnicode bs
-className' x = trace ("Class name: " ++ show x) B.empty
-
convertAccess :: Word16 -> Access
convertAccess w = S.fromList $ concat $ zipWith (\i f -> if testBit w i then [f] else []) [0..] $ [
ACC_PUBLIC,
go (AttributeInfo {..}) = (getString $ pool ! attributeName,
attributeValue)
+-- | Try to get class method by name
methodByName :: Class -> B.ByteString -> Maybe Method
methodByName cls name =
find (\m -> methodName m == name) (methods cls)
+-- | Try to get object attribute by name
attrByName :: (HasAttributes a) => a -> B.ByteString -> Maybe B.ByteString
attrByName x name = M.lookup name (attributes x)
-methodCode :: Class -> B.ByteString -> Maybe B.ByteString
+-- | Try to get Code for class method (no Code for interface methods)
+methodCode :: Class
+ -> B.ByteString -- ^ Method name
+ -> Maybe B.ByteString
methodCode cls name = do
method <- methodByName cls name
attrByName method "Code"
{-# LANGUAGE TypeFamilies, StandaloneDeriving, FlexibleInstances, FlexibleContexts, UndecidableInstances #-}
+-- | This module declares `high-level' data types for Java classes, methods etc.
module JVM.Types where
import Codec.Binary.UTF8.String hiding (encode, decode)
toString :: B.ByteString -> String
toString bstr = decodeString $ map (chr . fromIntegral) $ B.unpack bstr
+-- | Constant pool
type Pool = Array Word16 Constant
class HasAttributes a where
attributes :: a -> Attributes
+-- | Java class
data Class = Class {
constantPool :: Pool,
classAccess :: Access,
class HasSignature a where
type Signature a
+-- | Name and signature pair. Used for methods and fields.
data NameType a = NameType {
ntName :: B.ByteString,
ntSignature :: Signature a }
deriving instance Eq (Signature a) => Eq (NameType a)
+-- | Constant pool item
data Constant =
CClass {className :: B.ByteString}
| CField {refClass :: B.ByteString, fieldNameType :: NameType Field}
show (CUTF8 s) = "UTF8 \"" ++ toString s ++ "\""
show (CUnicode s) = "Unicode \"" ++ toString s ++ "\""
+-- | Class field
data Field = Field {
fieldAccess :: Access,
fieldName :: B.ByteString,
instance HasAttributes Field where
attributes = fieldAttrs
+-- | Class method
data Method = Method {
methodAccess :: Access,
methodName :: B.ByteString,
instance HasAttributes Method where
attributes = methodAttrs
+-- | Set of access flags
type Access = S.Set AccessFlag
+-- | Access flags. Used for classess, methods, variables.
data AccessFlag =
- ACC_PUBLIC -- 0x0001 Видимый для всех Класс, Метод, Переменная
- | ACC_PRIVATE -- 0x0002 Видимый только для определяемого класса Метод, Переменная
- | ACC_PROTECTED -- 0x0004 Видимый для подклассов Метод, Переменная
- | ACC_STATIC -- 0x0008 Переменная или метод статические Метод, Переменная
- | ACC_FINAL -- 0x0010 Нет дальнейшей подкласификации, обхода или присваивания после инициализации Класс, Метод, Переменная
- | ACC_SYNCHRONIZED -- 0x0020 Использует возврат в блокировке монитора Метод
- | ACC_VOLATILE -- 0x0040 Не может помещать в кеш Переменная
- | ACC_TRANSIENT -- 0x0080 Не может боть написан или прочитан постоянным объектом управления Перемення
- | ACC_NATIVE -- 0x0100 Реализован в других языках Метод
- | ACC_INTERFACE -- 0x0200 интерфейс Класс
- | ACC_ABSTRACT -- 0x0400 Ничего не предусматривает Класс, Метод
+ ACC_PUBLIC -- 0x0001 Visible for all
+ | ACC_PRIVATE -- 0x0002 Visible only for defined class
+ | ACC_PROTECTED -- 0x0004 Visible only for subclasses
+ | ACC_STATIC -- 0x0008 Static method or variable
+ | ACC_FINAL -- 0x0010 No further subclassing or assignments
+ | ACC_SYNCHRONIZED -- 0x0020 Uses monitors
+ | ACC_VOLATILE -- 0x0040 Could not be cached
+ | ACC_TRANSIENT -- 0x0080
+ | ACC_NATIVE -- 0x0100 Implemented in other language
+ | ACC_INTERFACE -- 0x0200 Class is interface
+ | ACC_ABSTRACT -- 0x0400
deriving (Eq, Show, Ord)
+-- | Generic attribute
data Attribute = Attribute {
attrName :: B.ByteString,
attrValue :: B.ByteString }
deriving (Eq, Show)
-class AttributeValue a where
- decodeAttribute :: B.ByteString -> a
- encodeAttribute :: a -> B.ByteString
-
+-- | Set of attributes
type Attributes = M.Map B.ByteString B.ByteString