-{-# LANGUAGE RecordWildCards, BangPatterns #-}
+{-# LANGUAGE RecordWildCards, BangPatterns, TypeFamilies, StandaloneDeriving, FlexibleContexts, FlexibleInstances, UndecidableInstances, TypeSynonymInstances #-}
-- | This module declares (low-level) data types for Java .class files
-- structures, and Binary instances to read/write them.
module JVM.ClassFile
- (ClassFile (..),
- CpInfo (..),
- FieldInfo (..),
- MethodInfo (..),
- AttributeInfo (..),
- FieldType,
- FieldSignature, MethodSignature (..), ReturnSignature (..)
+ (-- * About
+ -- $about
+ --
+ -- * Internal class file structures
+ Attribute (..),
+ FieldType (..),
+ -- * Signatures
+ FieldSignature, MethodSignature (..), ReturnSignature (..),
+ ArgumentSignature (..),
+ -- * Stage types
+ File, Direct,
+ -- * Staged structures
+ Pool, Link,
+ Method (..), Field (..), Class (..),
+ Constant (..),
+ AccessFlag (..), AccessFlags,
+ Attributes (..),
+ defaultClass,
+ -- * Misc
+ HasSignature (..), HasAttributes (..),
+ NameType (..),
+ toString,
+ className,
+ apsize, arsize, arlist
)
where
import Data.Binary.IEEE754
import Data.Binary.Get
import Data.Binary.Put
-import Data.Word
import Data.Char
import Data.List
+import Data.Default
+import qualified Data.Set as S
+import qualified Data.Map as M
import qualified Data.ByteString.Lazy as B
+import Codec.Binary.UTF8.String hiding (encode, decode)
+
+-- $about
+--
+-- Java .class file uses constants pool, which stores almost all source-code-level
+-- constants (strings, integer literals etc), and also all identifiers (class,
+-- method, field names etc). All other structures contain indexes of constants in
+-- the pool instead of constants theirself.
+--
+-- It's not convient to use that indexes programmatically. So, .class file is represented
+-- at two stages: File and Direct. At File stage, all data structures contain only indexes,
+-- not constants theirself. When we read a class from a file, we get structure at File stage.
+-- We only can write File stage structure to file.
+--
+-- At Direct stage, structures conain constants, not indexes. Convertion functions (File <-> Direct)
+-- are located in the JVM.Converter module.
+--
-- | Read one-byte Char
getChar8 :: Get Char
x <- getWord8
return $ chr (fromIntegral x)
+toString :: B.ByteString -> String
+toString bstr = decodeString $ map (chr . fromIntegral) $ B.unpack bstr
+
+-- | File stage
+data File = File
+
+-- | Direct representation stage
+data Direct = Direct
+
+-- | Link to some object
+type family Link stage a
+
+-- | At File stage, Link contain index of object in the constants pool.
+type instance Link File a = Word16
+
+-- | At Direct stage, Link contain object itself.
+type instance Link Direct a = a
+
+-- | Object (class, method, field …) access flags
+type family AccessFlags stage
+
+-- | At File stage, access flags are represented as Word16
+type instance AccessFlags File = Word16
+
+-- | At Direct stage, access flags are represented as set of flags.
+type instance AccessFlags Direct = S.Set AccessFlag
+
+-- | Object (class, method, field) attributes
+data family Attributes stage
+
+-- | At File stage, attributes are represented as list of Attribute structures.
+data instance Attributes File = AP {attributesList :: [Attribute]}
+ deriving (Eq, Show)
+
+instance Default (Attributes File) where
+ def = AP []
+
+-- | At Direct stage, attributes are represented as a Map.
+data instance Attributes Direct = AR (M.Map B.ByteString B.ByteString)
+ deriving (Eq, Show)
+
+instance Default (Attributes Direct) where
+ def = AR M.empty
+
+-- | Size of attributes set at Direct stage
+arsize :: Attributes Direct -> Int
+arsize (AR m) = M.size m
+
+-- | Associative list of attributes at Direct stage
+arlist :: Attributes Direct -> [(B.ByteString, B.ByteString)]
+arlist (AR m) = M.assocs m
+
+-- | Size of attributes set at File stage
+apsize :: Attributes File -> Int
+apsize (AP list) = length list
+
+-- | Access flags. Used for classess, methods, variables.
+data AccessFlag =
+ ACC_PUBLIC -- ^ 0x0001 Visible for all
+ | ACC_PRIVATE -- ^ 0x0002 Visible only for defined class
+ | ACC_PROTECTED -- ^ 0x0004 Visible only for subclasses
+ | ACC_STATIC -- ^ 0x0008 Static method or variable
+ | ACC_FINAL -- ^ 0x0010 No further subclassing or assignments
+ | ACC_SYNCHRONIZED -- ^ 0x0020 Uses monitors
+ | ACC_VOLATILE -- ^ 0x0040 Could not be cached
+ | ACC_TRANSIENT -- ^ 0x0080
+ | ACC_NATIVE -- ^ 0x0100 Implemented in other language
+ | ACC_INTERFACE -- ^ 0x0200 Class is interface
+ | ACC_ABSTRACT -- ^ 0x0400
+ deriving (Eq, Show, Ord, Enum)
+
+-- | Fields and methods have signatures.
+class (Binary (Signature a), Show (Signature a), Eq (Signature a))
+ => HasSignature a where
+ type Signature a
+
+instance HasSignature Field where
+ type Signature Field = FieldSignature
+
+instance HasSignature Method where
+ type Signature Method = MethodSignature
+
+-- | Name and signature pair. Used for methods and fields.
+data NameType a = NameType {
+ ntName :: B.ByteString,
+ ntSignature :: Signature a }
+
+instance (HasSignature a) => Show (NameType a) where
+ show (NameType n t) = toString n ++ ": " ++ show t
+
+deriving instance HasSignature a => Eq (NameType a)
+
+instance HasSignature a => Binary (NameType a) where
+ put (NameType n t) = putLazyByteString n >> put t
+
+ get = NameType <$> get <*> get
+
+-- | Constant pool item
+data Constant stage =
+ CClass (Link stage B.ByteString)
+ | CField {refClass :: Link stage B.ByteString, fieldNameType :: Link stage (NameType Field)}
+ | CMethod {refClass :: Link stage B.ByteString, nameType :: Link stage (NameType Method)}
+ | CIfaceMethod {refClass :: Link stage B.ByteString, nameType :: Link stage (NameType Method)}
+ | CString (Link stage B.ByteString)
+ | CInteger Word32
+ | CFloat Float
+ | CLong Integer
+ | CDouble Double
+ | CNameType (Link stage B.ByteString) (Link stage B.ByteString)
+ | CUTF8 {getString :: B.ByteString}
+ | CUnicode {getString :: B.ByteString}
+
+-- | Name of the CClass. Error on any other constant.
+className :: Constant Direct -> B.ByteString
+className (CClass s) = s
+className x = error $ "Not a class: " ++ show x
+
+instance Show (Constant Direct) where
+ show (CClass name) = "class " ++ toString name
+ show (CField cls nt) = "field " ++ toString cls ++ "." ++ show nt
+ show (CMethod cls nt) = "method " ++ toString cls ++ "." ++ show nt
+ show (CIfaceMethod cls nt) = "interface method " ++ toString cls ++ "." ++ show nt
+ show (CString s) = "String \"" ++ toString s ++ "\""
+ show (CInteger x) = show x
+ show (CFloat x) = show x
+ show (CLong x) = show x
+ show (CDouble x) = show x
+ show (CNameType name tp) = toString name ++ ": " ++ toString tp
+ show (CUTF8 s) = "UTF8 \"" ++ toString s ++ "\""
+ show (CUnicode s) = "Unicode \"" ++ toString s ++ "\""
+
+-- | Constant pool
+type Pool stage = M.Map Word16 (Constant stage)
+
-- | Generic .class file format
-data ClassFile = ClassFile {
+data Class stage = Class {
magic :: Word32, -- ^ Magic value: 0xCAFEBABE
minorVersion :: Word16,
majorVersion :: Word16,
constsPoolSize :: Word16, -- ^ Number of items in constants pool
- constsPool :: [CpInfo], -- ^ Constants pool itself
- accessFlags :: Word16, -- ^ See @JVM.Types.AccessFlag@
- thisClass :: Word16, -- ^ Constants pool item index for this class
- superClass :: Word16, -- ^ --/-- for super class, zero for java.lang.Object
+ constsPool :: Pool stage, -- ^ Constants pool itself
+ accessFlags :: AccessFlags stage, -- ^ See @JVM.Types.AccessFlag@
+ thisClass :: Link stage B.ByteString, -- ^ Constants pool item index for this class
+ superClass :: Link stage B.ByteString, -- ^ --/-- for super class, zero for java.lang.Object
interfacesCount :: Word16, -- ^ Number of implemented interfaces
- interfaces :: [Word16], -- ^ Constants pool item indexes for implemented interfaces
+ interfaces :: [Link stage B.ByteString], -- ^ Constants pool item indexes for implemented interfaces
classFieldsCount :: Word16, -- ^ Number of class fileds
- classFields :: [FieldInfo], -- ^ Class fields
+ classFields :: [Field stage], -- ^ Class fields
classMethodsCount :: Word16, -- ^ Number of class methods
- classMethods :: [MethodInfo], -- ^ Class methods
+ classMethods :: [Method stage], -- ^ Class methods
classAttributesCount :: Word16, -- ^ Number of class attributes
- classAttributes :: [AttributeInfo] -- ^ Class attributes
+ classAttributes :: Attributes stage -- ^ Class attributes
}
- deriving (Eq, Show)
-instance Binary ClassFile where
- put (ClassFile {..}) = do
+deriving instance Eq (Class File)
+deriving instance Eq (Class Direct)
+deriving instance Show (Class File)
+deriving instance Show (Class Direct)
+
+deriving instance Eq (Constant File)
+deriving instance Eq (Constant Direct)
+deriving instance Show (Constant File)
+
+-- | Default (empty) class file definition.
+defaultClass :: (Default (AccessFlags stage), Default (Link stage B.ByteString), Default (Attributes stage))
+ => Class stage
+defaultClass = Class {
+ magic = 0xCAFEBABE,
+ minorVersion = 0,
+ majorVersion = 50,
+ constsPoolSize = 0,
+ constsPool = def,
+ accessFlags = def,
+ thisClass = def,
+ superClass = def,
+ interfacesCount = 0,
+ interfaces = [],
+ classFieldsCount = 0,
+ classFields = [],
+ classMethodsCount = 0,
+ classMethods = [],
+ classAttributesCount = 0,
+ classAttributes = def }
+
+instance Binary (Class File) where
+ put (Class {..}) = do
put magic
put minorVersion
put majorVersion
put constsPoolSize
- forM_ constsPool put
+ forM_ (M.elems constsPool) put
put accessFlags
put thisClass
put superClass
put classMethodsCount
forM_ classMethods put
put classAttributesCount
- forM_ classAttributes put
+ forM_ (attributesList classAttributes) put
get = do
magic <- get
classMethodsCount <- get
classMethods <- replicateM (fromIntegral classMethodsCount) get
asCount <- get
- as <- replicateM (fromIntegral $ asCount - 1) get
- return $ ClassFile magic minor major poolsize pool af this super
- interfacesCount ifaces classFieldsCount classFields classMethodsCount classMethods asCount as
+ as <- replicateM (fromIntegral $ asCount) get
+ let pool' = M.fromList $ zip [1..] pool
+ return $ Class magic minor major poolsize pool' af this super
+ interfacesCount ifaces classFieldsCount classFields
+ classMethodsCount classMethods asCount (AP as)
-- | Field signature format
data FieldType =
| LongInt -- ^ J
| ShortInt -- ^ S
| BoolType -- ^ Z
- | ObjectType String -- ^ L <class name>
- | Array (Maybe Int) FieldType -- ^ [<type>
+ | ObjectType String -- ^ L @{class name}@
+ | Array (Maybe Int) FieldType -- ^ @[{type}@
deriving (Eq)
instance Show FieldType where
return (c: next)
else return []
+putString :: String -> Put
+putString str = forM_ str put
+
instance Binary FieldType where
put SignedByte = put 'B'
put CharByte = put 'C'
put LongInt = put 'J'
put ShortInt = put 'S'
put BoolType = put 'Z'
- put (ObjectType name) = put 'L' >> put name
+ put (ObjectType name) = put 'L' >> putString name >> put ';'
put (Array Nothing sig) = put '[' >> put sig
put (Array (Just n) sig) = put '[' >> put (show n) >> put sig
return (x: next)
Nothing -> return []
--- | Constant pool item format
-data CpInfo =
- CONSTANT_Class {nameIndex :: Word16} -- ^ 7
- | CONSTANT_Fieldref {classIndex :: Word16, nameAndTypeIndex :: Word16} -- ^ 9
- | CONSTANT_Methodref {classIndex :: Word16, nameAndTypeIndex :: Word16} -- ^ 10
- | CONSTANT_InterfaceMethodref {classIndex :: Word16, nameAndTypeIndex :: Word16}-- ^ 11
- | CONSTANT_String {stringIndex :: Word16} -- ^ 8
- | CONSTANT_Integer {fourBytes :: Word32} -- ^ 3
- | CONSTANT_Float Float -- ^ 4
- | CONSTANT_Long Word64 -- ^ 5
- | CONSTANT_Double Double -- ^ 6
- | CONSTANT_NameAndType {nameIndex :: Word16, signatureIndex :: Word16} -- ^ 12
- | CONSTANT_Utf8 {stringLength :: Word16, stringBytes :: B.ByteString} -- ^ 1
- | CONSTANT_Unicode {stringLength :: Word16, stringBytes :: B.ByteString} -- ^ 2
- deriving (Eq, Show)
-
-instance Binary CpInfo where
- put (CONSTANT_Class i) = putWord8 7 >> put i
- put (CONSTANT_Fieldref i j) = putWord8 9 >> put i >> put j
- put (CONSTANT_Methodref i j) = putWord8 10 >> put i >> put j
- put (CONSTANT_InterfaceMethodref i j) = putWord8 11 >> put i >> put j
- put (CONSTANT_String i) = putWord8 8 >> put i
- put (CONSTANT_Integer x) = putWord8 3 >> put x
- put (CONSTANT_Float x) = putWord8 4 >> putFloat32be x
- put (CONSTANT_Long x) = putWord8 5 >> put x
- put (CONSTANT_Double x) = putWord8 6 >> putFloat64be x
- put (CONSTANT_NameAndType i j) = putWord8 12 >> put i >> put j
- put (CONSTANT_Utf8 l bs) = putWord8 1 >> put l >> putLazyByteString bs
- put (CONSTANT_Unicode l bs) = putWord8 2 >> put l >> putLazyByteString bs
+instance Binary (Constant File) where
+ put (CClass i) = putWord8 7 >> put i
+ put (CField i j) = putWord8 9 >> put i >> put j
+ put (CMethod i j) = putWord8 10 >> put i >> put j
+ put (CIfaceMethod i j) = putWord8 11 >> put i >> put j
+ put (CString i) = putWord8 8 >> put i
+ put (CInteger x) = putWord8 3 >> put x
+ put (CFloat x) = putWord8 4 >> putFloat32be x
+ put (CLong x) = putWord8 5 >> put x
+ put (CDouble x) = putWord8 6 >> putFloat64be x
+ put (CNameType i j) = putWord8 12 >> put i >> put j
+ put (CUTF8 bs) = do
+ putWord8 1
+ put (fromIntegral (B.length bs) :: Word16)
+ putLazyByteString bs
+ put (CUnicode bs) = do
+ putWord8 2
+ put (fromIntegral (B.length bs) :: Word16)
+ putLazyByteString bs
get = do
!offset <- bytesRead
case tag of
1 -> do
l <- get
- bs <- getLazyByteString (fromIntegral l)
- return $ CONSTANT_Utf8 l bs
+ bs <- getLazyByteString (fromIntegral (l :: Word16))
+ return $ CUTF8 bs
2 -> do
l <- get
- bs <- getLazyByteString (fromIntegral l)
- return $ CONSTANT_Unicode l bs
- 3 -> CONSTANT_Integer <$> get
- 4 -> CONSTANT_Float <$> getFloat32be
- 5 -> CONSTANT_Long <$> get
- 6 -> CONSTANT_Double <$> getFloat64be
- 7 -> CONSTANT_Class <$> get
- 8 -> CONSTANT_String <$> get
- 9 -> CONSTANT_Fieldref <$> get <*> get
- 10 -> CONSTANT_Methodref <$> get <*> get
- 11 -> CONSTANT_InterfaceMethodref <$> get <*> get
- 12 -> CONSTANT_NameAndType <$> get <*> get
+ bs <- getLazyByteString (fromIntegral (l :: Word16))
+ return $ CUnicode bs
+ 3 -> CInteger <$> get
+ 4 -> CFloat <$> getFloat32be
+ 5 -> CLong <$> get
+ 6 -> CDouble <$> getFloat64be
+ 7 -> CClass <$> get
+ 8 -> CString <$> get
+ 9 -> CField <$> get <*> get
+ 10 -> CMethod <$> get <*> get
+ 11 -> CIfaceMethod <$> get <*> get
+ 12 -> CNameType <$> get <*> get
_ -> fail $ "Unknown constants pool entry tag: " ++ show tag
-- | Class field format
-data FieldInfo = FieldInfo {
- fieldAccessFlags :: Word16,
- fieldNameIndex :: Word16,
- fieldSignatureIndex :: Word16,
+data Field stage = Field {
+ fieldAccessFlags :: AccessFlags stage,
+ fieldName :: Link stage B.ByteString,
+ fieldSignature :: Link stage FieldSignature,
fieldAttributesCount :: Word16,
- fieldAttributes :: [AttributeInfo] }
- deriving (Eq, Show)
+ fieldAttributes :: Attributes stage }
-instance Binary FieldInfo where
- put (FieldInfo {..}) = do
+deriving instance Eq (Field File)
+deriving instance Eq (Field Direct)
+deriving instance Show (Field File)
+deriving instance Show (Field Direct)
+
+instance Binary (Field File) where
+ put (Field {..}) = do
put fieldAccessFlags
- put fieldNameIndex
- put fieldSignatureIndex
+ put fieldName
+ put fieldSignature
put fieldAttributesCount
- forM_ fieldAttributes put
+ forM_ (attributesList fieldAttributes) put
get = do
af <- get
si <- get
n <- get
as <- replicateM (fromIntegral n) get
- return $ FieldInfo af ni si n as
+ return $ Field af ni si n (AP as)
-- | Class method format
-data MethodInfo = MethodInfo {
- methodAccessFlags :: Word16,
- methodNameIndex :: Word16,
- methodSignatureIndex :: Word16,
+data Method stage = Method {
+ methodAccessFlags :: AccessFlags stage,
+ methodName :: Link stage B.ByteString,
+ methodSignature :: Link stage MethodSignature,
methodAttributesCount :: Word16,
- methodAttributes :: [AttributeInfo] }
- deriving (Eq, Show)
+ methodAttributes :: Attributes stage }
+
+deriving instance Eq (Method File)
+deriving instance Eq (Method Direct)
+deriving instance Show (Method File)
+deriving instance Show (Method Direct)
-instance Binary MethodInfo where
- put (MethodInfo {..}) = do
+instance Binary (Method File) where
+ put (Method {..}) = do
put methodAccessFlags
- put methodNameIndex
- put methodSignatureIndex
+ put methodName
+ put methodSignature
put methodAttributesCount
- forM_ methodAttributes put
+ forM_ (attributesList methodAttributes) put
get = do
offset <- bytesRead
si <- get
n <- get
as <- replicateM (fromIntegral n) get
- return $ MethodInfo af ni si n as
+ return $ Method {
+ methodAccessFlags = af,
+ methodName = ni,
+ methodSignature = si,
+ methodAttributesCount = n,
+ methodAttributes = AP as }
-- | Any (class/ field/ method/ ...) attribute format.
-- Some formats specify special formats for @attributeValue@.
-data AttributeInfo = AttributeInfo {
+data Attribute = Attribute {
attributeName :: Word16,
attributeLength :: Word32,
attributeValue :: B.ByteString }
deriving (Eq, Show)
-instance Binary AttributeInfo where
- put (AttributeInfo {..}) = do
+instance Binary Attribute where
+ put (Attribute {..}) = do
put attributeName
putWord32be attributeLength
putLazyByteString attributeValue
name <- get
len <- getWord32be
value <- getLazyByteString (fromIntegral len)
- return $ AttributeInfo name len value
+ return $ Attribute name len value
+
+class HasAttributes a where
+ attributes :: a stage -> Attributes stage
+
+instance HasAttributes Class where
+ attributes = classAttributes
+
+instance HasAttributes Field where
+ attributes = fieldAttributes
+instance HasAttributes Method where
+ attributes = methodAttributes