-- | This module declares (low-level) data types for Java .class files
-- structures, and Binary instances to read/write them.
module JVM.ClassFile
- (Attribute (..),
+ (-- * About
+ -- $about
+ --
+ -- * Internal class file structures
+ Attribute (..),
FieldType (..),
+ -- * Signatures
FieldSignature, MethodSignature (..), ReturnSignature (..),
ArgumentSignature (..),
+ -- * Stage types
+ File, Direct,
+ -- * Staged structures
Pool, Link,
Method (..), Field (..), Class (..),
Constant (..),
- Pointers, Resolved,
- NameType (..),
- HasSignature (..), HasAttributes (..),
AccessFlag (..), AccessFlags,
Attributes (..),
- className
+ -- * Misc
+ HasSignature (..), HasAttributes (..),
+ NameType (..),
+ className,
+ apsize, arsize, arlist
)
where
import qualified Data.ByteString.Lazy as B
import Codec.Binary.UTF8.String hiding (encode, decode)
+-- $about
+--
+-- Java .class file uses constants pool, which stores almost all source-code-level
+-- constants (strings, integer literals etc), and also all identifiers (class,
+-- method, field names etc). All other structures contain indexes of constants in
+-- the pool instead of constants theirself.
+--
+-- It's not convient to use that indexes programmatically. So, .class file is represented
+-- at two stages: File and Direct. At File stage, all data structures contain only indexes,
+-- not constants theirself. When we read a class from a file, we get structure at File stage.
+-- We only can write File stage structure to file.
+--
+-- At Direct stage, structures conain constants, not indexes. Convertion functions (File <-> Direct)
+-- are located in the JVM.Converter module.
+--
+
-- | Read one-byte Char
getChar8 :: Get Char
getChar8 = do
toString :: B.ByteString -> String
toString bstr = decodeString $ map (chr . fromIntegral) $ B.unpack bstr
-type family Link s a
+-- | File stage
+data File = File
-data Pointers = Pointers
+-- | Direct representation stage
+data Direct = Direct
-data Resolved = Resolved
+-- | Link to some object
+type family Link stage a
-type instance Link Pointers a = Word16
+-- | At File stage, Link contain index of object in the constants pool.
+type instance Link File a = Word16
-type instance Link Resolved a = a
+-- | At Direct stage, Link contain object itself.
+type instance Link Direct a = a
+-- | Object (class, method, field …) access flags
type family AccessFlags stage
-type instance AccessFlags Pointers = Word16
+-- | At File stage, access flags are represented as Word16
+type instance AccessFlags File = Word16
+
+-- | At Direct stage, access flags are represented as set of flags.
+type instance AccessFlags Direct = S.Set AccessFlag
+
+-- | Object (class, method, field) attributes
+data family Attributes stage
+
+-- | At File stage, attributes are represented as list of Attribute structures.
+data instance Attributes File = AP {attributesList :: [Attribute]}
+ deriving (Eq, Show)
+
+-- | At Direct stage, attributes are represented as a Map.
+data instance Attributes Direct = AR (M.Map B.ByteString B.ByteString)
+ deriving (Eq, Show)
-type instance AccessFlags Resolved = S.Set AccessFlag
+-- | Size of attributes set at Direct stage
+arsize :: Attributes Direct -> Int
+arsize (AR m) = M.size m
-type family Attributes stage
+-- | Associative list of attributes at Direct stage
+arlist :: Attributes Direct -> [(B.ByteString, B.ByteString)]
+arlist (AR m) = M.assocs m
-type instance Attributes Pointers = [Attribute]
-type instance Attributes Resolved = M.Map B.ByteString B.ByteString
+-- | Size of attributes set at File stage
+apsize :: Attributes File -> Int
+apsize (AP list) = length list
-- | Access flags. Used for classess, methods, variables.
data AccessFlag =
| ACC_ABSTRACT -- ^ 0x0400
deriving (Eq, Show, Ord, Enum)
-class HasSignature a where
+-- | Fields and methods have signatures.
+class (Binary (Signature a), Show (Signature a), Eq (Signature a))
+ => HasSignature a where
type Signature a
instance HasSignature Field where
ntName :: B.ByteString,
ntSignature :: Signature a }
-instance Show (Signature a) => Show (NameType a) where
+instance (HasSignature a) => Show (NameType a) where
show (NameType n t) = toString n ++ ": " ++ show t
-deriving instance Eq (Signature a) => Eq (NameType a)
+deriving instance HasSignature a => Eq (NameType a)
-instance (Binary (Signature a)) => Binary (NameType a) where
+instance HasSignature a => Binary (NameType a) where
put (NameType n t) = putLazyByteString n >> put t
get = NameType <$> get <*> get
-- | Constant pool item
data Constant stage =
- CClass B.ByteString
+ CClass (Link stage B.ByteString)
| CField {refClass :: Link stage B.ByteString, fieldNameType :: Link stage (NameType Field)}
| CMethod {refClass :: Link stage B.ByteString, nameType :: Link stage (NameType Method)}
| CIfaceMethod {refClass :: Link stage B.ByteString, nameType :: Link stage (NameType Method)}
| CUTF8 {getString :: B.ByteString}
| CUnicode {getString :: B.ByteString}
-className :: Constant Resolved -> B.ByteString
+-- | Name of the CClass. Error on any other constant.
+className :: Constant Direct -> B.ByteString
className (CClass s) = s
className x = error $ "Not a class: " ++ show x
-instance Show (Constant Resolved) where
+instance Show (Constant Direct) where
show (CClass name) = "class " ++ toString name
show (CField cls nt) = "field " ++ toString cls ++ "." ++ show nt
show (CMethod cls nt) = "method " ++ toString cls ++ "." ++ show nt
classAttributes :: Attributes stage -- ^ Class attributes
}
-deriving instance Eq (Constant Pointers)
-deriving instance Eq (Constant Resolved)
-deriving instance Show (Constant Pointers)
+deriving instance Eq (Constant File)
+deriving instance Eq (Constant Direct)
+deriving instance Show (Constant File)
-instance Binary (Class Pointers) where
+instance Binary (Class File) where
put (Class {..}) = do
put magic
put minorVersion
put classMethodsCount
forM_ classMethods put
put classAttributesCount
- forM_ classAttributes put
+ forM_ (attributesList classAttributes) put
get = do
magic <- get
as <- replicateM (fromIntegral $ asCount) get
let pool' = M.fromList $ zip [1..] pool
return $ Class magic minor major poolsize pool' af this super
- interfacesCount ifaces classFieldsCount classFields classMethodsCount classMethods asCount as
+ interfacesCount ifaces classFieldsCount classFields
+ classMethodsCount classMethods asCount (AP as)
-- | Field signature format
data FieldType =
return (x: next)
Nothing -> return []
-instance Binary (Constant Pointers) where
+instance Binary (Constant File) where
put (CClass i) = putWord8 7 >> put i
put (CField i j) = putWord8 9 >> put i >> put j
put (CMethod i j) = putWord8 10 >> put i >> put j
fieldAttributesCount :: Word16,
fieldAttributes :: Attributes stage }
-deriving instance Eq (Field Pointers)
-deriving instance Eq (Field Resolved)
-deriving instance Show (Field Pointers)
-deriving instance Show (Field Resolved)
+deriving instance Eq (Field File)
+deriving instance Eq (Field Direct)
+deriving instance Show (Field File)
+deriving instance Show (Field Direct)
-instance Binary (Field Pointers) where
+instance Binary (Field File) where
put (Field {..}) = do
put fieldAccessFlags
put fieldName
put fieldSignature
put fieldAttributesCount
- forM_ fieldAttributes put
+ forM_ (attributesList fieldAttributes) put
get = do
af <- get
si <- get
n <- get
as <- replicateM (fromIntegral n) get
- return $ Field af ni si n as
+ return $ Field af ni si n (AP as)
-- | Class method format
data Method stage = Method {
- methodAccessFlags :: Attributes stage,
+ methodAccessFlags :: AccessFlags stage,
methodName :: Link stage B.ByteString,
methodSignature :: Link stage MethodSignature,
methodAttributesCount :: Word16,
methodAttributes :: Attributes stage }
-deriving instance Eq (Method Pointers)
-deriving instance Eq (Method Resolved)
-deriving instance Show (Method Pointers)
-deriving instance Show (Method Resolved)
+deriving instance Eq (Method File)
+deriving instance Eq (Method Direct)
+deriving instance Show (Method File)
+deriving instance Show (Method Direct)
-instance Binary (Method Pointers) where
+instance Binary (Method File) where
put (Method {..}) = do
put methodAccessFlags
put methodName
put methodSignature
put methodAttributesCount
- forM_ methodAttributes put
+ forM_ (attributesList methodAttributes) put
get = do
offset <- bytesRead
si <- get
n <- get
as <- replicateM (fromIntegral n) get
- return $ Method af ni si n as
+ return $ Method {
+ methodAccessFlags = af,
+ methodName = ni,
+ methodSignature = si,
+ methodAttributesCount = n,
+ methodAttributes = AP as }
-- | Any (class/ field/ method/ ...) attribute format.
-- Some formats specify special formats for @attributeValue@.