1 {-# LANGUAGE RecordWildCards, BangPatterns, TypeFamilies, StandaloneDeriving, FlexibleContexts, FlexibleInstances, UndecidableInstances, TypeSynonymInstances #-}
2 -- | This module declares (low-level) data types for Java .class files
3 -- structures, and Binary instances to read/write them.
8 -- * Internal class file structures
12 FieldSignature, MethodSignature (..), ReturnSignature (..),
13 ArgumentSignature (..),
16 -- * Staged structures
18 Method (..), Field (..), Class (..),
20 AccessFlag (..), AccessFlags,
23 HasSignature (..), HasAttributes (..),
26 apsize, arsize, arlist
31 import Control.Applicative
33 import Data.Binary.IEEE754
34 import Data.Binary.Get
35 import Data.Binary.Put
38 import qualified Data.Set as S
39 import qualified Data.Map as M
40 import qualified Data.ByteString.Lazy as B
41 import Codec.Binary.UTF8.String hiding (encode, decode)
45 -- Java .class file uses constants pool, which stores almost all source-code-level
46 -- constants (strings, integer literals etc), and also all identifiers (class,
47 -- method, field names etc). All other structures contain indexes of constants in
48 -- the pool instead of constants theirself.
50 -- It's not convient to use that indexes programmatically. So, .class file is represented
51 -- at two stages: File and Direct. At File stage, all data structures contain only indexes,
52 -- not constants theirself. When we read a class from a file, we get structure at File stage.
53 -- We only can write File stage structure to file.
55 -- At Direct stage, structures conain constants, not indexes. Convertion functions (File <-> Direct)
56 -- are located in the JVM.Converter module.
59 -- | Read one-byte Char
63 return $ chr (fromIntegral x)
65 toString :: B.ByteString -> String
66 toString bstr = decodeString $ map (chr . fromIntegral) $ B.unpack bstr
71 -- | Direct representation stage
74 -- | Link to some object
75 type family Link stage a
77 -- | At File stage, Link contain index of object in the constants pool.
78 type instance Link File a = Word16
80 -- | At Direct stage, Link contain object itself.
81 type instance Link Direct a = a
83 -- | Object (class, method, field …) access flags
84 type family AccessFlags stage
86 -- | At File stage, access flags are represented as Word16
87 type instance AccessFlags File = Word16
89 -- | At Direct stage, access flags are represented as set of flags.
90 type instance AccessFlags Direct = S.Set AccessFlag
92 -- | Object (class, method, field) attributes
93 data family Attributes stage
95 -- | At File stage, attributes are represented as list of Attribute structures.
96 data instance Attributes File = AP {attributesList :: [Attribute]}
99 -- | At Direct stage, attributes are represented as a Map.
100 data instance Attributes Direct = AR (M.Map B.ByteString B.ByteString)
103 -- | Size of attributes set at Direct stage
104 arsize :: Attributes Direct -> Int
105 arsize (AR m) = M.size m
107 -- | Associative list of attributes at Direct stage
108 arlist :: Attributes Direct -> [(B.ByteString, B.ByteString)]
109 arlist (AR m) = M.assocs m
111 -- | Size of attributes set at File stage
112 apsize :: Attributes File -> Int
113 apsize (AP list) = length list
115 -- | Access flags. Used for classess, methods, variables.
117 ACC_PUBLIC -- ^ 0x0001 Visible for all
118 | ACC_PRIVATE -- ^ 0x0002 Visible only for defined class
119 | ACC_PROTECTED -- ^ 0x0004 Visible only for subclasses
120 | ACC_STATIC -- ^ 0x0008 Static method or variable
121 | ACC_FINAL -- ^ 0x0010 No further subclassing or assignments
122 | ACC_SYNCHRONIZED -- ^ 0x0020 Uses monitors
123 | ACC_VOLATILE -- ^ 0x0040 Could not be cached
124 | ACC_TRANSIENT -- ^ 0x0080
125 | ACC_NATIVE -- ^ 0x0100 Implemented in other language
126 | ACC_INTERFACE -- ^ 0x0200 Class is interface
127 | ACC_ABSTRACT -- ^ 0x0400
128 deriving (Eq, Show, Ord, Enum)
130 -- | Fields and methods have signatures.
131 class (Binary (Signature a), Show (Signature a), Eq (Signature a))
132 => HasSignature a where
135 instance HasSignature Field where
136 type Signature Field = FieldSignature
138 instance HasSignature Method where
139 type Signature Method = MethodSignature
141 -- | Name and signature pair. Used for methods and fields.
142 data NameType a = NameType {
143 ntName :: B.ByteString,
144 ntSignature :: Signature a }
146 instance (HasSignature a) => Show (NameType a) where
147 show (NameType n t) = toString n ++ ": " ++ show t
149 deriving instance HasSignature a => Eq (NameType a)
151 instance HasSignature a => Binary (NameType a) where
152 put (NameType n t) = putLazyByteString n >> put t
154 get = NameType <$> get <*> get
156 -- | Constant pool item
157 data Constant stage =
158 CClass (Link stage B.ByteString)
159 | CField {refClass :: Link stage B.ByteString, fieldNameType :: Link stage (NameType Field)}
160 | CMethod {refClass :: Link stage B.ByteString, nameType :: Link stage (NameType Method)}
161 | CIfaceMethod {refClass :: Link stage B.ByteString, nameType :: Link stage (NameType Method)}
162 | CString (Link stage B.ByteString)
167 | CNameType (Link stage B.ByteString) (Link stage B.ByteString)
168 | CUTF8 {getString :: B.ByteString}
169 | CUnicode {getString :: B.ByteString}
171 -- | Name of the CClass. Error on any other constant.
172 className :: Constant Direct -> B.ByteString
173 className (CClass s) = s
174 className x = error $ "Not a class: " ++ show x
176 instance Show (Constant Direct) where
177 show (CClass name) = "class " ++ toString name
178 show (CField cls nt) = "field " ++ toString cls ++ "." ++ show nt
179 show (CMethod cls nt) = "method " ++ toString cls ++ "." ++ show nt
180 show (CIfaceMethod cls nt) = "interface method " ++ toString cls ++ "." ++ show nt
181 show (CString s) = "String \"" ++ toString s ++ "\""
182 show (CInteger x) = show x
183 show (CFloat x) = show x
184 show (CLong x) = show x
185 show (CDouble x) = show x
186 show (CNameType name tp) = toString name ++ ": " ++ toString tp
187 show (CUTF8 s) = "UTF8 \"" ++ toString s ++ "\""
188 show (CUnicode s) = "Unicode \"" ++ toString s ++ "\""
191 type Pool stage = M.Map Word16 (Constant stage)
193 -- | Generic .class file format
194 data Class stage = Class {
195 magic :: Word32, -- ^ Magic value: 0xCAFEBABE
196 minorVersion :: Word16,
197 majorVersion :: Word16,
198 constsPoolSize :: Word16, -- ^ Number of items in constants pool
199 constsPool :: Pool stage, -- ^ Constants pool itself
200 accessFlags :: AccessFlags stage, -- ^ See @JVM.Types.AccessFlag@
201 thisClass :: Link stage B.ByteString, -- ^ Constants pool item index for this class
202 superClass :: Link stage B.ByteString, -- ^ --/-- for super class, zero for java.lang.Object
203 interfacesCount :: Word16, -- ^ Number of implemented interfaces
204 interfaces :: [Link stage B.ByteString], -- ^ Constants pool item indexes for implemented interfaces
205 classFieldsCount :: Word16, -- ^ Number of class fileds
206 classFields :: [Field stage], -- ^ Class fields
207 classMethodsCount :: Word16, -- ^ Number of class methods
208 classMethods :: [Method stage], -- ^ Class methods
209 classAttributesCount :: Word16, -- ^ Number of class attributes
210 classAttributes :: Attributes stage -- ^ Class attributes
213 deriving instance Eq (Constant File)
214 deriving instance Eq (Constant Direct)
215 deriving instance Show (Constant File)
217 instance Binary (Class File) where
218 put (Class {..}) = do
223 forM_ (M.elems constsPool) put
230 forM_ classFields put
231 put classMethodsCount
232 forM_ classMethods put
233 put classAttributesCount
234 forM_ (attributesList classAttributes) put
241 pool <- replicateM (fromIntegral poolsize - 1) get
245 interfacesCount <- get
246 ifaces <- replicateM (fromIntegral interfacesCount) get
247 classFieldsCount <- get
248 classFields <- replicateM (fromIntegral classFieldsCount) get
249 classMethodsCount <- get
250 classMethods <- replicateM (fromIntegral classMethodsCount) get
252 as <- replicateM (fromIntegral $ asCount) get
253 let pool' = M.fromList $ zip [1..] pool
254 return $ Class magic minor major poolsize pool' af this super
255 interfacesCount ifaces classFieldsCount classFields
256 classMethodsCount classMethods asCount (AP as)
258 -- | Field signature format
268 | ObjectType String -- ^ L @{class name}@
269 | Array (Maybe Int) FieldType -- ^ @[{type}@
272 instance Show FieldType where
273 show SignedByte = "byte"
274 show CharByte = "char"
275 show DoubleType = "double"
276 show FloatType = "float"
278 show LongInt = "long"
279 show ShortInt = "short"
280 show BoolType = "bool"
281 show (ObjectType s) = "Object " ++ s
282 show (Array Nothing t) = show t ++ "[]"
283 show (Array (Just n) t) = show t ++ "[" ++ show n ++ "]"
285 -- | Class field signature
286 type FieldSignature = FieldType
288 -- | Try to read integer value from decimal representation
289 getInt :: Get (Maybe Int)
294 else return $ Just (read s)
296 getDigits :: Get [Char]
298 c <- lookAhead getChar8
306 putString :: String -> Put
307 putString str = forM_ str put
309 instance Binary FieldType where
310 put SignedByte = put 'B'
311 put CharByte = put 'C'
312 put DoubleType = put 'D'
313 put FloatType = put 'F'
314 put IntType = put 'I'
315 put LongInt = put 'J'
316 put ShortInt = put 'S'
317 put BoolType = put 'Z'
318 put (ObjectType name) = put 'L' >> putString name >> put ';'
319 put (Array Nothing sig) = put '[' >> put sig
320 put (Array (Just n) sig) = put '[' >> put (show n) >> put sig
325 'B' -> return SignedByte
326 'C' -> return CharByte
327 'D' -> return DoubleType
328 'F' -> return FloatType
329 'I' -> return IntType
330 'J' -> return LongInt
331 'S' -> return ShortInt
332 'Z' -> return BoolType
334 name <- getToSemicolon
335 return (ObjectType name)
339 return (Array mbSize sig)
340 _ -> fail $ "Unknown signature opening symbol: " ++ [b]
342 -- | Read string up to `;'
343 getToSemicolon :: Get String
349 next <- getToSemicolon
352 -- | Return value signature
353 data ReturnSignature =
358 instance Show ReturnSignature where
359 show (Returns t) = show t
360 show ReturnsVoid = "Void"
362 instance Binary ReturnSignature where
363 put (Returns sig) = put sig
364 put ReturnsVoid = put 'V'
367 x <- lookAhead getChar8
369 'V' -> skip 1 >> return ReturnsVoid
372 -- | Method argument signature
373 type ArgumentSignature = FieldType
375 -- | Class method argument signature
376 data MethodSignature =
377 MethodSignature [ArgumentSignature] ReturnSignature
380 instance Show MethodSignature where
381 show (MethodSignature args ret) = "(" ++ intercalate ", " (map show args) ++ ") returns " ++ show ret
383 instance Binary MethodSignature where
384 put (MethodSignature args ret) = do
393 fail "Cannot parse method signature: no starting `(' !"
397 fail "Internal error: method signature without `)' !?"
399 return (MethodSignature args ret)
401 -- | Read arguments signatures (up to `)')
402 getArgs :: Get [ArgumentSignature]
403 getArgs = whileJust getArg
405 getArg :: Get (Maybe ArgumentSignature)
407 x <- lookAhead getChar8
412 whileJust :: (Monad m) => m (Maybe a) -> m [a]
421 instance Binary (Constant File) where
422 put (CClass i) = putWord8 7 >> put i
423 put (CField i j) = putWord8 9 >> put i >> put j
424 put (CMethod i j) = putWord8 10 >> put i >> put j
425 put (CIfaceMethod i j) = putWord8 11 >> put i >> put j
426 put (CString i) = putWord8 8 >> put i
427 put (CInteger x) = putWord8 3 >> put x
428 put (CFloat x) = putWord8 4 >> putFloat32be x
429 put (CLong x) = putWord8 5 >> put x
430 put (CDouble x) = putWord8 6 >> putFloat64be x
431 put (CNameType i j) = putWord8 12 >> put i >> put j
434 put (fromIntegral (B.length bs) :: Word16)
436 put (CUnicode bs) = do
438 put (fromIntegral (B.length bs) :: Word16)
447 bs <- getLazyByteString (fromIntegral (l :: Word16))
451 bs <- getLazyByteString (fromIntegral (l :: Word16))
453 3 -> CInteger <$> get
454 4 -> CFloat <$> getFloat32be
456 6 -> CDouble <$> getFloat64be
459 9 -> CField <$> get <*> get
460 10 -> CMethod <$> get <*> get
461 11 -> CIfaceMethod <$> get <*> get
462 12 -> CNameType <$> get <*> get
463 _ -> fail $ "Unknown constants pool entry tag: " ++ show tag
465 -- | Class field format
466 data Field stage = Field {
467 fieldAccessFlags :: AccessFlags stage,
468 fieldName :: Link stage B.ByteString,
469 fieldSignature :: Link stage FieldSignature,
470 fieldAttributesCount :: Word16,
471 fieldAttributes :: Attributes stage }
473 deriving instance Eq (Field File)
474 deriving instance Eq (Field Direct)
475 deriving instance Show (Field File)
476 deriving instance Show (Field Direct)
478 instance Binary (Field File) where
479 put (Field {..}) = do
483 put fieldAttributesCount
484 forM_ (attributesList fieldAttributes) put
491 as <- replicateM (fromIntegral n) get
492 return $ Field af ni si n (AP as)
494 -- | Class method format
495 data Method stage = Method {
496 methodAccessFlags :: AccessFlags stage,
497 methodName :: Link stage B.ByteString,
498 methodSignature :: Link stage MethodSignature,
499 methodAttributesCount :: Word16,
500 methodAttributes :: Attributes stage }
502 deriving instance Eq (Method File)
503 deriving instance Eq (Method Direct)
504 deriving instance Show (Method File)
505 deriving instance Show (Method Direct)
507 instance Binary (Method File) where
508 put (Method {..}) = do
509 put methodAccessFlags
512 put methodAttributesCount
513 forM_ (attributesList methodAttributes) put
521 as <- replicateM (fromIntegral n) get
523 methodAccessFlags = af,
525 methodSignature = si,
526 methodAttributesCount = n,
527 methodAttributes = AP as }
529 -- | Any (class/ field/ method/ ...) attribute format.
530 -- Some formats specify special formats for @attributeValue@.
531 data Attribute = Attribute {
532 attributeName :: Word16,
533 attributeLength :: Word32,
534 attributeValue :: B.ByteString }
537 instance Binary Attribute where
538 put (Attribute {..}) = do
540 putWord32be attributeLength
541 putLazyByteString attributeValue
547 value <- getLazyByteString (fromIntegral len)
548 return $ Attribute name len value
550 class HasAttributes a where
551 attributes :: a stage -> Attributes stage
553 instance HasAttributes Class where
554 attributes = classAttributes
556 instance HasAttributes Field where
557 attributes = fieldAttributes
559 instance HasAttributes Method where
560 attributes = methodAttributes