1 {-# LANGUAGE RecordWildCards, BangPatterns, TypeFamilies, StandaloneDeriving, FlexibleContexts, FlexibleInstances, UndecidableInstances, TypeSynonymInstances #-}
2 -- | This module declares (low-level) data types for Java .class files
3 -- structures, and Binary instances to read/write them.
8 -- * Internal class file structures
12 FieldSignature, MethodSignature (..), ReturnSignature (..),
13 ArgumentSignature (..),
16 -- * Staged structures
18 Method (..), Field (..), Class (..),
20 AccessFlag (..), AccessFlags,
23 HasSignature (..), HasAttributes (..),
26 apsize, arsize, arlist
31 import Control.Applicative
33 import Data.Binary.IEEE754
34 import Data.Binary.Get
35 import Data.Binary.Put
38 import qualified Data.Set as S
39 import qualified Data.Map as M
40 import qualified Data.ByteString.Lazy as B
41 import Codec.Binary.UTF8.String hiding (encode, decode)
45 -- Java .class file uses constants pool, which stores almost all source-code-level
46 -- constants (strings, integer literals etc), and also all identifiers (class,
47 -- method, field names etc). All other structures contain indexes of constants in
48 -- the pool instead of constants theirself.
50 -- It's not convient to use that indexes programmatically. So, .class file is represented
51 -- at two stages: File and Direct. At File stage, all data structures contain only indexes,
52 -- not constants theirself. When we read a class from a file, we get structure at File stage.
53 -- We only can write File stage structure to file.
55 -- At Direct stage, structures conain constants, not indexes. Convertion functions (File <-> Direct)
56 -- are located in the JVM.Converter module.
59 -- | Read one-byte Char
63 return $ chr (fromIntegral x)
65 toString :: B.ByteString -> String
66 toString bstr = decodeString $ map (chr . fromIntegral) $ B.unpack bstr
71 -- | Direct representation stage
74 -- | Link to some object
75 type family Link stage a
77 -- | At File stage, Link contain index of object in the constants pool.
78 type instance Link File a = Word16
80 -- | At Direct stage, Link contain object itself.
81 type instance Link Direct a = a
83 -- | Object (class, method, field …) access flags
84 type family AccessFlags stage
86 -- | At File stage, access flags are represented as Word16
87 type instance AccessFlags File = Word16
89 -- | At Direct stage, access flags are represented as set of flags.
90 type instance AccessFlags Direct = S.Set AccessFlag
92 -- | Object (class, method, field) attributes
93 data family Attributes stage
95 -- | At File stage, attributes are represented as list of Attribute structures.
96 data instance Attributes File = AP {attributesList :: [Attribute]}
99 -- | At Direct stage, attributes are represented as a Map.
100 data instance Attributes Direct = AR (M.Map B.ByteString B.ByteString)
103 -- | Size of attributes set at Direct stage
104 arsize :: Attributes Direct -> Int
105 arsize (AR m) = M.size m
107 -- | Associative list of attributes at Direct stage
108 arlist :: Attributes Direct -> [(B.ByteString, B.ByteString)]
109 arlist (AR m) = M.assocs m
111 -- | Size of attributes set at File stage
112 apsize :: Attributes File -> Int
113 apsize (AP list) = length list
115 -- | Access flags. Used for classess, methods, variables.
117 ACC_PUBLIC -- ^ 0x0001 Visible for all
118 | ACC_PRIVATE -- ^ 0x0002 Visible only for defined class
119 | ACC_PROTECTED -- ^ 0x0004 Visible only for subclasses
120 | ACC_STATIC -- ^ 0x0008 Static method or variable
121 | ACC_FINAL -- ^ 0x0010 No further subclassing or assignments
122 | ACC_SYNCHRONIZED -- ^ 0x0020 Uses monitors
123 | ACC_VOLATILE -- ^ 0x0040 Could not be cached
124 | ACC_TRANSIENT -- ^ 0x0080
125 | ACC_NATIVE -- ^ 0x0100 Implemented in other language
126 | ACC_INTERFACE -- ^ 0x0200 Class is interface
127 | ACC_ABSTRACT -- ^ 0x0400
128 deriving (Eq, Show, Ord, Enum)
130 -- | Fields and methods have signatures.
131 class HasSignature a where
134 instance HasSignature Field where
135 type Signature Field = FieldSignature
137 instance HasSignature Method where
138 type Signature Method = MethodSignature
140 -- | Name and signature pair. Used for methods and fields.
141 data NameType a = NameType {
142 ntName :: B.ByteString,
143 ntSignature :: Signature a }
145 instance Show (Signature a) => Show (NameType a) where
146 show (NameType n t) = toString n ++ ": " ++ show t
148 deriving instance Eq (Signature a) => Eq (NameType a)
150 instance (Binary (Signature a)) => Binary (NameType a) where
151 put (NameType n t) = putLazyByteString n >> put t
153 get = NameType <$> get <*> get
155 -- | Constant pool item
156 data Constant stage =
157 CClass (Link stage B.ByteString)
158 | CField {refClass :: Link stage B.ByteString, fieldNameType :: Link stage (NameType Field)}
159 | CMethod {refClass :: Link stage B.ByteString, nameType :: Link stage (NameType Method)}
160 | CIfaceMethod {refClass :: Link stage B.ByteString, nameType :: Link stage (NameType Method)}
161 | CString (Link stage B.ByteString)
166 | CNameType (Link stage B.ByteString) (Link stage B.ByteString)
167 | CUTF8 {getString :: B.ByteString}
168 | CUnicode {getString :: B.ByteString}
170 -- | Name of the CClass. Error on any other constant.
171 className :: Constant Direct -> B.ByteString
172 className (CClass s) = s
173 className x = error $ "Not a class: " ++ show x
175 instance Show (Constant Direct) where
176 show (CClass name) = "class " ++ toString name
177 show (CField cls nt) = "field " ++ toString cls ++ "." ++ show nt
178 show (CMethod cls nt) = "method " ++ toString cls ++ "." ++ show nt
179 show (CIfaceMethod cls nt) = "interface method " ++ toString cls ++ "." ++ show nt
180 show (CString s) = "String \"" ++ toString s ++ "\""
181 show (CInteger x) = show x
182 show (CFloat x) = show x
183 show (CLong x) = show x
184 show (CDouble x) = show x
185 show (CNameType name tp) = toString name ++ ": " ++ toString tp
186 show (CUTF8 s) = "UTF8 \"" ++ toString s ++ "\""
187 show (CUnicode s) = "Unicode \"" ++ toString s ++ "\""
190 type Pool stage = M.Map Word16 (Constant stage)
192 -- | Generic .class file format
193 data Class stage = Class {
194 magic :: Word32, -- ^ Magic value: 0xCAFEBABE
195 minorVersion :: Word16,
196 majorVersion :: Word16,
197 constsPoolSize :: Word16, -- ^ Number of items in constants pool
198 constsPool :: Pool stage, -- ^ Constants pool itself
199 accessFlags :: AccessFlags stage, -- ^ See @JVM.Types.AccessFlag@
200 thisClass :: Link stage B.ByteString, -- ^ Constants pool item index for this class
201 superClass :: Link stage B.ByteString, -- ^ --/-- for super class, zero for java.lang.Object
202 interfacesCount :: Word16, -- ^ Number of implemented interfaces
203 interfaces :: [Link stage B.ByteString], -- ^ Constants pool item indexes for implemented interfaces
204 classFieldsCount :: Word16, -- ^ Number of class fileds
205 classFields :: [Field stage], -- ^ Class fields
206 classMethodsCount :: Word16, -- ^ Number of class methods
207 classMethods :: [Method stage], -- ^ Class methods
208 classAttributesCount :: Word16, -- ^ Number of class attributes
209 classAttributes :: Attributes stage -- ^ Class attributes
212 deriving instance Eq (Constant File)
213 deriving instance Eq (Constant Direct)
214 deriving instance Show (Constant File)
216 instance Binary (Class File) where
217 put (Class {..}) = do
222 forM_ (M.elems constsPool) put
229 forM_ classFields put
230 put classMethodsCount
231 forM_ classMethods put
232 put classAttributesCount
233 forM_ (attributesList classAttributes) put
240 pool <- replicateM (fromIntegral poolsize - 1) get
244 interfacesCount <- get
245 ifaces <- replicateM (fromIntegral interfacesCount) get
246 classFieldsCount <- get
247 classFields <- replicateM (fromIntegral classFieldsCount) get
248 classMethodsCount <- get
249 classMethods <- replicateM (fromIntegral classMethodsCount) get
251 as <- replicateM (fromIntegral $ asCount) get
252 let pool' = M.fromList $ zip [1..] pool
253 return $ Class magic minor major poolsize pool' af this super
254 interfacesCount ifaces classFieldsCount classFields
255 classMethodsCount classMethods asCount (AP as)
257 -- | Field signature format
267 | ObjectType String -- ^ L @{class name}@
268 | Array (Maybe Int) FieldType -- ^ @[{type}@
271 instance Show FieldType where
272 show SignedByte = "byte"
273 show CharByte = "char"
274 show DoubleType = "double"
275 show FloatType = "float"
277 show LongInt = "long"
278 show ShortInt = "short"
279 show BoolType = "bool"
280 show (ObjectType s) = "Object " ++ s
281 show (Array Nothing t) = show t ++ "[]"
282 show (Array (Just n) t) = show t ++ "[" ++ show n ++ "]"
284 -- | Class field signature
285 type FieldSignature = FieldType
287 -- | Try to read integer value from decimal representation
288 getInt :: Get (Maybe Int)
293 else return $ Just (read s)
295 getDigits :: Get [Char]
297 c <- lookAhead getChar8
305 putString :: String -> Put
306 putString str = forM_ str put
308 instance Binary FieldType where
309 put SignedByte = put 'B'
310 put CharByte = put 'C'
311 put DoubleType = put 'D'
312 put FloatType = put 'F'
313 put IntType = put 'I'
314 put LongInt = put 'J'
315 put ShortInt = put 'S'
316 put BoolType = put 'Z'
317 put (ObjectType name) = put 'L' >> putString name >> put ';'
318 put (Array Nothing sig) = put '[' >> put sig
319 put (Array (Just n) sig) = put '[' >> put (show n) >> put sig
324 'B' -> return SignedByte
325 'C' -> return CharByte
326 'D' -> return DoubleType
327 'F' -> return FloatType
328 'I' -> return IntType
329 'J' -> return LongInt
330 'S' -> return ShortInt
331 'Z' -> return BoolType
333 name <- getToSemicolon
334 return (ObjectType name)
338 return (Array mbSize sig)
339 _ -> fail $ "Unknown signature opening symbol: " ++ [b]
341 -- | Read string up to `;'
342 getToSemicolon :: Get String
348 next <- getToSemicolon
351 -- | Return value signature
352 data ReturnSignature =
357 instance Show ReturnSignature where
358 show (Returns t) = show t
359 show ReturnsVoid = "Void"
361 instance Binary ReturnSignature where
362 put (Returns sig) = put sig
363 put ReturnsVoid = put 'V'
366 x <- lookAhead getChar8
368 'V' -> skip 1 >> return ReturnsVoid
371 -- | Method argument signature
372 type ArgumentSignature = FieldType
374 -- | Class method argument signature
375 data MethodSignature =
376 MethodSignature [ArgumentSignature] ReturnSignature
379 instance Show MethodSignature where
380 show (MethodSignature args ret) = "(" ++ intercalate ", " (map show args) ++ ") returns " ++ show ret
382 instance Binary MethodSignature where
383 put (MethodSignature args ret) = do
392 fail "Cannot parse method signature: no starting `(' !"
396 fail "Internal error: method signature without `)' !?"
398 return (MethodSignature args ret)
400 -- | Read arguments signatures (up to `)')
401 getArgs :: Get [ArgumentSignature]
402 getArgs = whileJust getArg
404 getArg :: Get (Maybe ArgumentSignature)
406 x <- lookAhead getChar8
411 whileJust :: (Monad m) => m (Maybe a) -> m [a]
420 instance Binary (Constant File) where
421 put (CClass i) = putWord8 7 >> put i
422 put (CField i j) = putWord8 9 >> put i >> put j
423 put (CMethod i j) = putWord8 10 >> put i >> put j
424 put (CIfaceMethod i j) = putWord8 11 >> put i >> put j
425 put (CString i) = putWord8 8 >> put i
426 put (CInteger x) = putWord8 3 >> put x
427 put (CFloat x) = putWord8 4 >> putFloat32be x
428 put (CLong x) = putWord8 5 >> put x
429 put (CDouble x) = putWord8 6 >> putFloat64be x
430 put (CNameType i j) = putWord8 12 >> put i >> put j
433 put (fromIntegral (B.length bs) :: Word16)
435 put (CUnicode bs) = do
437 put (fromIntegral (B.length bs) :: Word16)
446 bs <- getLazyByteString (fromIntegral (l :: Word16))
450 bs <- getLazyByteString (fromIntegral (l :: Word16))
452 3 -> CInteger <$> get
453 4 -> CFloat <$> getFloat32be
455 6 -> CDouble <$> getFloat64be
458 9 -> CField <$> get <*> get
459 10 -> CMethod <$> get <*> get
460 11 -> CIfaceMethod <$> get <*> get
461 12 -> CNameType <$> get <*> get
462 _ -> fail $ "Unknown constants pool entry tag: " ++ show tag
464 -- | Class field format
465 data Field stage = Field {
466 fieldAccessFlags :: AccessFlags stage,
467 fieldName :: Link stage B.ByteString,
468 fieldSignature :: Link stage FieldSignature,
469 fieldAttributesCount :: Word16,
470 fieldAttributes :: Attributes stage }
472 deriving instance Eq (Field File)
473 deriving instance Eq (Field Direct)
474 deriving instance Show (Field File)
475 deriving instance Show (Field Direct)
477 instance Binary (Field File) where
478 put (Field {..}) = do
482 put fieldAttributesCount
483 forM_ (attributesList fieldAttributes) put
490 as <- replicateM (fromIntegral n) get
491 return $ Field af ni si n (AP as)
493 -- | Class method format
494 data Method stage = Method {
495 methodAccessFlags :: AccessFlags stage,
496 methodName :: Link stage B.ByteString,
497 methodSignature :: Link stage MethodSignature,
498 methodAttributesCount :: Word16,
499 methodAttributes :: Attributes stage }
501 deriving instance Eq (Method File)
502 deriving instance Eq (Method Direct)
503 deriving instance Show (Method File)
504 deriving instance Show (Method Direct)
506 instance Binary (Method File) where
507 put (Method {..}) = do
508 put methodAccessFlags
511 put methodAttributesCount
512 forM_ (attributesList methodAttributes) put
520 as <- replicateM (fromIntegral n) get
522 methodAccessFlags = af,
524 methodSignature = si,
525 methodAttributesCount = n,
526 methodAttributes = AP as }
528 -- | Any (class/ field/ method/ ...) attribute format.
529 -- Some formats specify special formats for @attributeValue@.
530 data Attribute = Attribute {
531 attributeName :: Word16,
532 attributeLength :: Word32,
533 attributeValue :: B.ByteString }
536 instance Binary Attribute where
537 put (Attribute {..}) = do
539 putWord32be attributeLength
540 putLazyByteString attributeValue
546 value <- getLazyByteString (fromIntegral len)
547 return $ Attribute name len value
549 class HasAttributes a where
550 attributes :: a stage -> Attributes stage
552 instance HasAttributes Class where
553 attributes = classAttributes
555 instance HasAttributes Field where
556 attributes = fieldAttributes
558 instance HasAttributes Method where
559 attributes = methodAttributes