1 {-# LANGUAGE RecordWildCards, BangPatterns, TypeFamilies, StandaloneDeriving, FlexibleContexts, FlexibleInstances, UndecidableInstances, TypeSynonymInstances #-}
2 -- | This module declares (low-level) data types for Java .class files
3 -- structures, and Binary instances to read/write them.
8 -- * Internal class file structures
12 FieldSignature, MethodSignature (..), ReturnSignature (..),
13 ArgumentSignature (..),
16 -- * Staged structures
18 Method (..), Field (..), Class (..),
20 AccessFlag (..), AccessFlags,
24 HasSignature (..), HasAttributes (..),
27 apsize, arsize, arlist
32 import Control.Applicative
34 import Data.Binary.IEEE754
35 import Data.Binary.Get
36 import Data.Binary.Put
40 import qualified Data.Set as S
41 import qualified Data.Map as M
42 import qualified Data.ByteString.Lazy as B
43 import Codec.Binary.UTF8.String hiding (encode, decode)
47 -- Java .class file uses constants pool, which stores almost all source-code-level
48 -- constants (strings, integer literals etc), and also all identifiers (class,
49 -- method, field names etc). All other structures contain indexes of constants in
50 -- the pool instead of constants theirself.
52 -- It's not convient to use that indexes programmatically. So, .class file is represented
53 -- at two stages: File and Direct. At File stage, all data structures contain only indexes,
54 -- not constants theirself. When we read a class from a file, we get structure at File stage.
55 -- We only can write File stage structure to file.
57 -- At Direct stage, structures conain constants, not indexes. Convertion functions (File <-> Direct)
58 -- are located in the JVM.Converter module.
61 -- | Read one-byte Char
65 return $ chr (fromIntegral x)
67 toString :: B.ByteString -> String
68 toString bstr = decodeString $ map (chr . fromIntegral) $ B.unpack bstr
73 -- | Direct representation stage
76 -- | Link to some object
77 type family Link stage a
79 -- | At File stage, Link contain index of object in the constants pool.
80 type instance Link File a = Word16
82 -- | At Direct stage, Link contain object itself.
83 type instance Link Direct a = a
85 -- | Object (class, method, field …) access flags
86 type family AccessFlags stage
88 -- | At File stage, access flags are represented as Word16
89 type instance AccessFlags File = Word16
91 -- | At Direct stage, access flags are represented as set of flags.
92 type instance AccessFlags Direct = S.Set AccessFlag
94 -- | Object (class, method, field) attributes
95 data family Attributes stage
97 -- | At File stage, attributes are represented as list of Attribute structures.
98 data instance Attributes File = AP {attributesList :: [Attribute]}
101 instance Default (Attributes File) where
104 -- | At Direct stage, attributes are represented as a Map.
105 data instance Attributes Direct = AR (M.Map B.ByteString B.ByteString)
108 instance Default (Attributes Direct) where
111 -- | Size of attributes set at Direct stage
112 arsize :: Attributes Direct -> Int
113 arsize (AR m) = M.size m
115 -- | Associative list of attributes at Direct stage
116 arlist :: Attributes Direct -> [(B.ByteString, B.ByteString)]
117 arlist (AR m) = M.assocs m
119 -- | Size of attributes set at File stage
120 apsize :: Attributes File -> Int
121 apsize (AP list) = length list
123 -- | Access flags. Used for classess, methods, variables.
125 ACC_PUBLIC -- ^ 0x0001 Visible for all
126 | ACC_PRIVATE -- ^ 0x0002 Visible only for defined class
127 | ACC_PROTECTED -- ^ 0x0004 Visible only for subclasses
128 | ACC_STATIC -- ^ 0x0008 Static method or variable
129 | ACC_FINAL -- ^ 0x0010 No further subclassing or assignments
130 | ACC_SYNCHRONIZED -- ^ 0x0020 Uses monitors
131 | ACC_VOLATILE -- ^ 0x0040 Could not be cached
132 | ACC_TRANSIENT -- ^ 0x0080
133 | ACC_NATIVE -- ^ 0x0100 Implemented in other language
134 | ACC_INTERFACE -- ^ 0x0200 Class is interface
135 | ACC_ABSTRACT -- ^ 0x0400
136 deriving (Eq, Show, Ord, Enum)
138 -- | Fields and methods have signatures.
139 class (Binary (Signature a), Show (Signature a), Eq (Signature a))
140 => HasSignature a where
143 instance HasSignature Field where
144 type Signature Field = FieldSignature
146 instance HasSignature Method where
147 type Signature Method = MethodSignature
149 -- | Name and signature pair. Used for methods and fields.
150 data NameType a = NameType {
151 ntName :: B.ByteString,
152 ntSignature :: Signature a }
154 instance (HasSignature a) => Show (NameType a) where
155 show (NameType n t) = toString n ++ ": " ++ show t
157 deriving instance HasSignature a => Eq (NameType a)
159 instance HasSignature a => Binary (NameType a) where
160 put (NameType n t) = putLazyByteString n >> put t
162 get = NameType <$> get <*> get
164 -- | Constant pool item
165 data Constant stage =
166 CClass (Link stage B.ByteString)
167 | CField {refClass :: Link stage B.ByteString, fieldNameType :: Link stage (NameType Field)}
168 | CMethod {refClass :: Link stage B.ByteString, nameType :: Link stage (NameType Method)}
169 | CIfaceMethod {refClass :: Link stage B.ByteString, nameType :: Link stage (NameType Method)}
170 | CString (Link stage B.ByteString)
175 | CNameType (Link stage B.ByteString) (Link stage B.ByteString)
176 | CUTF8 {getString :: B.ByteString}
177 | CUnicode {getString :: B.ByteString}
179 -- | Name of the CClass. Error on any other constant.
180 className :: Constant Direct -> B.ByteString
181 className (CClass s) = s
182 className x = error $ "Not a class: " ++ show x
184 instance Show (Constant Direct) where
185 show (CClass name) = "class " ++ toString name
186 show (CField cls nt) = "field " ++ toString cls ++ "." ++ show nt
187 show (CMethod cls nt) = "method " ++ toString cls ++ "." ++ show nt
188 show (CIfaceMethod cls nt) = "interface method " ++ toString cls ++ "." ++ show nt
189 show (CString s) = "String \"" ++ toString s ++ "\""
190 show (CInteger x) = show x
191 show (CFloat x) = show x
192 show (CLong x) = show x
193 show (CDouble x) = show x
194 show (CNameType name tp) = toString name ++ ": " ++ toString tp
195 show (CUTF8 s) = "UTF8 \"" ++ toString s ++ "\""
196 show (CUnicode s) = "Unicode \"" ++ toString s ++ "\""
199 type Pool stage = M.Map Word16 (Constant stage)
201 -- | Generic .class file format
202 data Class stage = Class {
203 magic :: Word32, -- ^ Magic value: 0xCAFEBABE
204 minorVersion :: Word16,
205 majorVersion :: Word16,
206 constsPoolSize :: Word16, -- ^ Number of items in constants pool
207 constsPool :: Pool stage, -- ^ Constants pool itself
208 accessFlags :: AccessFlags stage, -- ^ See @JVM.Types.AccessFlag@
209 thisClass :: Link stage B.ByteString, -- ^ Constants pool item index for this class
210 superClass :: Link stage B.ByteString, -- ^ --/-- for super class, zero for java.lang.Object
211 interfacesCount :: Word16, -- ^ Number of implemented interfaces
212 interfaces :: [Link stage B.ByteString], -- ^ Constants pool item indexes for implemented interfaces
213 classFieldsCount :: Word16, -- ^ Number of class fileds
214 classFields :: [Field stage], -- ^ Class fields
215 classMethodsCount :: Word16, -- ^ Number of class methods
216 classMethods :: [Method stage], -- ^ Class methods
217 classAttributesCount :: Word16, -- ^ Number of class attributes
218 classAttributes :: Attributes stage -- ^ Class attributes
221 deriving instance Eq (Constant File)
222 deriving instance Eq (Constant Direct)
223 deriving instance Show (Constant File)
225 defaultClass :: (Default (AccessFlags stage), Default (Link stage B.ByteString), Default (Attributes stage))
227 defaultClass = Class {
238 classFieldsCount = 0,
240 classMethodsCount = 0,
242 classAttributesCount = 0,
243 classAttributes = def }
245 instance Binary (Class File) where
246 put (Class {..}) = do
251 forM_ (M.elems constsPool) put
258 forM_ classFields put
259 put classMethodsCount
260 forM_ classMethods put
261 put classAttributesCount
262 forM_ (attributesList classAttributes) put
269 pool <- replicateM (fromIntegral poolsize - 1) get
273 interfacesCount <- get
274 ifaces <- replicateM (fromIntegral interfacesCount) get
275 classFieldsCount <- get
276 classFields <- replicateM (fromIntegral classFieldsCount) get
277 classMethodsCount <- get
278 classMethods <- replicateM (fromIntegral classMethodsCount) get
280 as <- replicateM (fromIntegral $ asCount) get
281 let pool' = M.fromList $ zip [1..] pool
282 return $ Class magic minor major poolsize pool' af this super
283 interfacesCount ifaces classFieldsCount classFields
284 classMethodsCount classMethods asCount (AP as)
286 -- | Field signature format
296 | ObjectType String -- ^ L @{class name}@
297 | Array (Maybe Int) FieldType -- ^ @[{type}@
300 instance Show FieldType where
301 show SignedByte = "byte"
302 show CharByte = "char"
303 show DoubleType = "double"
304 show FloatType = "float"
306 show LongInt = "long"
307 show ShortInt = "short"
308 show BoolType = "bool"
309 show (ObjectType s) = "Object " ++ s
310 show (Array Nothing t) = show t ++ "[]"
311 show (Array (Just n) t) = show t ++ "[" ++ show n ++ "]"
313 -- | Class field signature
314 type FieldSignature = FieldType
316 -- | Try to read integer value from decimal representation
317 getInt :: Get (Maybe Int)
322 else return $ Just (read s)
324 getDigits :: Get [Char]
326 c <- lookAhead getChar8
334 putString :: String -> Put
335 putString str = forM_ str put
337 instance Binary FieldType where
338 put SignedByte = put 'B'
339 put CharByte = put 'C'
340 put DoubleType = put 'D'
341 put FloatType = put 'F'
342 put IntType = put 'I'
343 put LongInt = put 'J'
344 put ShortInt = put 'S'
345 put BoolType = put 'Z'
346 put (ObjectType name) = put 'L' >> putString name >> put ';'
347 put (Array Nothing sig) = put '[' >> put sig
348 put (Array (Just n) sig) = put '[' >> put (show n) >> put sig
353 'B' -> return SignedByte
354 'C' -> return CharByte
355 'D' -> return DoubleType
356 'F' -> return FloatType
357 'I' -> return IntType
358 'J' -> return LongInt
359 'S' -> return ShortInt
360 'Z' -> return BoolType
362 name <- getToSemicolon
363 return (ObjectType name)
367 return (Array mbSize sig)
368 _ -> fail $ "Unknown signature opening symbol: " ++ [b]
370 -- | Read string up to `;'
371 getToSemicolon :: Get String
377 next <- getToSemicolon
380 -- | Return value signature
381 data ReturnSignature =
386 instance Show ReturnSignature where
387 show (Returns t) = show t
388 show ReturnsVoid = "Void"
390 instance Binary ReturnSignature where
391 put (Returns sig) = put sig
392 put ReturnsVoid = put 'V'
395 x <- lookAhead getChar8
397 'V' -> skip 1 >> return ReturnsVoid
400 -- | Method argument signature
401 type ArgumentSignature = FieldType
403 -- | Class method argument signature
404 data MethodSignature =
405 MethodSignature [ArgumentSignature] ReturnSignature
408 instance Show MethodSignature where
409 show (MethodSignature args ret) = "(" ++ intercalate ", " (map show args) ++ ") returns " ++ show ret
411 instance Binary MethodSignature where
412 put (MethodSignature args ret) = do
421 fail "Cannot parse method signature: no starting `(' !"
425 fail "Internal error: method signature without `)' !?"
427 return (MethodSignature args ret)
429 -- | Read arguments signatures (up to `)')
430 getArgs :: Get [ArgumentSignature]
431 getArgs = whileJust getArg
433 getArg :: Get (Maybe ArgumentSignature)
435 x <- lookAhead getChar8
440 whileJust :: (Monad m) => m (Maybe a) -> m [a]
449 instance Binary (Constant File) where
450 put (CClass i) = putWord8 7 >> put i
451 put (CField i j) = putWord8 9 >> put i >> put j
452 put (CMethod i j) = putWord8 10 >> put i >> put j
453 put (CIfaceMethod i j) = putWord8 11 >> put i >> put j
454 put (CString i) = putWord8 8 >> put i
455 put (CInteger x) = putWord8 3 >> put x
456 put (CFloat x) = putWord8 4 >> putFloat32be x
457 put (CLong x) = putWord8 5 >> put x
458 put (CDouble x) = putWord8 6 >> putFloat64be x
459 put (CNameType i j) = putWord8 12 >> put i >> put j
462 put (fromIntegral (B.length bs) :: Word16)
464 put (CUnicode bs) = do
466 put (fromIntegral (B.length bs) :: Word16)
475 bs <- getLazyByteString (fromIntegral (l :: Word16))
479 bs <- getLazyByteString (fromIntegral (l :: Word16))
481 3 -> CInteger <$> get
482 4 -> CFloat <$> getFloat32be
484 6 -> CDouble <$> getFloat64be
487 9 -> CField <$> get <*> get
488 10 -> CMethod <$> get <*> get
489 11 -> CIfaceMethod <$> get <*> get
490 12 -> CNameType <$> get <*> get
491 _ -> fail $ "Unknown constants pool entry tag: " ++ show tag
493 -- | Class field format
494 data Field stage = Field {
495 fieldAccessFlags :: AccessFlags stage,
496 fieldName :: Link stage B.ByteString,
497 fieldSignature :: Link stage FieldSignature,
498 fieldAttributesCount :: Word16,
499 fieldAttributes :: Attributes stage }
501 deriving instance Eq (Field File)
502 deriving instance Eq (Field Direct)
503 deriving instance Show (Field File)
504 deriving instance Show (Field Direct)
506 instance Binary (Field File) where
507 put (Field {..}) = do
511 put fieldAttributesCount
512 forM_ (attributesList fieldAttributes) put
519 as <- replicateM (fromIntegral n) get
520 return $ Field af ni si n (AP as)
522 -- | Class method format
523 data Method stage = Method {
524 methodAccessFlags :: AccessFlags stage,
525 methodName :: Link stage B.ByteString,
526 methodSignature :: Link stage MethodSignature,
527 methodAttributesCount :: Word16,
528 methodAttributes :: Attributes stage }
530 deriving instance Eq (Method File)
531 deriving instance Eq (Method Direct)
532 deriving instance Show (Method File)
533 deriving instance Show (Method Direct)
535 instance Binary (Method File) where
536 put (Method {..}) = do
537 put methodAccessFlags
540 put methodAttributesCount
541 forM_ (attributesList methodAttributes) put
549 as <- replicateM (fromIntegral n) get
551 methodAccessFlags = af,
553 methodSignature = si,
554 methodAttributesCount = n,
555 methodAttributes = AP as }
557 -- | Any (class/ field/ method/ ...) attribute format.
558 -- Some formats specify special formats for @attributeValue@.
559 data Attribute = Attribute {
560 attributeName :: Word16,
561 attributeLength :: Word32,
562 attributeValue :: B.ByteString }
565 instance Binary Attribute where
566 put (Attribute {..}) = do
568 putWord32be attributeLength
569 putLazyByteString attributeValue
575 value <- getLazyByteString (fromIntegral len)
576 return $ Attribute name len value
578 class HasAttributes a where
579 attributes :: a stage -> Attributes stage
581 instance HasAttributes Class where
582 attributes = classAttributes
584 instance HasAttributes Field where
585 attributes = fieldAttributes
587 instance HasAttributes Method where
588 attributes = methodAttributes