1 {-# LANGUAGE RecordWildCards, BangPatterns, TypeFamilies, StandaloneDeriving, FlexibleContexts, FlexibleInstances, UndecidableInstances, TypeSynonymInstances #-}
2 -- | This module declares (low-level) data types for Java .class files
3 -- structures, and Binary instances to read/write them.
8 -- * Internal class file structures
12 FieldSignature, MethodSignature (..), ReturnSignature (..),
13 ArgumentSignature (..),
16 -- * Staged structures
18 Method (..), Field (..), Class (..),
20 AccessFlag (..), AccessFlags,
24 HasSignature (..), HasAttributes (..),
26 fieldNameType, methodNameType,
29 apsize, arsize, arlist
34 import Control.Applicative
36 import Data.Binary.IEEE754
37 import Data.Binary.Get
38 import Data.Binary.Put
42 import qualified Data.Set as S
43 import qualified Data.Map as M
44 import qualified Data.ByteString.Lazy as B
45 import Codec.Binary.UTF8.String hiding (encode, decode)
49 -- Java .class file uses constants pool, which stores almost all source-code-level
50 -- constants (strings, integer literals etc), and also all identifiers (class,
51 -- method, field names etc). All other structures contain indexes of constants in
52 -- the pool instead of constants theirself.
54 -- It's not convient to use that indexes programmatically. So, .class file is represented
55 -- at two stages: File and Direct. At File stage, all data structures contain only indexes,
56 -- not constants theirself. When we read a class from a file, we get structure at File stage.
57 -- We only can write File stage structure to file.
59 -- At Direct stage, structures conain constants, not indexes. Convertion functions (File <-> Direct)
60 -- are located in the JVM.Converter module.
63 -- | Read one-byte Char
67 return $ chr (fromIntegral x)
69 toString :: B.ByteString -> String
70 toString bstr = decodeString $ map (chr . fromIntegral) $ B.unpack bstr
75 -- | Direct representation stage
78 -- | Link to some object
79 type family Link stage a
81 -- | At File stage, Link contain index of object in the constants pool.
82 type instance Link File a = Word16
84 -- | At Direct stage, Link contain object itself.
85 type instance Link Direct a = a
87 -- | Object (class, method, field …) access flags
88 type family AccessFlags stage
90 -- | At File stage, access flags are represented as Word16
91 type instance AccessFlags File = Word16
93 -- | At Direct stage, access flags are represented as set of flags.
94 type instance AccessFlags Direct = S.Set AccessFlag
96 -- | Object (class, method, field) attributes
97 data family Attributes stage
99 -- | At File stage, attributes are represented as list of Attribute structures.
100 data instance Attributes File = AP {attributesList :: [Attribute]}
103 instance Default (Attributes File) where
106 -- | At Direct stage, attributes are represented as a Map.
107 data instance Attributes Direct = AR (M.Map B.ByteString B.ByteString)
110 instance Default (Attributes Direct) where
113 -- | Size of attributes set at Direct stage
114 arsize :: Attributes Direct -> Int
115 arsize (AR m) = M.size m
117 -- | Associative list of attributes at Direct stage
118 arlist :: Attributes Direct -> [(B.ByteString, B.ByteString)]
119 arlist (AR m) = M.assocs m
121 -- | Size of attributes set at File stage
122 apsize :: Attributes File -> Int
123 apsize (AP list) = length list
125 -- | Access flags. Used for classess, methods, variables.
127 ACC_PUBLIC -- ^ 0x0001 Visible for all
128 | ACC_PRIVATE -- ^ 0x0002 Visible only for defined class
129 | ACC_PROTECTED -- ^ 0x0004 Visible only for subclasses
130 | ACC_STATIC -- ^ 0x0008 Static method or variable
131 | ACC_FINAL -- ^ 0x0010 No further subclassing or assignments
132 | ACC_SYNCHRONIZED -- ^ 0x0020 Uses monitors
133 | ACC_VOLATILE -- ^ 0x0040 Could not be cached
134 | ACC_TRANSIENT -- ^ 0x0080
135 | ACC_NATIVE -- ^ 0x0100 Implemented in other language
136 | ACC_INTERFACE -- ^ 0x0200 Class is interface
137 | ACC_ABSTRACT -- ^ 0x0400
138 deriving (Eq, Show, Ord, Enum)
140 -- | Fields and methods have signatures.
141 class (Binary (Signature a), Show (Signature a), Eq (Signature a))
142 => HasSignature a where
145 instance HasSignature Field where
146 type Signature Field = FieldSignature
148 instance HasSignature Method where
149 type Signature Method = MethodSignature
151 -- | Name and signature pair. Used for methods and fields.
152 data NameType a = NameType {
153 ntName :: B.ByteString,
154 ntSignature :: Signature a }
156 instance (HasSignature a) => Show (NameType a) where
157 show (NameType n t) = toString n ++ ": " ++ show t
159 deriving instance HasSignature a => Eq (NameType a)
161 instance HasSignature a => Binary (NameType a) where
162 put (NameType n t) = putLazyByteString n >> put t
164 get = NameType <$> get <*> get
166 -- | Constant pool item
167 data Constant stage =
168 CClass (Link stage B.ByteString)
169 | CField (Link stage B.ByteString) (Link stage (NameType Field))
170 | CMethod (Link stage B.ByteString) (Link stage (NameType Method))
171 | CIfaceMethod (Link stage B.ByteString) (Link stage (NameType Method))
172 | CString (Link stage B.ByteString)
177 | CNameType (Link stage B.ByteString) (Link stage B.ByteString)
178 | CUTF8 {getString :: B.ByteString}
179 | CUnicode {getString :: B.ByteString}
181 -- | Name of the CClass. Error on any other constant.
182 className :: Constant Direct -> B.ByteString
183 className (CClass s) = s
184 className x = error $ "Not a class: " ++ show x
186 instance Show (Constant Direct) where
187 show (CClass name) = "class " ++ toString name
188 show (CField cls nt) = "field " ++ toString cls ++ "." ++ show nt
189 show (CMethod cls nt) = "method " ++ toString cls ++ "." ++ show nt
190 show (CIfaceMethod cls nt) = "interface method " ++ toString cls ++ "." ++ show nt
191 show (CString s) = "String \"" ++ toString s ++ "\""
192 show (CInteger x) = show x
193 show (CFloat x) = show x
194 show (CLong x) = show x
195 show (CDouble x) = show x
196 show (CNameType name tp) = toString name ++ ": " ++ toString tp
197 show (CUTF8 s) = "UTF8 \"" ++ toString s ++ "\""
198 show (CUnicode s) = "Unicode \"" ++ toString s ++ "\""
201 type Pool stage = M.Map Word16 (Constant stage)
203 -- | Generic .class file format
204 data Class stage = Class {
205 magic :: Word32, -- ^ Magic value: 0xCAFEBABE
206 minorVersion :: Word16,
207 majorVersion :: Word16,
208 constsPoolSize :: Word16, -- ^ Number of items in constants pool
209 constsPool :: Pool stage, -- ^ Constants pool itself
210 accessFlags :: AccessFlags stage, -- ^ See @JVM.Types.AccessFlag@
211 thisClass :: Link stage B.ByteString, -- ^ Constants pool item index for this class
212 superClass :: Link stage B.ByteString, -- ^ --/-- for super class, zero for java.lang.Object
213 interfacesCount :: Word16, -- ^ Number of implemented interfaces
214 interfaces :: [Link stage B.ByteString], -- ^ Constants pool item indexes for implemented interfaces
215 classFieldsCount :: Word16, -- ^ Number of class fileds
216 classFields :: [Field stage], -- ^ Class fields
217 classMethodsCount :: Word16, -- ^ Number of class methods
218 classMethods :: [Method stage], -- ^ Class methods
219 classAttributesCount :: Word16, -- ^ Number of class attributes
220 classAttributes :: Attributes stage -- ^ Class attributes
223 deriving instance Eq (Class File)
224 deriving instance Eq (Class Direct)
225 deriving instance Show (Class File)
226 deriving instance Show (Class Direct)
228 deriving instance Eq (Constant File)
229 deriving instance Eq (Constant Direct)
230 deriving instance Show (Constant File)
232 -- | Default (empty) class file definition.
233 defaultClass :: (Default (AccessFlags stage), Default (Link stage B.ByteString), Default (Attributes stage))
235 defaultClass = Class {
246 classFieldsCount = 0,
248 classMethodsCount = 0,
250 classAttributesCount = 0,
251 classAttributes = def }
253 instance Binary (Class File) where
254 put (Class {..}) = do
259 forM_ (M.elems constsPool) put
266 forM_ classFields put
267 put classMethodsCount
268 forM_ classMethods put
269 put classAttributesCount
270 forM_ (attributesList classAttributes) put
277 pool <- replicateM (fromIntegral poolsize - 1) get
281 interfacesCount <- get
282 ifaces <- replicateM (fromIntegral interfacesCount) get
283 classFieldsCount <- get
284 classFields <- replicateM (fromIntegral classFieldsCount) get
285 classMethodsCount <- get
286 classMethods <- replicateM (fromIntegral classMethodsCount) get
288 as <- replicateM (fromIntegral $ asCount) get
289 let pool' = M.fromList $ zip [1..] pool
290 return $ Class magic minor major poolsize pool' af this super
291 interfacesCount ifaces classFieldsCount classFields
292 classMethodsCount classMethods asCount (AP as)
294 -- | Field signature format
304 | ObjectType String -- ^ L @{class name}@
305 | Array (Maybe Int) FieldType -- ^ @[{type}@
308 instance Show FieldType where
309 show SignedByte = "byte"
310 show CharByte = "char"
311 show DoubleType = "double"
312 show FloatType = "float"
314 show LongInt = "long"
315 show ShortInt = "short"
316 show BoolType = "bool"
317 show (ObjectType s) = "Object " ++ s
318 show (Array Nothing t) = show t ++ "[]"
319 show (Array (Just n) t) = show t ++ "[" ++ show n ++ "]"
321 -- | Class field signature
322 type FieldSignature = FieldType
324 -- | Try to read integer value from decimal representation
325 getInt :: Get (Maybe Int)
330 else return $ Just (read s)
332 getDigits :: Get [Char]
334 c <- lookAhead getChar8
342 putString :: String -> Put
343 putString str = forM_ str put
345 instance Binary FieldType where
346 put SignedByte = put 'B'
347 put CharByte = put 'C'
348 put DoubleType = put 'D'
349 put FloatType = put 'F'
350 put IntType = put 'I'
351 put LongInt = put 'J'
352 put ShortInt = put 'S'
353 put BoolType = put 'Z'
354 put (ObjectType name) = put 'L' >> putString name >> put ';'
355 put (Array Nothing sig) = put '[' >> put sig
356 put (Array (Just n) sig) = put '[' >> put (show n) >> put sig
361 'B' -> return SignedByte
362 'C' -> return CharByte
363 'D' -> return DoubleType
364 'F' -> return FloatType
365 'I' -> return IntType
366 'J' -> return LongInt
367 'S' -> return ShortInt
368 'Z' -> return BoolType
370 name <- getToSemicolon
371 return (ObjectType name)
375 return (Array mbSize sig)
376 _ -> fail $ "Unknown signature opening symbol: " ++ [b]
378 -- | Read string up to `;'
379 getToSemicolon :: Get String
385 next <- getToSemicolon
388 -- | Return value signature
389 data ReturnSignature =
394 instance Show ReturnSignature where
395 show (Returns t) = show t
396 show ReturnsVoid = "Void"
398 instance Binary ReturnSignature where
399 put (Returns sig) = put sig
400 put ReturnsVoid = put 'V'
403 x <- lookAhead getChar8
405 'V' -> skip 1 >> return ReturnsVoid
408 -- | Method argument signature
409 type ArgumentSignature = FieldType
411 -- | Class method argument signature
412 data MethodSignature =
413 MethodSignature [ArgumentSignature] ReturnSignature
416 instance Show MethodSignature where
417 show (MethodSignature args ret) = "(" ++ intercalate ", " (map show args) ++ ") returns " ++ show ret
419 instance Binary MethodSignature where
420 put (MethodSignature args ret) = do
429 fail "Cannot parse method signature: no starting `(' !"
433 fail "Internal error: method signature without `)' !?"
435 return (MethodSignature args ret)
437 -- | Read arguments signatures (up to `)')
438 getArgs :: Get [ArgumentSignature]
439 getArgs = whileJust getArg
441 getArg :: Get (Maybe ArgumentSignature)
443 x <- lookAhead getChar8
448 whileJust :: (Monad m) => m (Maybe a) -> m [a]
457 instance Binary (Constant File) where
458 put (CClass i) = putWord8 7 >> put i
459 put (CField i j) = putWord8 9 >> put i >> put j
460 put (CMethod i j) = putWord8 10 >> put i >> put j
461 put (CIfaceMethod i j) = putWord8 11 >> put i >> put j
462 put (CString i) = putWord8 8 >> put i
463 put (CInteger x) = putWord8 3 >> put x
464 put (CFloat x) = putWord8 4 >> putFloat32be x
465 put (CLong x) = putWord8 5 >> put x
466 put (CDouble x) = putWord8 6 >> putFloat64be x
467 put (CNameType i j) = putWord8 12 >> put i >> put j
470 put (fromIntegral (B.length bs) :: Word16)
472 put (CUnicode bs) = do
474 put (fromIntegral (B.length bs) :: Word16)
483 bs <- getLazyByteString (fromIntegral (l :: Word16))
487 bs <- getLazyByteString (fromIntegral (l :: Word16))
489 3 -> CInteger <$> get
490 4 -> CFloat <$> getFloat32be
492 6 -> CDouble <$> getFloat64be
495 9 -> CField <$> get <*> get
496 10 -> CMethod <$> get <*> get
497 11 -> CIfaceMethod <$> get <*> get
498 12 -> CNameType <$> get <*> get
499 _ -> fail $ "Unknown constants pool entry tag: " ++ show tag
501 -- | Class field format
502 data Field stage = Field {
503 fieldAccessFlags :: AccessFlags stage,
504 fieldName :: Link stage B.ByteString,
505 fieldSignature :: Link stage FieldSignature,
506 fieldAttributesCount :: Word16,
507 fieldAttributes :: Attributes stage }
509 deriving instance Eq (Field File)
510 deriving instance Eq (Field Direct)
511 deriving instance Show (Field File)
512 deriving instance Show (Field Direct)
514 fieldNameType :: Field Direct -> NameType Field
515 fieldNameType f = NameType (fieldName f) (fieldSignature f)
517 instance Binary (Field File) where
518 put (Field {..}) = do
522 put fieldAttributesCount
523 forM_ (attributesList fieldAttributes) put
530 as <- replicateM (fromIntegral n) get
531 return $ Field af ni si n (AP as)
533 -- | Class method format
534 data Method stage = Method {
535 methodAccessFlags :: AccessFlags stage,
536 methodName :: Link stage B.ByteString,
537 methodSignature :: Link stage MethodSignature,
538 methodAttributesCount :: Word16,
539 methodAttributes :: Attributes stage }
541 deriving instance Eq (Method File)
542 deriving instance Eq (Method Direct)
543 deriving instance Show (Method File)
544 deriving instance Show (Method Direct)
546 methodNameType :: Method Direct -> NameType Method
547 methodNameType m = NameType (methodName m) (methodSignature m)
549 instance Binary (Method File) where
550 put (Method {..}) = do
551 put methodAccessFlags
554 put methodAttributesCount
555 forM_ (attributesList methodAttributes) put
563 as <- replicateM (fromIntegral n) get
565 methodAccessFlags = af,
567 methodSignature = si,
568 methodAttributesCount = n,
569 methodAttributes = AP as }
571 -- | Any (class/ field/ method/ ...) attribute format.
572 -- Some formats specify special formats for @attributeValue@.
573 data Attribute = Attribute {
574 attributeName :: Word16,
575 attributeLength :: Word32,
576 attributeValue :: B.ByteString }
579 instance Binary Attribute where
580 put (Attribute {..}) = do
582 putWord32be attributeLength
583 putLazyByteString attributeValue
589 value <- getLazyByteString (fromIntegral len)
590 return $ Attribute name len value
592 class HasAttributes a where
593 attributes :: a stage -> Attributes stage
595 instance HasAttributes Class where
596 attributes = classAttributes
598 instance HasAttributes Field where
599 attributes = fieldAttributes
601 instance HasAttributes Method where
602 attributes = methodAttributes