1 {-# LANGUAGE RecordWildCards, BangPatterns, TypeFamilies, StandaloneDeriving, FlexibleContexts, FlexibleInstances, UndecidableInstances, TypeSynonymInstances #-}
2 -- | This module declares (low-level) data types for Java .class files
3 -- structures, and Binary instances to read/write them.
8 -- * Internal class file structures
12 FieldSignature, MethodSignature (..), ReturnSignature (..),
13 ArgumentSignature (..),
16 -- * Staged structures
18 Method (..), Field (..), Class (..),
20 AccessFlag (..), AccessFlags,
24 HasSignature (..), HasAttributes (..),
26 fieldNameType, methodNameType,
27 lookupField, lookupMethod,
30 apsize, arsize, arlist
35 import Control.Monad.Trans (lift)
36 import Control.Applicative
37 import qualified Control.Monad.State as St
39 import Data.Binary.IEEE754
40 import Data.Binary.Get
41 import Data.Binary.Put
45 import qualified Data.Set as S
46 import qualified Data.Map as M
47 import qualified Data.ByteString.Lazy as B
48 import Codec.Binary.UTF8.String hiding (encode, decode)
52 -- Java .class file uses constants pool, which stores almost all source-code-level
53 -- constants (strings, integer literals etc), and also all identifiers (class,
54 -- method, field names etc). All other structures contain indexes of constants in
55 -- the pool instead of constants theirself.
57 -- It's not convient to use that indexes programmatically. So, .class file is represented
58 -- at two stages: File and Direct. At File stage, all data structures contain only indexes,
59 -- not constants theirself. When we read a class from a file, we get structure at File stage.
60 -- We only can write File stage structure to file.
62 -- At Direct stage, structures conain constants, not indexes. Convertion functions (File <-> Direct)
63 -- are located in the JVM.Converter module.
66 -- | Read one-byte Char
70 return $ chr (fromIntegral x)
72 toString :: B.ByteString -> String
73 toString bstr = decodeString $ map (chr . fromIntegral) $ B.unpack bstr
78 -- | Direct representation stage
81 -- | Link to some object
82 type family Link stage a
84 -- | At File stage, Link contain index of object in the constants pool.
85 type instance Link File a = Word16
87 -- | At Direct stage, Link contain object itself.
88 type instance Link Direct a = a
90 -- | Object (class, method, field …) access flags
91 type family AccessFlags stage
93 -- | At File stage, access flags are represented as Word16
94 type instance AccessFlags File = Word16
96 -- | At Direct stage, access flags are represented as set of flags.
97 type instance AccessFlags Direct = S.Set AccessFlag
99 -- | Object (class, method, field) attributes
100 data family Attributes stage
102 -- | At File stage, attributes are represented as list of Attribute structures.
103 data instance Attributes File = AP {attributesList :: [Attribute]}
106 instance Default (Attributes File) where
109 -- | At Direct stage, attributes are represented as a Map.
110 data instance Attributes Direct = AR (M.Map B.ByteString B.ByteString)
113 instance Default (Attributes Direct) where
116 -- | Size of attributes set at Direct stage
117 arsize :: Attributes Direct -> Int
118 arsize (AR m) = M.size m
120 -- | Associative list of attributes at Direct stage
121 arlist :: Attributes Direct -> [(B.ByteString, B.ByteString)]
122 arlist (AR m) = M.assocs m
124 -- | Size of attributes set at File stage
125 apsize :: Attributes File -> Int
126 apsize (AP list) = length list
128 -- | Access flags. Used for classess, methods, variables.
130 ACC_PUBLIC -- ^ 0x0001 Visible for all
131 | ACC_PRIVATE -- ^ 0x0002 Visible only for defined class
132 | ACC_PROTECTED -- ^ 0x0004 Visible only for subclasses
133 | ACC_STATIC -- ^ 0x0008 Static method or variable
134 | ACC_FINAL -- ^ 0x0010 No further subclassing or assignments
135 | ACC_SYNCHRONIZED -- ^ 0x0020 Uses monitors
136 | ACC_VOLATILE -- ^ 0x0040 Could not be cached
137 | ACC_TRANSIENT -- ^ 0x0080
138 | ACC_NATIVE -- ^ 0x0100 Implemented in other language
139 | ACC_INTERFACE -- ^ 0x0200 Class is interface
140 | ACC_ABSTRACT -- ^ 0x0400
141 deriving (Eq, Show, Ord, Enum)
143 -- | Fields and methods have signatures.
144 class (Binary (Signature a), Show (Signature a), Eq (Signature a))
145 => HasSignature a where
148 instance HasSignature Field where
149 type Signature Field = FieldSignature
151 instance HasSignature Method where
152 type Signature Method = MethodSignature
154 -- | Name and signature pair. Used for methods and fields.
155 data NameType a = NameType {
156 ntName :: B.ByteString,
157 ntSignature :: Signature a }
159 instance (HasSignature a) => Show (NameType a) where
160 show (NameType n t) = toString n ++ ": " ++ show t
162 deriving instance HasSignature a => Eq (NameType a)
164 instance HasSignature a => Binary (NameType a) where
165 put (NameType n t) = putLazyByteString n >> put t
167 get = NameType <$> get <*> get
169 -- | Constant pool item
170 data Constant stage =
171 CClass (Link stage B.ByteString)
172 | CField (Link stage B.ByteString) (Link stage (NameType Field))
173 | CMethod (Link stage B.ByteString) (Link stage (NameType Method))
174 | CIfaceMethod (Link stage B.ByteString) (Link stage (NameType Method))
175 | CString (Link stage B.ByteString)
180 | CNameType (Link stage B.ByteString) (Link stage B.ByteString)
181 | CUTF8 {getString :: B.ByteString}
182 | CUnicode {getString :: B.ByteString}
184 -- | Name of the CClass. Error on any other constant.
185 className :: Constant Direct -> B.ByteString
186 className (CClass s) = s
187 className x = error $ "Not a class: " ++ show x
189 instance Show (Constant Direct) where
190 show (CClass name) = "class " ++ toString name
191 show (CField cls nt) = "field " ++ toString cls ++ "." ++ show nt
192 show (CMethod cls nt) = "method " ++ toString cls ++ "." ++ show nt
193 show (CIfaceMethod cls nt) = "interface method " ++ toString cls ++ "." ++ show nt
194 show (CString s) = "String \"" ++ toString s ++ "\""
195 show (CInteger x) = show x
196 show (CFloat x) = show x
197 show (CLong x) = show x
198 show (CDouble x) = show x
199 show (CNameType name tp) = toString name ++ ": " ++ toString tp
200 show (CUTF8 s) = "UTF8 \"" ++ toString s ++ "\""
201 show (CUnicode s) = "Unicode \"" ++ toString s ++ "\""
204 type Pool stage = M.Map Word16 (Constant stage)
206 -- | Generic .class file format
207 data Class stage = Class {
208 magic :: Word32, -- ^ Magic value: 0xCAFEBABE
209 minorVersion :: Word16,
210 majorVersion :: Word16,
211 constsPoolSize :: Word16, -- ^ Number of items in constants pool
212 constsPool :: Pool stage, -- ^ Constants pool itself
213 accessFlags :: AccessFlags stage, -- ^ See @JVM.Types.AccessFlag@
214 thisClass :: Link stage B.ByteString, -- ^ Constants pool item index for this class
215 superClass :: Link stage B.ByteString, -- ^ --/-- for super class, zero for java.lang.Object
216 interfacesCount :: Word16, -- ^ Number of implemented interfaces
217 interfaces :: [Link stage B.ByteString], -- ^ Constants pool item indexes for implemented interfaces
218 classFieldsCount :: Word16, -- ^ Number of class fileds
219 classFields :: [Field stage], -- ^ Class fields
220 classMethodsCount :: Word16, -- ^ Number of class methods
221 classMethods :: [Method stage], -- ^ Class methods
222 classAttributesCount :: Word16, -- ^ Number of class attributes
223 classAttributes :: Attributes stage -- ^ Class attributes
226 deriving instance Eq (Class File)
227 deriving instance Eq (Class Direct)
228 deriving instance Show (Class File)
229 deriving instance Show (Class Direct)
231 deriving instance Eq (Constant File)
232 deriving instance Eq (Constant Direct)
233 deriving instance Show (Constant File)
235 -- | Default (empty) class file definition.
236 defaultClass :: (Default (AccessFlags stage), Default (Link stage B.ByteString), Default (Attributes stage))
238 defaultClass = Class {
249 classFieldsCount = 0,
251 classMethodsCount = 0,
253 classAttributesCount = 0,
254 classAttributes = def }
256 instance Binary (Class File) where
257 put (Class {..}) = do
268 forM_ classFields put
269 put classMethodsCount
270 forM_ classMethods put
271 put classAttributesCount
272 forM_ (attributesList classAttributes) put
276 when (magic /= 0xCAFEBABE) $
277 fail $ "Invalid .class file MAGIC value: " ++ show magic
281 fail $ "Too new .class file format: " ++ show major
282 poolsize <- getWord16be
283 pool <- getPool (poolsize - 1)
287 interfacesCount <- get
288 ifaces <- replicateM (fromIntegral interfacesCount) get
289 classFieldsCount <- getWord16be
290 classFields <- replicateM (fromIntegral classFieldsCount) get
291 classMethodsCount <- get
292 classMethods <- replicateM (fromIntegral classMethodsCount) get
294 as <- replicateM (fromIntegral $ asCount) get
295 return $ Class magic minor major poolsize pool af this super
296 interfacesCount ifaces classFieldsCount classFields
297 classMethodsCount classMethods asCount (AP as)
299 -- | Field signature format
309 | ObjectType String -- ^ L @{class name}@
310 | Array (Maybe Int) FieldType -- ^ @[{type}@
313 instance Show FieldType where
314 show SignedByte = "byte"
315 show CharByte = "char"
316 show DoubleType = "double"
317 show FloatType = "float"
319 show LongInt = "long"
320 show ShortInt = "short"
321 show BoolType = "bool"
322 show (ObjectType s) = "Object " ++ s
323 show (Array Nothing t) = show t ++ "[]"
324 show (Array (Just n) t) = show t ++ "[" ++ show n ++ "]"
326 -- | Class field signature
327 type FieldSignature = FieldType
329 -- | Try to read integer value from decimal representation
330 getInt :: Get (Maybe Int)
335 else return $ Just (read s)
337 getDigits :: Get [Char]
339 c <- lookAhead getChar8
347 putString :: String -> Put
348 putString str = forM_ str put
350 instance Binary FieldType where
351 put SignedByte = put 'B'
352 put CharByte = put 'C'
353 put DoubleType = put 'D'
354 put FloatType = put 'F'
355 put IntType = put 'I'
356 put LongInt = put 'J'
357 put ShortInt = put 'S'
358 put BoolType = put 'Z'
359 put (ObjectType name) = put 'L' >> putString name >> put ';'
360 put (Array Nothing sig) = put '[' >> put sig
361 put (Array (Just n) sig) = put '[' >> put (show n) >> put sig
366 'B' -> return SignedByte
367 'C' -> return CharByte
368 'D' -> return DoubleType
369 'F' -> return FloatType
370 'I' -> return IntType
371 'J' -> return LongInt
372 'S' -> return ShortInt
373 'Z' -> return BoolType
375 name <- getToSemicolon
376 return (ObjectType name)
380 return (Array mbSize sig)
381 _ -> fail $ "Unknown signature opening symbol: " ++ [b]
383 -- | Read string up to `;'
384 getToSemicolon :: Get String
390 next <- getToSemicolon
393 -- | Return value signature
394 data ReturnSignature =
399 instance Show ReturnSignature where
400 show (Returns t) = show t
401 show ReturnsVoid = "Void"
403 instance Binary ReturnSignature where
404 put (Returns sig) = put sig
405 put ReturnsVoid = put 'V'
408 x <- lookAhead getChar8
410 'V' -> skip 1 >> return ReturnsVoid
413 -- | Method argument signature
414 type ArgumentSignature = FieldType
416 -- | Class method argument signature
417 data MethodSignature =
418 MethodSignature [ArgumentSignature] ReturnSignature
421 instance Show MethodSignature where
422 show (MethodSignature args ret) = "(" ++ intercalate ", " (map show args) ++ ") returns " ++ show ret
424 instance Binary MethodSignature where
425 put (MethodSignature args ret) = do
434 fail "Cannot parse method signature: no starting `(' !"
438 fail "Internal error: method signature without `)' !?"
440 return (MethodSignature args ret)
442 -- | Read arguments signatures (up to `)')
443 getArgs :: Get [ArgumentSignature]
444 getArgs = whileJust getArg
446 getArg :: Get (Maybe ArgumentSignature)
448 x <- lookAhead getChar8
453 whileJust :: (Monad m) => m (Maybe a) -> m [a]
462 long (CLong _) = True
463 long (CDouble _) = True
466 putPool :: Pool File -> Put
468 let list = M.elems pool
469 d = length $ filter long list
470 putWord16be $ fromIntegral (M.size pool + d + 1)
473 putC (CClass i) = putWord8 7 >> put i
474 putC (CField i j) = putWord8 9 >> put i >> put j
475 putC (CMethod i j) = putWord8 10 >> put i >> put j
476 putC (CIfaceMethod i j) = putWord8 11 >> put i >> put j
477 putC (CString i) = putWord8 8 >> put i
478 putC (CInteger x) = putWord8 3 >> put x
479 putC (CFloat x) = putWord8 4 >> putFloat32be x
480 putC (CLong x) = putWord8 5 >> put x
481 putC (CDouble x) = putWord8 6 >> putFloat64be x
482 putC (CNameType i j) = putWord8 12 >> put i >> put j
485 put (fromIntegral (B.length bs) :: Word16)
487 putC (CUnicode bs) = do
489 put (fromIntegral (B.length bs) :: Word16)
492 getPool :: Word16 -> Get (Pool File)
494 items <- St.evalStateT go 1
495 return $ M.fromList items
497 go :: St.StateT Word16 Get [(Word16, Constant File)]
517 bs <- getLazyByteString (fromIntegral (l :: Word16))
521 bs <- getLazyByteString (fromIntegral (l :: Word16))
523 3 -> CInteger <$> get
524 4 -> CFloat <$> getFloat32be
526 6 -> CDouble <$> getFloat64be
529 9 -> CField <$> get <*> get
530 10 -> CMethod <$> get <*> get
531 11 -> CIfaceMethod <$> get <*> get
532 12 -> CNameType <$> get <*> get
533 _ -> fail $ "Unknown constants pool entry tag: " ++ show tag
534 -- _ -> return $ CInteger 0
536 -- | Class field format
537 data Field stage = Field {
538 fieldAccessFlags :: AccessFlags stage,
539 fieldName :: Link stage B.ByteString,
540 fieldSignature :: Link stage FieldSignature,
541 fieldAttributesCount :: Word16,
542 fieldAttributes :: Attributes stage }
544 deriving instance Eq (Field File)
545 deriving instance Eq (Field Direct)
546 deriving instance Show (Field File)
547 deriving instance Show (Field Direct)
549 lookupField :: B.ByteString -> Class Direct -> Maybe (Field Direct)
550 lookupField name cls = look (classFields cls)
554 | fieldName f == name = Just f
555 | otherwise = look fs
557 fieldNameType :: Field Direct -> NameType Field
558 fieldNameType f = NameType (fieldName f) (fieldSignature f)
560 instance Binary (Field File) where
561 put (Field {..}) = do
565 put fieldAttributesCount
566 forM_ (attributesList fieldAttributes) put
573 as <- replicateM (fromIntegral n) get
574 return $ Field af ni si n (AP as)
576 -- | Class method format
577 data Method stage = Method {
578 methodAccessFlags :: AccessFlags stage,
579 methodName :: Link stage B.ByteString,
580 methodSignature :: Link stage MethodSignature,
581 methodAttributesCount :: Word16,
582 methodAttributes :: Attributes stage }
584 deriving instance Eq (Method File)
585 deriving instance Eq (Method Direct)
586 deriving instance Show (Method File)
587 deriving instance Show (Method Direct)
589 methodNameType :: Method Direct -> NameType Method
590 methodNameType m = NameType (methodName m) (methodSignature m)
592 lookupMethod :: B.ByteString -> Class Direct -> Maybe (Method Direct)
593 lookupMethod name cls = look (classMethods cls)
597 | methodName f == name = Just f
598 | otherwise = look fs
600 instance Binary (Method File) where
601 put (Method {..}) = do
602 put methodAccessFlags
605 put methodAttributesCount
606 forM_ (attributesList methodAttributes) put
614 as <- replicateM (fromIntegral n) get
616 methodAccessFlags = af,
618 methodSignature = si,
619 methodAttributesCount = n,
620 methodAttributes = AP as }
622 -- | Any (class/ field/ method/ ...) attribute format.
623 -- Some formats specify special formats for @attributeValue@.
624 data Attribute = Attribute {
625 attributeName :: Word16,
626 attributeLength :: Word32,
627 attributeValue :: B.ByteString }
630 instance Binary Attribute where
631 put (Attribute {..}) = do
633 putWord32be attributeLength
634 putLazyByteString attributeValue
640 value <- getLazyByteString (fromIntegral len)
641 return $ Attribute name len value
643 class HasAttributes a where
644 attributes :: a stage -> Attributes stage
646 instance HasAttributes Class where
647 attributes = classAttributes
649 instance HasAttributes Field where
650 attributes = fieldAttributes
652 instance HasAttributes Method where
653 attributes = methodAttributes