1 {-# LANGUAGE RecordWildCards, BangPatterns, TypeFamilies, StandaloneDeriving, FlexibleContexts, FlexibleInstances, UndecidableInstances, TypeSynonymInstances #-}
2 -- | This module declares (low-level) data types for Java .class files
3 -- structures, and Binary instances to read/write them.
9 -- * Internal class file structures
13 FieldSignature, MethodSignature (..), ReturnSignature (..),
14 ArgumentSignature (..),
17 -- * Staged structures
19 Method (..), Field (..), Class (..),
21 AccessFlag (..), AccessFlags,
25 HasSignature (..), HasAttributes (..),
27 fieldNameType, methodNameType,
28 lookupField, lookupMethod,
32 apsize, arsize, arlist
37 import Control.Monad.Trans (lift)
38 import Control.Applicative
39 import qualified Control.Monad.State as St
41 import Data.Binary.IEEE754
42 import Data.Binary.Get
43 import Data.Binary.Put
47 import qualified Data.Set as S
48 import qualified Data.Map as M
49 import qualified Data.ByteString.Lazy as B
50 import Codec.Binary.UTF8.String hiding (encode, decode)
54 -- Java .class file uses constants pool, which stores almost all source-code-level
55 -- constants (strings, integer literals etc), and also all identifiers (class,
56 -- method, field names etc). All other structures contain indexes of constants in
57 -- the pool instead of constants theirself.
59 -- It's not convient to use that indexes programmatically. So, .class file is represented
60 -- at two stages: File and Direct. At File stage, all data structures contain only indexes,
61 -- not constants theirself. When we read a class from a file, we get structure at File stage.
62 -- We only can write File stage structure to file.
64 -- At Direct stage, structures conain constants, not indexes. Convertion functions (File <-> Direct)
65 -- are located in the JVM.Converter module.
68 -- | Read one-byte Char
72 return $ chr (fromIntegral x)
74 toString :: B.ByteString -> String
75 toString bstr = decodeString $ map (chr . fromIntegral) $ B.unpack bstr
80 -- | Direct representation stage
83 -- | Link to some object
84 type family Link stage a
86 -- | At File stage, Link contain index of object in the constants pool.
87 type instance Link File a = Word16
89 -- | At Direct stage, Link contain object itself.
90 type instance Link Direct a = a
92 -- | Object (class, method, field …) access flags
93 type family AccessFlags stage
95 -- | At File stage, access flags are represented as Word16
96 type instance AccessFlags File = Word16
98 -- | At Direct stage, access flags are represented as set of flags.
99 type instance AccessFlags Direct = S.Set AccessFlag
101 -- | Object (class, method, field) attributes
102 data family Attributes stage
104 -- | At File stage, attributes are represented as list of Attribute structures.
105 data instance Attributes File = AP {attributesList :: [Attribute]}
108 instance Default (Attributes File) where
111 -- | At Direct stage, attributes are represented as a Map.
112 data instance Attributes Direct = AR (M.Map B.ByteString B.ByteString)
115 instance Default (Attributes Direct) where
118 -- | Size of attributes set at Direct stage
119 arsize :: Attributes Direct -> Int
120 arsize (AR m) = M.size m
122 -- | Associative list of attributes at Direct stage
123 arlist :: Attributes Direct -> [(B.ByteString, B.ByteString)]
124 arlist (AR m) = M.assocs m
126 -- | Size of attributes set at File stage
127 apsize :: Attributes File -> Int
128 apsize (AP list) = length list
130 -- | Access flags. Used for classess, methods, variables.
132 ACC_PUBLIC -- ^ 0x0001 Visible for all
133 | ACC_PRIVATE -- ^ 0x0002 Visible only for defined class
134 | ACC_PROTECTED -- ^ 0x0004 Visible only for subclasses
135 | ACC_STATIC -- ^ 0x0008 Static method or variable
136 | ACC_FINAL -- ^ 0x0010 No further subclassing or assignments
137 | ACC_SYNCHRONIZED -- ^ 0x0020 Uses monitors
138 | ACC_VOLATILE -- ^ 0x0040 Could not be cached
139 | ACC_TRANSIENT -- ^ 0x0080
140 | ACC_NATIVE -- ^ 0x0100 Implemented in other language
141 | ACC_INTERFACE -- ^ 0x0200 Class is interface
142 | ACC_ABSTRACT -- ^ 0x0400
143 deriving (Eq, Show, Ord, Enum)
145 -- | Fields and methods have signatures.
146 class (Binary (Signature a), Show (Signature a), Eq (Signature a))
147 => HasSignature a where
150 instance HasSignature Field where
151 type Signature Field = FieldSignature
153 instance HasSignature Method where
154 type Signature Method = MethodSignature
156 -- | Name and signature pair. Used for methods and fields.
157 data NameType a = NameType {
158 ntName :: B.ByteString,
159 ntSignature :: Signature a }
161 instance (HasSignature a) => Show (NameType a) where
162 show (NameType n t) = toString n ++ ": " ++ show t
164 deriving instance HasSignature a => Eq (NameType a)
166 instance HasSignature a => Binary (NameType a) where
167 put (NameType n t) = putLazyByteString n >> put t
169 get = NameType <$> get <*> get
171 -- | Constant pool item
172 data Constant stage =
173 CClass (Link stage B.ByteString)
174 | CField (Link stage B.ByteString) (Link stage (NameType Field))
175 | CMethod (Link stage B.ByteString) (Link stage (NameType Method))
176 | CIfaceMethod (Link stage B.ByteString) (Link stage (NameType Method))
177 | CString (Link stage B.ByteString)
182 | CNameType (Link stage B.ByteString) (Link stage B.ByteString)
183 | CUTF8 {getString :: B.ByteString}
184 | CUnicode {getString :: B.ByteString}
186 -- | Name of the CClass. Error on any other constant.
187 className :: Constant Direct -> B.ByteString
188 className (CClass s) = s
189 className x = error $ "Not a class: " ++ show x
191 instance Show (Constant Direct) where
192 show (CClass name) = "class " ++ toString name
193 show (CField cls nt) = "field " ++ toString cls ++ "." ++ show nt
194 show (CMethod cls nt) = "method " ++ toString cls ++ "." ++ show nt
195 show (CIfaceMethod cls nt) = "interface method " ++ toString cls ++ "." ++ show nt
196 show (CString s) = "String \"" ++ toString s ++ "\""
197 show (CInteger x) = show x
198 show (CFloat x) = show x
199 show (CLong x) = show x
200 show (CDouble x) = show x
201 show (CNameType name tp) = toString name ++ ": " ++ toString tp
202 show (CUTF8 s) = "UTF8 \"" ++ toString s ++ "\""
203 show (CUnicode s) = "Unicode \"" ++ toString s ++ "\""
206 type Pool stage = M.Map Word16 (Constant stage)
208 -- | Generic .class file format
209 data Class stage = Class {
210 magic :: Word32, -- ^ Magic value: 0xCAFEBABE
211 minorVersion :: Word16,
212 majorVersion :: Word16,
213 constsPoolSize :: Word16, -- ^ Number of items in constants pool
214 constsPool :: Pool stage, -- ^ Constants pool itself
215 accessFlags :: AccessFlags stage, -- ^ See @JVM.Types.AccessFlag@
216 thisClass :: Link stage B.ByteString, -- ^ Constants pool item index for this class
217 superClass :: Link stage B.ByteString, -- ^ --/-- for super class, zero for java.lang.Object
218 interfacesCount :: Word16, -- ^ Number of implemented interfaces
219 interfaces :: [Link stage B.ByteString], -- ^ Constants pool item indexes for implemented interfaces
220 classFieldsCount :: Word16, -- ^ Number of class fileds
221 classFields :: [Field stage], -- ^ Class fields
222 classMethodsCount :: Word16, -- ^ Number of class methods
223 classMethods :: [Method stage], -- ^ Class methods
224 classAttributesCount :: Word16, -- ^ Number of class attributes
225 classAttributes :: Attributes stage -- ^ Class attributes
228 deriving instance Eq (Class File)
229 deriving instance Eq (Class Direct)
230 deriving instance Show (Class File)
231 deriving instance Show (Class Direct)
233 deriving instance Eq (Constant File)
234 deriving instance Eq (Constant Direct)
235 deriving instance Show (Constant File)
237 -- | Default (empty) class file definition.
238 defaultClass :: (Default (AccessFlags stage), Default (Link stage B.ByteString), Default (Attributes stage))
240 defaultClass = Class {
251 classFieldsCount = 0,
253 classMethodsCount = 0,
255 classAttributesCount = 0,
256 classAttributes = def }
258 instance Binary (Class File) where
259 put (Class {..}) = do
270 forM_ classFields put
271 put classMethodsCount
272 forM_ classMethods put
273 put classAttributesCount
274 forM_ (attributesList classAttributes) put
278 when (magic /= 0xCAFEBABE) $
279 fail $ "Invalid .class file MAGIC value: " ++ show magic
283 fail $ "Too new .class file format: " ++ show major
284 poolsize <- getWord16be
285 pool <- getPool (poolsize - 1)
289 interfacesCount <- get
290 ifaces <- replicateM (fromIntegral interfacesCount) get
291 classFieldsCount <- getWord16be
292 classFields <- replicateM (fromIntegral classFieldsCount) get
293 classMethodsCount <- get
294 classMethods <- replicateM (fromIntegral classMethodsCount) get
296 as <- replicateM (fromIntegral $ asCount) get
297 return $ Class magic minor major poolsize pool af this super
298 interfacesCount ifaces classFieldsCount classFields
299 classMethodsCount classMethods asCount (AP as)
301 -- | Field signature format
311 | ObjectType String -- ^ L @{class name}@
312 | Array (Maybe Int) FieldType -- ^ @[{type}@
315 instance Show FieldType where
316 show SignedByte = "byte"
317 show CharByte = "char"
318 show DoubleType = "double"
319 show FloatType = "float"
321 show LongInt = "long"
322 show ShortInt = "short"
323 show BoolType = "bool"
324 show (ObjectType s) = "Object " ++ s
325 show (Array Nothing t) = show t ++ "[]"
326 show (Array (Just n) t) = show t ++ "[" ++ show n ++ "]"
328 -- | Class field signature
329 type FieldSignature = FieldType
331 -- | Try to read integer value from decimal representation
332 getInt :: Get (Maybe Int)
337 else return $ Just (read s)
339 getDigits :: Get [Char]
341 c <- lookAhead getChar8
349 putString :: String -> Put
350 putString str = forM_ str put
352 instance Binary FieldType where
353 put SignedByte = put 'B'
354 put CharByte = put 'C'
355 put DoubleType = put 'D'
356 put FloatType = put 'F'
357 put IntType = put 'I'
358 put LongInt = put 'J'
359 put ShortInt = put 'S'
360 put BoolType = put 'Z'
361 put (ObjectType name) = put 'L' >> putString name >> put ';'
362 put (Array Nothing sig) = put '[' >> put sig
363 put (Array (Just n) sig) = put '[' >> put (show n) >> put sig
368 'B' -> return SignedByte
369 'C' -> return CharByte
370 'D' -> return DoubleType
371 'F' -> return FloatType
372 'I' -> return IntType
373 'J' -> return LongInt
374 'S' -> return ShortInt
375 'Z' -> return BoolType
377 name <- getToSemicolon
378 return (ObjectType name)
382 return (Array mbSize sig)
383 _ -> fail $ "Unknown signature opening symbol: " ++ [b]
385 -- | Read string up to `;'
386 getToSemicolon :: Get String
392 next <- getToSemicolon
395 -- | Return value signature
396 data ReturnSignature =
401 instance Show ReturnSignature where
402 show (Returns t) = show t
403 show ReturnsVoid = "Void"
405 instance Binary ReturnSignature where
406 put (Returns sig) = put sig
407 put ReturnsVoid = put 'V'
410 x <- lookAhead getChar8
412 'V' -> skip 1 >> return ReturnsVoid
415 -- | Method argument signature
416 type ArgumentSignature = FieldType
418 -- | Class method argument signature
419 data MethodSignature =
420 MethodSignature [ArgumentSignature] ReturnSignature
423 instance Show MethodSignature where
424 show (MethodSignature args ret) = "(" ++ intercalate ", " (map show args) ++ ") returns " ++ show ret
426 instance Binary MethodSignature where
427 put (MethodSignature args ret) = do
436 fail "Cannot parse method signature: no starting `(' !"
440 fail "Internal error: method signature without `)' !?"
442 return (MethodSignature args ret)
444 -- | Read arguments signatures (up to `)')
445 getArgs :: Get [ArgumentSignature]
446 getArgs = whileJust getArg
448 getArg :: Get (Maybe ArgumentSignature)
450 x <- lookAhead getChar8
455 whileJust :: (Monad m) => m (Maybe a) -> m [a]
464 long :: Constant stage -> Bool
465 long (CLong _) = True
466 long (CDouble _) = True
469 putPool :: Pool File -> Put
471 let list = M.elems pool
472 d = length $ filter long list
473 putWord16be $ fromIntegral (M.size pool + d + 1)
476 putC (CClass i) = putWord8 7 >> put i
477 putC (CField i j) = putWord8 9 >> put i >> put j
478 putC (CMethod i j) = putWord8 10 >> put i >> put j
479 putC (CIfaceMethod i j) = putWord8 11 >> put i >> put j
480 putC (CString i) = putWord8 8 >> put i
481 putC (CInteger x) = putWord8 3 >> put x
482 putC (CFloat x) = putWord8 4 >> putFloat32be x
483 putC (CLong x) = putWord8 5 >> put x
484 putC (CDouble x) = putWord8 6 >> putFloat64be x
485 putC (CNameType i j) = putWord8 12 >> put i >> put j
488 put (fromIntegral (B.length bs) :: Word16)
490 putC (CUnicode bs) = do
492 put (fromIntegral (B.length bs) :: Word16)
495 getPool :: Word16 -> Get (Pool File)
497 items <- St.evalStateT go 1
498 return $ M.fromList items
500 go :: St.StateT Word16 Get [(Word16, Constant File)]
520 bs <- getLazyByteString (fromIntegral (l :: Word16))
524 bs <- getLazyByteString (fromIntegral (l :: Word16))
526 3 -> CInteger <$> get
527 4 -> CFloat <$> getFloat32be
529 6 -> CDouble <$> getFloat64be
532 9 -> CField <$> get <*> get
533 10 -> CMethod <$> get <*> get
534 11 -> CIfaceMethod <$> get <*> get
535 12 -> CNameType <$> get <*> get
536 _ -> fail $ "Unknown constants pool entry tag: " ++ show tag
537 -- _ -> return $ CInteger 0
539 -- | Class field format
540 data Field stage = Field {
541 fieldAccessFlags :: AccessFlags stage,
542 fieldName :: Link stage B.ByteString,
543 fieldSignature :: Link stage FieldSignature,
544 fieldAttributesCount :: Word16,
545 fieldAttributes :: Attributes stage }
547 deriving instance Eq (Field File)
548 deriving instance Eq (Field Direct)
549 deriving instance Show (Field File)
550 deriving instance Show (Field Direct)
552 lookupField :: B.ByteString -> Class Direct -> Maybe (Field Direct)
553 lookupField name cls = look (classFields cls)
557 | fieldName f == name = Just f
558 | otherwise = look fs
560 fieldNameType :: Field Direct -> NameType Field
561 fieldNameType f = NameType (fieldName f) (fieldSignature f)
563 instance Binary (Field File) where
564 put (Field {..}) = do
568 put fieldAttributesCount
569 forM_ (attributesList fieldAttributes) put
576 as <- replicateM (fromIntegral n) get
577 return $ Field af ni si n (AP as)
579 -- | Class method format
580 data Method stage = Method {
581 methodAccessFlags :: AccessFlags stage,
582 methodName :: Link stage B.ByteString,
583 methodSignature :: Link stage MethodSignature,
584 methodAttributesCount :: Word16,
585 methodAttributes :: Attributes stage }
587 deriving instance Eq (Method File)
588 deriving instance Eq (Method Direct)
589 deriving instance Show (Method File)
590 deriving instance Show (Method Direct)
592 methodNameType :: Method Direct -> NameType Method
593 methodNameType m = NameType (methodName m) (methodSignature m)
595 lookupMethod :: B.ByteString -> Class Direct -> Maybe (Method Direct)
596 lookupMethod name cls = look (classMethods cls)
600 | methodName f == name = Just f
601 | otherwise = look fs
603 instance Binary (Method File) where
604 put (Method {..}) = do
605 put methodAccessFlags
608 put methodAttributesCount
609 forM_ (attributesList methodAttributes) put
617 as <- replicateM (fromIntegral n) get
619 methodAccessFlags = af,
621 methodSignature = si,
622 methodAttributesCount = n,
623 methodAttributes = AP as }
625 -- | Any (class/ field/ method/ ...) attribute format.
626 -- Some formats specify special formats for @attributeValue@.
627 data Attribute = Attribute {
628 attributeName :: Word16,
629 attributeLength :: Word32,
630 attributeValue :: B.ByteString }
633 instance Binary Attribute where
634 put (Attribute {..}) = do
636 putWord32be attributeLength
637 putLazyByteString attributeValue
643 value <- getLazyByteString (fromIntegral len)
644 return $ Attribute name len value
646 class HasAttributes a where
647 attributes :: a stage -> Attributes stage
649 instance HasAttributes Class where
650 attributes = classAttributes
652 instance HasAttributes Field where
653 attributes = fieldAttributes
655 instance HasAttributes Method where
656 attributes = methodAttributes