1 {-# LANGUAGE RecordWildCards, BangPatterns, TypeFamilies, StandaloneDeriving, FlexibleContexts, FlexibleInstances, UndecidableInstances, TypeSynonymInstances #-}
2 -- | This module declares (low-level) data types for Java .class files
3 -- structures, and Binary instances to read/write them.
8 -- * Internal class file structures
12 FieldSignature, MethodSignature (..), ReturnSignature (..),
13 ArgumentSignature (..),
16 -- * Staged structures
18 Method (..), Field (..), Class (..),
20 AccessFlag (..), AccessFlags,
24 HasSignature (..), HasAttributes (..),
26 fieldNameType, methodNameType,
27 lookupField, lookupMethod,
31 apsize, arsize, arlist
36 import Control.Monad.Trans (lift)
37 import Control.Applicative
38 import qualified Control.Monad.State as St
40 import Data.Binary.IEEE754
41 import Data.Binary.Get
42 import Data.Binary.Put
46 import qualified Data.Set as S
47 import qualified Data.Map as M
48 import qualified Data.ByteString.Lazy as B
49 import Codec.Binary.UTF8.String hiding (encode, decode)
53 -- Java .class file uses constants pool, which stores almost all source-code-level
54 -- constants (strings, integer literals etc), and also all identifiers (class,
55 -- method, field names etc). All other structures contain indexes of constants in
56 -- the pool instead of constants theirself.
58 -- It's not convient to use that indexes programmatically. So, .class file is represented
59 -- at two stages: File and Direct. At File stage, all data structures contain only indexes,
60 -- not constants theirself. When we read a class from a file, we get structure at File stage.
61 -- We only can write File stage structure to file.
63 -- At Direct stage, structures conain constants, not indexes. Convertion functions (File <-> Direct)
64 -- are located in the JVM.Converter module.
67 -- | Read one-byte Char
71 return $ chr (fromIntegral x)
73 toString :: B.ByteString -> String
74 toString bstr = decodeString $ map (chr . fromIntegral) $ B.unpack bstr
79 -- | Direct representation stage
82 -- | Link to some object
83 type family Link stage a
85 -- | At File stage, Link contain index of object in the constants pool.
86 type instance Link File a = Word16
88 -- | At Direct stage, Link contain object itself.
89 type instance Link Direct a = a
91 -- | Object (class, method, field …) access flags
92 type family AccessFlags stage
94 -- | At File stage, access flags are represented as Word16
95 type instance AccessFlags File = Word16
97 -- | At Direct stage, access flags are represented as set of flags.
98 type instance AccessFlags Direct = S.Set AccessFlag
100 -- | Object (class, method, field) attributes
101 data family Attributes stage
103 -- | At File stage, attributes are represented as list of Attribute structures.
104 data instance Attributes File = AP {attributesList :: [Attribute]}
107 instance Default (Attributes File) where
110 -- | At Direct stage, attributes are represented as a Map.
111 data instance Attributes Direct = AR (M.Map B.ByteString B.ByteString)
114 instance Default (Attributes Direct) where
117 -- | Size of attributes set at Direct stage
118 arsize :: Attributes Direct -> Int
119 arsize (AR m) = M.size m
121 -- | Associative list of attributes at Direct stage
122 arlist :: Attributes Direct -> [(B.ByteString, B.ByteString)]
123 arlist (AR m) = M.assocs m
125 -- | Size of attributes set at File stage
126 apsize :: Attributes File -> Int
127 apsize (AP list) = length list
129 -- | Access flags. Used for classess, methods, variables.
131 ACC_PUBLIC -- ^ 0x0001 Visible for all
132 | ACC_PRIVATE -- ^ 0x0002 Visible only for defined class
133 | ACC_PROTECTED -- ^ 0x0004 Visible only for subclasses
134 | ACC_STATIC -- ^ 0x0008 Static method or variable
135 | ACC_FINAL -- ^ 0x0010 No further subclassing or assignments
136 | ACC_SYNCHRONIZED -- ^ 0x0020 Uses monitors
137 | ACC_VOLATILE -- ^ 0x0040 Could not be cached
138 | ACC_TRANSIENT -- ^ 0x0080
139 | ACC_NATIVE -- ^ 0x0100 Implemented in other language
140 | ACC_INTERFACE -- ^ 0x0200 Class is interface
141 | ACC_ABSTRACT -- ^ 0x0400
142 deriving (Eq, Show, Ord, Enum)
144 -- | Fields and methods have signatures.
145 class (Binary (Signature a), Show (Signature a), Eq (Signature a))
146 => HasSignature a where
149 instance HasSignature Field where
150 type Signature Field = FieldSignature
152 instance HasSignature Method where
153 type Signature Method = MethodSignature
155 -- | Name and signature pair. Used for methods and fields.
156 data NameType a = NameType {
157 ntName :: B.ByteString,
158 ntSignature :: Signature a }
160 instance (HasSignature a) => Show (NameType a) where
161 show (NameType n t) = toString n ++ ": " ++ show t
163 deriving instance HasSignature a => Eq (NameType a)
165 instance HasSignature a => Binary (NameType a) where
166 put (NameType n t) = putLazyByteString n >> put t
168 get = NameType <$> get <*> get
170 -- | Constant pool item
171 data Constant stage =
172 CClass (Link stage B.ByteString)
173 | CField (Link stage B.ByteString) (Link stage (NameType Field))
174 | CMethod (Link stage B.ByteString) (Link stage (NameType Method))
175 | CIfaceMethod (Link stage B.ByteString) (Link stage (NameType Method))
176 | CString (Link stage B.ByteString)
181 | CNameType (Link stage B.ByteString) (Link stage B.ByteString)
182 | CUTF8 {getString :: B.ByteString}
183 | CUnicode {getString :: B.ByteString}
185 -- | Name of the CClass. Error on any other constant.
186 className :: Constant Direct -> B.ByteString
187 className (CClass s) = s
188 className x = error $ "Not a class: " ++ show x
190 instance Show (Constant Direct) where
191 show (CClass name) = "class " ++ toString name
192 show (CField cls nt) = "field " ++ toString cls ++ "." ++ show nt
193 show (CMethod cls nt) = "method " ++ toString cls ++ "." ++ show nt
194 show (CIfaceMethod cls nt) = "interface method " ++ toString cls ++ "." ++ show nt
195 show (CString s) = "String \"" ++ toString s ++ "\""
196 show (CInteger x) = show x
197 show (CFloat x) = show x
198 show (CLong x) = show x
199 show (CDouble x) = show x
200 show (CNameType name tp) = toString name ++ ": " ++ toString tp
201 show (CUTF8 s) = "UTF8 \"" ++ toString s ++ "\""
202 show (CUnicode s) = "Unicode \"" ++ toString s ++ "\""
205 type Pool stage = M.Map Word16 (Constant stage)
207 -- | Generic .class file format
208 data Class stage = Class {
209 magic :: Word32, -- ^ Magic value: 0xCAFEBABE
210 minorVersion :: Word16,
211 majorVersion :: Word16,
212 constsPoolSize :: Word16, -- ^ Number of items in constants pool
213 constsPool :: Pool stage, -- ^ Constants pool itself
214 accessFlags :: AccessFlags stage, -- ^ See @JVM.Types.AccessFlag@
215 thisClass :: Link stage B.ByteString, -- ^ Constants pool item index for this class
216 superClass :: Link stage B.ByteString, -- ^ --/-- for super class, zero for java.lang.Object
217 interfacesCount :: Word16, -- ^ Number of implemented interfaces
218 interfaces :: [Link stage B.ByteString], -- ^ Constants pool item indexes for implemented interfaces
219 classFieldsCount :: Word16, -- ^ Number of class fileds
220 classFields :: [Field stage], -- ^ Class fields
221 classMethodsCount :: Word16, -- ^ Number of class methods
222 classMethods :: [Method stage], -- ^ Class methods
223 classAttributesCount :: Word16, -- ^ Number of class attributes
224 classAttributes :: Attributes stage -- ^ Class attributes
227 deriving instance Eq (Class File)
228 deriving instance Eq (Class Direct)
229 deriving instance Show (Class File)
230 deriving instance Show (Class Direct)
232 deriving instance Eq (Constant File)
233 deriving instance Eq (Constant Direct)
234 deriving instance Show (Constant File)
236 -- | Default (empty) class file definition.
237 defaultClass :: (Default (AccessFlags stage), Default (Link stage B.ByteString), Default (Attributes stage))
239 defaultClass = Class {
250 classFieldsCount = 0,
252 classMethodsCount = 0,
254 classAttributesCount = 0,
255 classAttributes = def }
257 instance Binary (Class File) where
258 put (Class {..}) = do
269 forM_ classFields put
270 put classMethodsCount
271 forM_ classMethods put
272 put classAttributesCount
273 forM_ (attributesList classAttributes) put
277 when (magic /= 0xCAFEBABE) $
278 fail $ "Invalid .class file MAGIC value: " ++ show magic
282 fail $ "Too new .class file format: " ++ show major
283 poolsize <- getWord16be
284 pool <- getPool (poolsize - 1)
288 interfacesCount <- get
289 ifaces <- replicateM (fromIntegral interfacesCount) get
290 classFieldsCount <- getWord16be
291 classFields <- replicateM (fromIntegral classFieldsCount) get
292 classMethodsCount <- get
293 classMethods <- replicateM (fromIntegral classMethodsCount) get
295 as <- replicateM (fromIntegral $ asCount) get
296 return $ Class magic minor major poolsize pool af this super
297 interfacesCount ifaces classFieldsCount classFields
298 classMethodsCount classMethods asCount (AP as)
300 -- | Field signature format
310 | ObjectType String -- ^ L @{class name}@
311 | Array (Maybe Int) FieldType -- ^ @[{type}@
314 instance Show FieldType where
315 show SignedByte = "byte"
316 show CharByte = "char"
317 show DoubleType = "double"
318 show FloatType = "float"
320 show LongInt = "long"
321 show ShortInt = "short"
322 show BoolType = "bool"
323 show (ObjectType s) = "Object " ++ s
324 show (Array Nothing t) = show t ++ "[]"
325 show (Array (Just n) t) = show t ++ "[" ++ show n ++ "]"
327 -- | Class field signature
328 type FieldSignature = FieldType
330 -- | Try to read integer value from decimal representation
331 getInt :: Get (Maybe Int)
336 else return $ Just (read s)
338 getDigits :: Get [Char]
340 c <- lookAhead getChar8
348 putString :: String -> Put
349 putString str = forM_ str put
351 instance Binary FieldType where
352 put SignedByte = put 'B'
353 put CharByte = put 'C'
354 put DoubleType = put 'D'
355 put FloatType = put 'F'
356 put IntType = put 'I'
357 put LongInt = put 'J'
358 put ShortInt = put 'S'
359 put BoolType = put 'Z'
360 put (ObjectType name) = put 'L' >> putString name >> put ';'
361 put (Array Nothing sig) = put '[' >> put sig
362 put (Array (Just n) sig) = put '[' >> put (show n) >> put sig
367 'B' -> return SignedByte
368 'C' -> return CharByte
369 'D' -> return DoubleType
370 'F' -> return FloatType
371 'I' -> return IntType
372 'J' -> return LongInt
373 'S' -> return ShortInt
374 'Z' -> return BoolType
376 name <- getToSemicolon
377 return (ObjectType name)
381 return (Array mbSize sig)
382 _ -> fail $ "Unknown signature opening symbol: " ++ [b]
384 -- | Read string up to `;'
385 getToSemicolon :: Get String
391 next <- getToSemicolon
394 -- | Return value signature
395 data ReturnSignature =
400 instance Show ReturnSignature where
401 show (Returns t) = show t
402 show ReturnsVoid = "Void"
404 instance Binary ReturnSignature where
405 put (Returns sig) = put sig
406 put ReturnsVoid = put 'V'
409 x <- lookAhead getChar8
411 'V' -> skip 1 >> return ReturnsVoid
414 -- | Method argument signature
415 type ArgumentSignature = FieldType
417 -- | Class method argument signature
418 data MethodSignature =
419 MethodSignature [ArgumentSignature] ReturnSignature
422 instance Show MethodSignature where
423 show (MethodSignature args ret) = "(" ++ intercalate ", " (map show args) ++ ") returns " ++ show ret
425 instance Binary MethodSignature where
426 put (MethodSignature args ret) = do
435 fail "Cannot parse method signature: no starting `(' !"
439 fail "Internal error: method signature without `)' !?"
441 return (MethodSignature args ret)
443 -- | Read arguments signatures (up to `)')
444 getArgs :: Get [ArgumentSignature]
445 getArgs = whileJust getArg
447 getArg :: Get (Maybe ArgumentSignature)
449 x <- lookAhead getChar8
454 whileJust :: (Monad m) => m (Maybe a) -> m [a]
463 long :: Constant stage -> Bool
464 long (CLong _) = True
465 long (CDouble _) = True
468 putPool :: Pool File -> Put
470 let list = M.elems pool
471 d = length $ filter long list
472 putWord16be $ fromIntegral (M.size pool + d + 1)
475 putC (CClass i) = putWord8 7 >> put i
476 putC (CField i j) = putWord8 9 >> put i >> put j
477 putC (CMethod i j) = putWord8 10 >> put i >> put j
478 putC (CIfaceMethod i j) = putWord8 11 >> put i >> put j
479 putC (CString i) = putWord8 8 >> put i
480 putC (CInteger x) = putWord8 3 >> put x
481 putC (CFloat x) = putWord8 4 >> putFloat32be x
482 putC (CLong x) = putWord8 5 >> put x
483 putC (CDouble x) = putWord8 6 >> putFloat64be x
484 putC (CNameType i j) = putWord8 12 >> put i >> put j
487 put (fromIntegral (B.length bs) :: Word16)
489 putC (CUnicode bs) = do
491 put (fromIntegral (B.length bs) :: Word16)
494 getPool :: Word16 -> Get (Pool File)
496 items <- St.evalStateT go 1
497 return $ M.fromList items
499 go :: St.StateT Word16 Get [(Word16, Constant File)]
519 bs <- getLazyByteString (fromIntegral (l :: Word16))
523 bs <- getLazyByteString (fromIntegral (l :: Word16))
525 3 -> CInteger <$> get
526 4 -> CFloat <$> getFloat32be
528 6 -> CDouble <$> getFloat64be
531 9 -> CField <$> get <*> get
532 10 -> CMethod <$> get <*> get
533 11 -> CIfaceMethod <$> get <*> get
534 12 -> CNameType <$> get <*> get
535 _ -> fail $ "Unknown constants pool entry tag: " ++ show tag
536 -- _ -> return $ CInteger 0
538 -- | Class field format
539 data Field stage = Field {
540 fieldAccessFlags :: AccessFlags stage,
541 fieldName :: Link stage B.ByteString,
542 fieldSignature :: Link stage FieldSignature,
543 fieldAttributesCount :: Word16,
544 fieldAttributes :: Attributes stage }
546 deriving instance Eq (Field File)
547 deriving instance Eq (Field Direct)
548 deriving instance Show (Field File)
549 deriving instance Show (Field Direct)
551 lookupField :: B.ByteString -> Class Direct -> Maybe (Field Direct)
552 lookupField name cls = look (classFields cls)
556 | fieldName f == name = Just f
557 | otherwise = look fs
559 fieldNameType :: Field Direct -> NameType Field
560 fieldNameType f = NameType (fieldName f) (fieldSignature f)
562 instance Binary (Field File) where
563 put (Field {..}) = do
567 put fieldAttributesCount
568 forM_ (attributesList fieldAttributes) put
575 as <- replicateM (fromIntegral n) get
576 return $ Field af ni si n (AP as)
578 -- | Class method format
579 data Method stage = Method {
580 methodAccessFlags :: AccessFlags stage,
581 methodName :: Link stage B.ByteString,
582 methodSignature :: Link stage MethodSignature,
583 methodAttributesCount :: Word16,
584 methodAttributes :: Attributes stage }
586 deriving instance Eq (Method File)
587 deriving instance Eq (Method Direct)
588 deriving instance Show (Method File)
589 deriving instance Show (Method Direct)
591 methodNameType :: Method Direct -> NameType Method
592 methodNameType m = NameType (methodName m) (methodSignature m)
594 lookupMethod :: B.ByteString -> Class Direct -> Maybe (Method Direct)
595 lookupMethod name cls = look (classMethods cls)
599 | methodName f == name = Just f
600 | otherwise = look fs
602 instance Binary (Method File) where
603 put (Method {..}) = do
604 put methodAccessFlags
607 put methodAttributesCount
608 forM_ (attributesList methodAttributes) put
616 as <- replicateM (fromIntegral n) get
618 methodAccessFlags = af,
620 methodSignature = si,
621 methodAttributesCount = n,
622 methodAttributes = AP as }
624 -- | Any (class/ field/ method/ ...) attribute format.
625 -- Some formats specify special formats for @attributeValue@.
626 data Attribute = Attribute {
627 attributeName :: Word16,
628 attributeLength :: Word32,
629 attributeValue :: B.ByteString }
632 instance Binary Attribute where
633 put (Attribute {..}) = do
635 putWord32be attributeLength
636 putLazyByteString attributeValue
642 value <- getLazyByteString (fromIntegral len)
643 return $ Attribute name len value
645 class HasAttributes a where
646 attributes :: a stage -> Attributes stage
648 instance HasAttributes Class where
649 attributes = classAttributes
651 instance HasAttributes Field where
652 attributes = fieldAttributes
654 instance HasAttributes Method where
655 attributes = methodAttributes