1 {-# LANGUAGE RecordWildCards, BangPatterns, TypeFamilies, StandaloneDeriving, FlexibleContexts, FlexibleInstances, UndecidableInstances, TypeSynonymInstances #-}
2 -- | This module declares (low-level) data types for Java .class files
3 -- structures, and Binary instances to read/write them.
8 -- * Internal class file structures
12 FieldSignature, MethodSignature (..), ReturnSignature (..),
13 ArgumentSignature (..),
16 -- * Staged structures
18 Method (..), Field (..), Class (..),
20 AccessFlag (..), AccessFlags,
24 HasSignature (..), HasAttributes (..),
26 fieldNameType, methodNameType,
27 lookupField, lookupMethod,
30 apsize, arsize, arlist
35 import Control.Applicative
37 import Data.Binary.IEEE754
38 import Data.Binary.Get
39 import Data.Binary.Put
43 import qualified Data.Set as S
44 import qualified Data.Map as M
45 import qualified Data.ByteString.Lazy as B
46 import Codec.Binary.UTF8.String hiding (encode, decode)
50 -- Java .class file uses constants pool, which stores almost all source-code-level
51 -- constants (strings, integer literals etc), and also all identifiers (class,
52 -- method, field names etc). All other structures contain indexes of constants in
53 -- the pool instead of constants theirself.
55 -- It's not convient to use that indexes programmatically. So, .class file is represented
56 -- at two stages: File and Direct. At File stage, all data structures contain only indexes,
57 -- not constants theirself. When we read a class from a file, we get structure at File stage.
58 -- We only can write File stage structure to file.
60 -- At Direct stage, structures conain constants, not indexes. Convertion functions (File <-> Direct)
61 -- are located in the JVM.Converter module.
64 -- | Read one-byte Char
68 return $ chr (fromIntegral x)
70 toString :: B.ByteString -> String
71 toString bstr = decodeString $ map (chr . fromIntegral) $ B.unpack bstr
76 -- | Direct representation stage
79 -- | Link to some object
80 type family Link stage a
82 -- | At File stage, Link contain index of object in the constants pool.
83 type instance Link File a = Word16
85 -- | At Direct stage, Link contain object itself.
86 type instance Link Direct a = a
88 -- | Object (class, method, field …) access flags
89 type family AccessFlags stage
91 -- | At File stage, access flags are represented as Word16
92 type instance AccessFlags File = Word16
94 -- | At Direct stage, access flags are represented as set of flags.
95 type instance AccessFlags Direct = S.Set AccessFlag
97 -- | Object (class, method, field) attributes
98 data family Attributes stage
100 -- | At File stage, attributes are represented as list of Attribute structures.
101 data instance Attributes File = AP {attributesList :: [Attribute]}
104 instance Default (Attributes File) where
107 -- | At Direct stage, attributes are represented as a Map.
108 data instance Attributes Direct = AR (M.Map B.ByteString B.ByteString)
111 instance Default (Attributes Direct) where
114 -- | Size of attributes set at Direct stage
115 arsize :: Attributes Direct -> Int
116 arsize (AR m) = M.size m
118 -- | Associative list of attributes at Direct stage
119 arlist :: Attributes Direct -> [(B.ByteString, B.ByteString)]
120 arlist (AR m) = M.assocs m
122 -- | Size of attributes set at File stage
123 apsize :: Attributes File -> Int
124 apsize (AP list) = length list
126 -- | Access flags. Used for classess, methods, variables.
128 ACC_PUBLIC -- ^ 0x0001 Visible for all
129 | ACC_PRIVATE -- ^ 0x0002 Visible only for defined class
130 | ACC_PROTECTED -- ^ 0x0004 Visible only for subclasses
131 | ACC_STATIC -- ^ 0x0008 Static method or variable
132 | ACC_FINAL -- ^ 0x0010 No further subclassing or assignments
133 | ACC_SYNCHRONIZED -- ^ 0x0020 Uses monitors
134 | ACC_VOLATILE -- ^ 0x0040 Could not be cached
135 | ACC_TRANSIENT -- ^ 0x0080
136 | ACC_NATIVE -- ^ 0x0100 Implemented in other language
137 | ACC_INTERFACE -- ^ 0x0200 Class is interface
138 | ACC_ABSTRACT -- ^ 0x0400
139 deriving (Eq, Show, Ord, Enum)
141 -- | Fields and methods have signatures.
142 class (Binary (Signature a), Show (Signature a), Eq (Signature a))
143 => HasSignature a where
146 instance HasSignature Field where
147 type Signature Field = FieldSignature
149 instance HasSignature Method where
150 type Signature Method = MethodSignature
152 -- | Name and signature pair. Used for methods and fields.
153 data NameType a = NameType {
154 ntName :: B.ByteString,
155 ntSignature :: Signature a }
157 instance (HasSignature a) => Show (NameType a) where
158 show (NameType n t) = toString n ++ ": " ++ show t
160 deriving instance HasSignature a => Eq (NameType a)
162 instance HasSignature a => Binary (NameType a) where
163 put (NameType n t) = putLazyByteString n >> put t
165 get = NameType <$> get <*> get
167 -- | Constant pool item
168 data Constant stage =
169 CClass (Link stage B.ByteString)
170 | CField (Link stage B.ByteString) (Link stage (NameType Field))
171 | CMethod (Link stage B.ByteString) (Link stage (NameType Method))
172 | CIfaceMethod (Link stage B.ByteString) (Link stage (NameType Method))
173 | CString (Link stage B.ByteString)
178 | CNameType (Link stage B.ByteString) (Link stage B.ByteString)
179 | CUTF8 {getString :: B.ByteString}
180 | CUnicode {getString :: B.ByteString}
182 -- | Name of the CClass. Error on any other constant.
183 className :: Constant Direct -> B.ByteString
184 className (CClass s) = s
185 className x = error $ "Not a class: " ++ show x
187 instance Show (Constant Direct) where
188 show (CClass name) = "class " ++ toString name
189 show (CField cls nt) = "field " ++ toString cls ++ "." ++ show nt
190 show (CMethod cls nt) = "method " ++ toString cls ++ "." ++ show nt
191 show (CIfaceMethod cls nt) = "interface method " ++ toString cls ++ "." ++ show nt
192 show (CString s) = "String \"" ++ toString s ++ "\""
193 show (CInteger x) = show x
194 show (CFloat x) = show x
195 show (CLong x) = show x
196 show (CDouble x) = show x
197 show (CNameType name tp) = toString name ++ ": " ++ toString tp
198 show (CUTF8 s) = "UTF8 \"" ++ toString s ++ "\""
199 show (CUnicode s) = "Unicode \"" ++ toString s ++ "\""
202 type Pool stage = M.Map Word16 (Constant stage)
204 -- | Generic .class file format
205 data Class stage = Class {
206 magic :: Word32, -- ^ Magic value: 0xCAFEBABE
207 minorVersion :: Word16,
208 majorVersion :: Word16,
209 constsPoolSize :: Word16, -- ^ Number of items in constants pool
210 constsPool :: Pool stage, -- ^ Constants pool itself
211 accessFlags :: AccessFlags stage, -- ^ See @JVM.Types.AccessFlag@
212 thisClass :: Link stage B.ByteString, -- ^ Constants pool item index for this class
213 superClass :: Link stage B.ByteString, -- ^ --/-- for super class, zero for java.lang.Object
214 interfacesCount :: Word16, -- ^ Number of implemented interfaces
215 interfaces :: [Link stage B.ByteString], -- ^ Constants pool item indexes for implemented interfaces
216 classFieldsCount :: Word16, -- ^ Number of class fileds
217 classFields :: [Field stage], -- ^ Class fields
218 classMethodsCount :: Word16, -- ^ Number of class methods
219 classMethods :: [Method stage], -- ^ Class methods
220 classAttributesCount :: Word16, -- ^ Number of class attributes
221 classAttributes :: Attributes stage -- ^ Class attributes
224 deriving instance Eq (Class File)
225 deriving instance Eq (Class Direct)
226 deriving instance Show (Class File)
227 deriving instance Show (Class Direct)
229 deriving instance Eq (Constant File)
230 deriving instance Eq (Constant Direct)
231 deriving instance Show (Constant File)
233 -- | Default (empty) class file definition.
234 defaultClass :: (Default (AccessFlags stage), Default (Link stage B.ByteString), Default (Attributes stage))
236 defaultClass = Class {
247 classFieldsCount = 0,
249 classMethodsCount = 0,
251 classAttributesCount = 0,
252 classAttributes = def }
254 instance Binary (Class File) where
255 put (Class {..}) = do
260 forM_ (M.elems constsPool) put
267 forM_ classFields put
268 put classMethodsCount
269 forM_ classMethods put
270 put classAttributesCount
271 forM_ (attributesList classAttributes) put
278 pool <- replicateM (fromIntegral poolsize - 1) get
282 interfacesCount <- get
283 ifaces <- replicateM (fromIntegral interfacesCount) get
284 classFieldsCount <- get
285 classFields <- replicateM (fromIntegral classFieldsCount) get
286 classMethodsCount <- get
287 classMethods <- replicateM (fromIntegral classMethodsCount) get
289 as <- replicateM (fromIntegral $ asCount) get
290 let pool' = M.fromList $ zip [1..] pool
291 return $ Class magic minor major poolsize pool' af this super
292 interfacesCount ifaces classFieldsCount classFields
293 classMethodsCount classMethods asCount (AP as)
295 -- | Field signature format
305 | ObjectType String -- ^ L @{class name}@
306 | Array (Maybe Int) FieldType -- ^ @[{type}@
309 instance Show FieldType where
310 show SignedByte = "byte"
311 show CharByte = "char"
312 show DoubleType = "double"
313 show FloatType = "float"
315 show LongInt = "long"
316 show ShortInt = "short"
317 show BoolType = "bool"
318 show (ObjectType s) = "Object " ++ s
319 show (Array Nothing t) = show t ++ "[]"
320 show (Array (Just n) t) = show t ++ "[" ++ show n ++ "]"
322 -- | Class field signature
323 type FieldSignature = FieldType
325 -- | Try to read integer value from decimal representation
326 getInt :: Get (Maybe Int)
331 else return $ Just (read s)
333 getDigits :: Get [Char]
335 c <- lookAhead getChar8
343 putString :: String -> Put
344 putString str = forM_ str put
346 instance Binary FieldType where
347 put SignedByte = put 'B'
348 put CharByte = put 'C'
349 put DoubleType = put 'D'
350 put FloatType = put 'F'
351 put IntType = put 'I'
352 put LongInt = put 'J'
353 put ShortInt = put 'S'
354 put BoolType = put 'Z'
355 put (ObjectType name) = put 'L' >> putString name >> put ';'
356 put (Array Nothing sig) = put '[' >> put sig
357 put (Array (Just n) sig) = put '[' >> put (show n) >> put sig
362 'B' -> return SignedByte
363 'C' -> return CharByte
364 'D' -> return DoubleType
365 'F' -> return FloatType
366 'I' -> return IntType
367 'J' -> return LongInt
368 'S' -> return ShortInt
369 'Z' -> return BoolType
371 name <- getToSemicolon
372 return (ObjectType name)
376 return (Array mbSize sig)
377 _ -> fail $ "Unknown signature opening symbol: " ++ [b]
379 -- | Read string up to `;'
380 getToSemicolon :: Get String
386 next <- getToSemicolon
389 -- | Return value signature
390 data ReturnSignature =
395 instance Show ReturnSignature where
396 show (Returns t) = show t
397 show ReturnsVoid = "Void"
399 instance Binary ReturnSignature where
400 put (Returns sig) = put sig
401 put ReturnsVoid = put 'V'
404 x <- lookAhead getChar8
406 'V' -> skip 1 >> return ReturnsVoid
409 -- | Method argument signature
410 type ArgumentSignature = FieldType
412 -- | Class method argument signature
413 data MethodSignature =
414 MethodSignature [ArgumentSignature] ReturnSignature
417 instance Show MethodSignature where
418 show (MethodSignature args ret) = "(" ++ intercalate ", " (map show args) ++ ") returns " ++ show ret
420 instance Binary MethodSignature where
421 put (MethodSignature args ret) = do
430 fail "Cannot parse method signature: no starting `(' !"
434 fail "Internal error: method signature without `)' !?"
436 return (MethodSignature args ret)
438 -- | Read arguments signatures (up to `)')
439 getArgs :: Get [ArgumentSignature]
440 getArgs = whileJust getArg
442 getArg :: Get (Maybe ArgumentSignature)
444 x <- lookAhead getChar8
449 whileJust :: (Monad m) => m (Maybe a) -> m [a]
458 instance Binary (Constant File) where
459 put (CClass i) = putWord8 7 >> put i
460 put (CField i j) = putWord8 9 >> put i >> put j
461 put (CMethod i j) = putWord8 10 >> put i >> put j
462 put (CIfaceMethod i j) = putWord8 11 >> put i >> put j
463 put (CString i) = putWord8 8 >> put i
464 put (CInteger x) = putWord8 3 >> put x
465 put (CFloat x) = putWord8 4 >> putFloat32be x
466 put (CLong x) = putWord8 5 >> put x
467 put (CDouble x) = putWord8 6 >> putFloat64be x
468 put (CNameType i j) = putWord8 12 >> put i >> put j
471 put (fromIntegral (B.length bs) :: Word16)
473 put (CUnicode bs) = do
475 put (fromIntegral (B.length bs) :: Word16)
484 bs <- getLazyByteString (fromIntegral (l :: Word16))
488 bs <- getLazyByteString (fromIntegral (l :: Word16))
490 3 -> CInteger <$> get
491 4 -> CFloat <$> getFloat32be
493 6 -> CDouble <$> getFloat64be
496 9 -> CField <$> get <*> get
497 10 -> CMethod <$> get <*> get
498 11 -> CIfaceMethod <$> get <*> get
499 12 -> CNameType <$> get <*> get
500 _ -> fail $ "Unknown constants pool entry tag: " ++ show tag
502 -- | Class field format
503 data Field stage = Field {
504 fieldAccessFlags :: AccessFlags stage,
505 fieldName :: Link stage B.ByteString,
506 fieldSignature :: Link stage FieldSignature,
507 fieldAttributesCount :: Word16,
508 fieldAttributes :: Attributes stage }
510 deriving instance Eq (Field File)
511 deriving instance Eq (Field Direct)
512 deriving instance Show (Field File)
513 deriving instance Show (Field Direct)
515 lookupField :: B.ByteString -> Class Direct -> Maybe (Field Direct)
516 lookupField name cls = look (classFields cls)
520 | fieldName f == name = Just f
521 | otherwise = look fs
523 fieldNameType :: Field Direct -> NameType Field
524 fieldNameType f = NameType (fieldName f) (fieldSignature f)
526 instance Binary (Field File) where
527 put (Field {..}) = do
531 put fieldAttributesCount
532 forM_ (attributesList fieldAttributes) put
539 as <- replicateM (fromIntegral n) get
540 return $ Field af ni si n (AP as)
542 -- | Class method format
543 data Method stage = Method {
544 methodAccessFlags :: AccessFlags stage,
545 methodName :: Link stage B.ByteString,
546 methodSignature :: Link stage MethodSignature,
547 methodAttributesCount :: Word16,
548 methodAttributes :: Attributes stage }
550 deriving instance Eq (Method File)
551 deriving instance Eq (Method Direct)
552 deriving instance Show (Method File)
553 deriving instance Show (Method Direct)
555 methodNameType :: Method Direct -> NameType Method
556 methodNameType m = NameType (methodName m) (methodSignature m)
558 lookupMethod :: B.ByteString -> Class Direct -> Maybe (Method Direct)
559 lookupMethod name cls = look (classMethods cls)
563 | methodName f == name = Just f
564 | otherwise = look fs
566 instance Binary (Method File) where
567 put (Method {..}) = do
568 put methodAccessFlags
571 put methodAttributesCount
572 forM_ (attributesList methodAttributes) put
580 as <- replicateM (fromIntegral n) get
582 methodAccessFlags = af,
584 methodSignature = si,
585 methodAttributesCount = n,
586 methodAttributes = AP as }
588 -- | Any (class/ field/ method/ ...) attribute format.
589 -- Some formats specify special formats for @attributeValue@.
590 data Attribute = Attribute {
591 attributeName :: Word16,
592 attributeLength :: Word32,
593 attributeValue :: B.ByteString }
596 instance Binary Attribute where
597 put (Attribute {..}) = do
599 putWord32be attributeLength
600 putLazyByteString attributeValue
606 value <- getLazyByteString (fromIntegral len)
607 return $ Attribute name len value
609 class HasAttributes a where
610 attributes :: a stage -> Attributes stage
612 instance HasAttributes Class where
613 attributes = classAttributes
615 instance HasAttributes Field where
616 attributes = fieldAttributes
618 instance HasAttributes Method where
619 attributes = methodAttributes