1 {-# LANGUAGE RecordWildCards, BangPatterns, TypeFamilies, StandaloneDeriving, FlexibleContexts, FlexibleInstances, UndecidableInstances, TypeSynonymInstances #-}
2 -- | This module declares (low-level) data types for Java .class files
3 -- structures, and Binary instances to read/write them.
8 -- * Internal class file structures
12 FieldSignature, MethodSignature (..), ReturnSignature (..),
13 ArgumentSignature (..),
16 -- * Staged structures
18 Method (..), Field (..), Class (..),
20 AccessFlag (..), AccessFlags,
24 HasSignature (..), HasAttributes (..),
27 apsize, arsize, arlist
32 import Control.Applicative
34 import Data.Binary.IEEE754
35 import Data.Binary.Get
36 import Data.Binary.Put
40 import qualified Data.Set as S
41 import qualified Data.Map as M
42 import qualified Data.ByteString.Lazy as B
43 import Codec.Binary.UTF8.String hiding (encode, decode)
47 -- Java .class file uses constants pool, which stores almost all source-code-level
48 -- constants (strings, integer literals etc), and also all identifiers (class,
49 -- method, field names etc). All other structures contain indexes of constants in
50 -- the pool instead of constants theirself.
52 -- It's not convient to use that indexes programmatically. So, .class file is represented
53 -- at two stages: File and Direct. At File stage, all data structures contain only indexes,
54 -- not constants theirself. When we read a class from a file, we get structure at File stage.
55 -- We only can write File stage structure to file.
57 -- At Direct stage, structures conain constants, not indexes. Convertion functions (File <-> Direct)
58 -- are located in the JVM.Converter module.
61 -- | Read one-byte Char
65 return $ chr (fromIntegral x)
67 toString :: B.ByteString -> String
68 toString bstr = decodeString $ map (chr . fromIntegral) $ B.unpack bstr
73 -- | Direct representation stage
76 -- | Link to some object
77 type family Link stage a
79 -- | At File stage, Link contain index of object in the constants pool.
80 type instance Link File a = Word16
82 -- | At Direct stage, Link contain object itself.
83 type instance Link Direct a = a
85 -- | Object (class, method, field …) access flags
86 type family AccessFlags stage
88 -- | At File stage, access flags are represented as Word16
89 type instance AccessFlags File = Word16
91 -- | At Direct stage, access flags are represented as set of flags.
92 type instance AccessFlags Direct = S.Set AccessFlag
94 -- | Object (class, method, field) attributes
95 data family Attributes stage
97 -- | At File stage, attributes are represented as list of Attribute structures.
98 data instance Attributes File = AP {attributesList :: [Attribute]}
101 instance Default (Attributes File) where
104 -- | At Direct stage, attributes are represented as a Map.
105 data instance Attributes Direct = AR (M.Map B.ByteString B.ByteString)
108 instance Default (Attributes Direct) where
111 -- | Size of attributes set at Direct stage
112 arsize :: Attributes Direct -> Int
113 arsize (AR m) = M.size m
115 -- | Associative list of attributes at Direct stage
116 arlist :: Attributes Direct -> [(B.ByteString, B.ByteString)]
117 arlist (AR m) = M.assocs m
119 -- | Size of attributes set at File stage
120 apsize :: Attributes File -> Int
121 apsize (AP list) = length list
123 -- | Access flags. Used for classess, methods, variables.
125 ACC_PUBLIC -- ^ 0x0001 Visible for all
126 | ACC_PRIVATE -- ^ 0x0002 Visible only for defined class
127 | ACC_PROTECTED -- ^ 0x0004 Visible only for subclasses
128 | ACC_STATIC -- ^ 0x0008 Static method or variable
129 | ACC_FINAL -- ^ 0x0010 No further subclassing or assignments
130 | ACC_SYNCHRONIZED -- ^ 0x0020 Uses monitors
131 | ACC_VOLATILE -- ^ 0x0040 Could not be cached
132 | ACC_TRANSIENT -- ^ 0x0080
133 | ACC_NATIVE -- ^ 0x0100 Implemented in other language
134 | ACC_INTERFACE -- ^ 0x0200 Class is interface
135 | ACC_ABSTRACT -- ^ 0x0400
136 deriving (Eq, Show, Ord, Enum)
138 -- | Fields and methods have signatures.
139 class (Binary (Signature a), Show (Signature a), Eq (Signature a))
140 => HasSignature a where
143 instance HasSignature Field where
144 type Signature Field = FieldSignature
146 instance HasSignature Method where
147 type Signature Method = MethodSignature
149 -- | Name and signature pair. Used for methods and fields.
150 data NameType a = NameType {
151 ntName :: B.ByteString,
152 ntSignature :: Signature a }
154 instance (HasSignature a) => Show (NameType a) where
155 show (NameType n t) = toString n ++ ": " ++ show t
157 deriving instance HasSignature a => Eq (NameType a)
159 instance HasSignature a => Binary (NameType a) where
160 put (NameType n t) = putLazyByteString n >> put t
162 get = NameType <$> get <*> get
164 -- | Constant pool item
165 data Constant stage =
166 CClass (Link stage B.ByteString)
167 | CField {refClass :: Link stage B.ByteString, fieldNameType :: Link stage (NameType Field)}
168 | CMethod {refClass :: Link stage B.ByteString, nameType :: Link stage (NameType Method)}
169 | CIfaceMethod {refClass :: Link stage B.ByteString, nameType :: Link stage (NameType Method)}
170 | CString (Link stage B.ByteString)
175 | CNameType (Link stage B.ByteString) (Link stage B.ByteString)
176 | CUTF8 {getString :: B.ByteString}
177 | CUnicode {getString :: B.ByteString}
179 -- | Name of the CClass. Error on any other constant.
180 className :: Constant Direct -> B.ByteString
181 className (CClass s) = s
182 className x = error $ "Not a class: " ++ show x
184 instance Show (Constant Direct) where
185 show (CClass name) = "class " ++ toString name
186 show (CField cls nt) = "field " ++ toString cls ++ "." ++ show nt
187 show (CMethod cls nt) = "method " ++ toString cls ++ "." ++ show nt
188 show (CIfaceMethod cls nt) = "interface method " ++ toString cls ++ "." ++ show nt
189 show (CString s) = "String \"" ++ toString s ++ "\""
190 show (CInteger x) = show x
191 show (CFloat x) = show x
192 show (CLong x) = show x
193 show (CDouble x) = show x
194 show (CNameType name tp) = toString name ++ ": " ++ toString tp
195 show (CUTF8 s) = "UTF8 \"" ++ toString s ++ "\""
196 show (CUnicode s) = "Unicode \"" ++ toString s ++ "\""
199 type Pool stage = M.Map Word16 (Constant stage)
201 -- | Generic .class file format
202 data Class stage = Class {
203 magic :: Word32, -- ^ Magic value: 0xCAFEBABE
204 minorVersion :: Word16,
205 majorVersion :: Word16,
206 constsPoolSize :: Word16, -- ^ Number of items in constants pool
207 constsPool :: Pool stage, -- ^ Constants pool itself
208 accessFlags :: AccessFlags stage, -- ^ See @JVM.Types.AccessFlag@
209 thisClass :: Link stage B.ByteString, -- ^ Constants pool item index for this class
210 superClass :: Link stage B.ByteString, -- ^ --/-- for super class, zero for java.lang.Object
211 interfacesCount :: Word16, -- ^ Number of implemented interfaces
212 interfaces :: [Link stage B.ByteString], -- ^ Constants pool item indexes for implemented interfaces
213 classFieldsCount :: Word16, -- ^ Number of class fileds
214 classFields :: [Field stage], -- ^ Class fields
215 classMethodsCount :: Word16, -- ^ Number of class methods
216 classMethods :: [Method stage], -- ^ Class methods
217 classAttributesCount :: Word16, -- ^ Number of class attributes
218 classAttributes :: Attributes stage -- ^ Class attributes
221 deriving instance Eq (Constant File)
222 deriving instance Eq (Constant Direct)
223 deriving instance Show (Constant File)
225 -- | Default (empty) class file definition.
226 defaultClass :: (Default (AccessFlags stage), Default (Link stage B.ByteString), Default (Attributes stage))
228 defaultClass = Class {
239 classFieldsCount = 0,
241 classMethodsCount = 0,
243 classAttributesCount = 0,
244 classAttributes = def }
246 instance Binary (Class File) where
247 put (Class {..}) = do
252 forM_ (M.elems constsPool) put
259 forM_ classFields put
260 put classMethodsCount
261 forM_ classMethods put
262 put classAttributesCount
263 forM_ (attributesList classAttributes) put
270 pool <- replicateM (fromIntegral poolsize - 1) get
274 interfacesCount <- get
275 ifaces <- replicateM (fromIntegral interfacesCount) get
276 classFieldsCount <- get
277 classFields <- replicateM (fromIntegral classFieldsCount) get
278 classMethodsCount <- get
279 classMethods <- replicateM (fromIntegral classMethodsCount) get
281 as <- replicateM (fromIntegral $ asCount) get
282 let pool' = M.fromList $ zip [1..] pool
283 return $ Class magic minor major poolsize pool' af this super
284 interfacesCount ifaces classFieldsCount classFields
285 classMethodsCount classMethods asCount (AP as)
287 -- | Field signature format
297 | ObjectType String -- ^ L @{class name}@
298 | Array (Maybe Int) FieldType -- ^ @[{type}@
301 instance Show FieldType where
302 show SignedByte = "byte"
303 show CharByte = "char"
304 show DoubleType = "double"
305 show FloatType = "float"
307 show LongInt = "long"
308 show ShortInt = "short"
309 show BoolType = "bool"
310 show (ObjectType s) = "Object " ++ s
311 show (Array Nothing t) = show t ++ "[]"
312 show (Array (Just n) t) = show t ++ "[" ++ show n ++ "]"
314 -- | Class field signature
315 type FieldSignature = FieldType
317 -- | Try to read integer value from decimal representation
318 getInt :: Get (Maybe Int)
323 else return $ Just (read s)
325 getDigits :: Get [Char]
327 c <- lookAhead getChar8
335 putString :: String -> Put
336 putString str = forM_ str put
338 instance Binary FieldType where
339 put SignedByte = put 'B'
340 put CharByte = put 'C'
341 put DoubleType = put 'D'
342 put FloatType = put 'F'
343 put IntType = put 'I'
344 put LongInt = put 'J'
345 put ShortInt = put 'S'
346 put BoolType = put 'Z'
347 put (ObjectType name) = put 'L' >> putString name >> put ';'
348 put (Array Nothing sig) = put '[' >> put sig
349 put (Array (Just n) sig) = put '[' >> put (show n) >> put sig
354 'B' -> return SignedByte
355 'C' -> return CharByte
356 'D' -> return DoubleType
357 'F' -> return FloatType
358 'I' -> return IntType
359 'J' -> return LongInt
360 'S' -> return ShortInt
361 'Z' -> return BoolType
363 name <- getToSemicolon
364 return (ObjectType name)
368 return (Array mbSize sig)
369 _ -> fail $ "Unknown signature opening symbol: " ++ [b]
371 -- | Read string up to `;'
372 getToSemicolon :: Get String
378 next <- getToSemicolon
381 -- | Return value signature
382 data ReturnSignature =
387 instance Show ReturnSignature where
388 show (Returns t) = show t
389 show ReturnsVoid = "Void"
391 instance Binary ReturnSignature where
392 put (Returns sig) = put sig
393 put ReturnsVoid = put 'V'
396 x <- lookAhead getChar8
398 'V' -> skip 1 >> return ReturnsVoid
401 -- | Method argument signature
402 type ArgumentSignature = FieldType
404 -- | Class method argument signature
405 data MethodSignature =
406 MethodSignature [ArgumentSignature] ReturnSignature
409 instance Show MethodSignature where
410 show (MethodSignature args ret) = "(" ++ intercalate ", " (map show args) ++ ") returns " ++ show ret
412 instance Binary MethodSignature where
413 put (MethodSignature args ret) = do
422 fail "Cannot parse method signature: no starting `(' !"
426 fail "Internal error: method signature without `)' !?"
428 return (MethodSignature args ret)
430 -- | Read arguments signatures (up to `)')
431 getArgs :: Get [ArgumentSignature]
432 getArgs = whileJust getArg
434 getArg :: Get (Maybe ArgumentSignature)
436 x <- lookAhead getChar8
441 whileJust :: (Monad m) => m (Maybe a) -> m [a]
450 instance Binary (Constant File) where
451 put (CClass i) = putWord8 7 >> put i
452 put (CField i j) = putWord8 9 >> put i >> put j
453 put (CMethod i j) = putWord8 10 >> put i >> put j
454 put (CIfaceMethod i j) = putWord8 11 >> put i >> put j
455 put (CString i) = putWord8 8 >> put i
456 put (CInteger x) = putWord8 3 >> put x
457 put (CFloat x) = putWord8 4 >> putFloat32be x
458 put (CLong x) = putWord8 5 >> put x
459 put (CDouble x) = putWord8 6 >> putFloat64be x
460 put (CNameType i j) = putWord8 12 >> put i >> put j
463 put (fromIntegral (B.length bs) :: Word16)
465 put (CUnicode bs) = do
467 put (fromIntegral (B.length bs) :: Word16)
476 bs <- getLazyByteString (fromIntegral (l :: Word16))
480 bs <- getLazyByteString (fromIntegral (l :: Word16))
482 3 -> CInteger <$> get
483 4 -> CFloat <$> getFloat32be
485 6 -> CDouble <$> getFloat64be
488 9 -> CField <$> get <*> get
489 10 -> CMethod <$> get <*> get
490 11 -> CIfaceMethod <$> get <*> get
491 12 -> CNameType <$> get <*> get
492 _ -> fail $ "Unknown constants pool entry tag: " ++ show tag
494 -- | Class field format
495 data Field stage = Field {
496 fieldAccessFlags :: AccessFlags stage,
497 fieldName :: Link stage B.ByteString,
498 fieldSignature :: Link stage FieldSignature,
499 fieldAttributesCount :: Word16,
500 fieldAttributes :: Attributes stage }
502 deriving instance Eq (Field File)
503 deriving instance Eq (Field Direct)
504 deriving instance Show (Field File)
505 deriving instance Show (Field Direct)
507 instance Binary (Field File) where
508 put (Field {..}) = do
512 put fieldAttributesCount
513 forM_ (attributesList fieldAttributes) put
520 as <- replicateM (fromIntegral n) get
521 return $ Field af ni si n (AP as)
523 -- | Class method format
524 data Method stage = Method {
525 methodAccessFlags :: AccessFlags stage,
526 methodName :: Link stage B.ByteString,
527 methodSignature :: Link stage MethodSignature,
528 methodAttributesCount :: Word16,
529 methodAttributes :: Attributes stage }
531 deriving instance Eq (Method File)
532 deriving instance Eq (Method Direct)
533 deriving instance Show (Method File)
534 deriving instance Show (Method Direct)
536 instance Binary (Method File) where
537 put (Method {..}) = do
538 put methodAccessFlags
541 put methodAttributesCount
542 forM_ (attributesList methodAttributes) put
550 as <- replicateM (fromIntegral n) get
552 methodAccessFlags = af,
554 methodSignature = si,
555 methodAttributesCount = n,
556 methodAttributes = AP as }
558 -- | Any (class/ field/ method/ ...) attribute format.
559 -- Some formats specify special formats for @attributeValue@.
560 data Attribute = Attribute {
561 attributeName :: Word16,
562 attributeLength :: Word32,
563 attributeValue :: B.ByteString }
566 instance Binary Attribute where
567 put (Attribute {..}) = do
569 putWord32be attributeLength
570 putLazyByteString attributeValue
576 value <- getLazyByteString (fromIntegral len)
577 return $ Attribute name len value
579 class HasAttributes a where
580 attributes :: a stage -> Attributes stage
582 instance HasAttributes Class where
583 attributes = classAttributes
585 instance HasAttributes Field where
586 attributes = fieldAttributes
588 instance HasAttributes Method where
589 attributes = methodAttributes