1 {-# LANGUAGE RecordWildCards, BangPatterns, TypeFamilies, StandaloneDeriving, FlexibleContexts, FlexibleInstances, UndecidableInstances, TypeSynonymInstances #-}
2 -- | This module declares (low-level) data types for Java .class files
3 -- structures, and Binary instances to read/write them.
8 -- * Internal class file structures
12 FieldSignature, MethodSignature (..), ReturnSignature (..),
13 ArgumentSignature (..),
16 -- * Staged structures
18 Method (..), Field (..), Class (..),
20 AccessFlag (..), AccessFlags,
24 HasSignature (..), HasAttributes (..),
28 apsize, arsize, arlist
33 import Control.Applicative
35 import Data.Binary.IEEE754
36 import Data.Binary.Get
37 import Data.Binary.Put
41 import qualified Data.Set as S
42 import qualified Data.Map as M
43 import qualified Data.ByteString.Lazy as B
44 import Codec.Binary.UTF8.String hiding (encode, decode)
48 -- Java .class file uses constants pool, which stores almost all source-code-level
49 -- constants (strings, integer literals etc), and also all identifiers (class,
50 -- method, field names etc). All other structures contain indexes of constants in
51 -- the pool instead of constants theirself.
53 -- It's not convient to use that indexes programmatically. So, .class file is represented
54 -- at two stages: File and Direct. At File stage, all data structures contain only indexes,
55 -- not constants theirself. When we read a class from a file, we get structure at File stage.
56 -- We only can write File stage structure to file.
58 -- At Direct stage, structures conain constants, not indexes. Convertion functions (File <-> Direct)
59 -- are located in the JVM.Converter module.
62 -- | Read one-byte Char
66 return $ chr (fromIntegral x)
68 toString :: B.ByteString -> String
69 toString bstr = decodeString $ map (chr . fromIntegral) $ B.unpack bstr
74 -- | Direct representation stage
77 -- | Link to some object
78 type family Link stage a
80 -- | At File stage, Link contain index of object in the constants pool.
81 type instance Link File a = Word16
83 -- | At Direct stage, Link contain object itself.
84 type instance Link Direct a = a
86 -- | Object (class, method, field …) access flags
87 type family AccessFlags stage
89 -- | At File stage, access flags are represented as Word16
90 type instance AccessFlags File = Word16
92 -- | At Direct stage, access flags are represented as set of flags.
93 type instance AccessFlags Direct = S.Set AccessFlag
95 -- | Object (class, method, field) attributes
96 data family Attributes stage
98 -- | At File stage, attributes are represented as list of Attribute structures.
99 data instance Attributes File = AP {attributesList :: [Attribute]}
102 instance Default (Attributes File) where
105 -- | At Direct stage, attributes are represented as a Map.
106 data instance Attributes Direct = AR (M.Map B.ByteString B.ByteString)
109 instance Default (Attributes Direct) where
112 -- | Size of attributes set at Direct stage
113 arsize :: Attributes Direct -> Int
114 arsize (AR m) = M.size m
116 -- | Associative list of attributes at Direct stage
117 arlist :: Attributes Direct -> [(B.ByteString, B.ByteString)]
118 arlist (AR m) = M.assocs m
120 -- | Size of attributes set at File stage
121 apsize :: Attributes File -> Int
122 apsize (AP list) = length list
124 -- | Access flags. Used for classess, methods, variables.
126 ACC_PUBLIC -- ^ 0x0001 Visible for all
127 | ACC_PRIVATE -- ^ 0x0002 Visible only for defined class
128 | ACC_PROTECTED -- ^ 0x0004 Visible only for subclasses
129 | ACC_STATIC -- ^ 0x0008 Static method or variable
130 | ACC_FINAL -- ^ 0x0010 No further subclassing or assignments
131 | ACC_SYNCHRONIZED -- ^ 0x0020 Uses monitors
132 | ACC_VOLATILE -- ^ 0x0040 Could not be cached
133 | ACC_TRANSIENT -- ^ 0x0080
134 | ACC_NATIVE -- ^ 0x0100 Implemented in other language
135 | ACC_INTERFACE -- ^ 0x0200 Class is interface
136 | ACC_ABSTRACT -- ^ 0x0400
137 deriving (Eq, Show, Ord, Enum)
139 -- | Fields and methods have signatures.
140 class (Binary (Signature a), Show (Signature a), Eq (Signature a))
141 => HasSignature a where
144 instance HasSignature Field where
145 type Signature Field = FieldSignature
147 instance HasSignature Method where
148 type Signature Method = MethodSignature
150 -- | Name and signature pair. Used for methods and fields.
151 data NameType a = NameType {
152 ntName :: B.ByteString,
153 ntSignature :: Signature a }
155 instance (HasSignature a) => Show (NameType a) where
156 show (NameType n t) = toString n ++ ": " ++ show t
158 deriving instance HasSignature a => Eq (NameType a)
160 instance HasSignature a => Binary (NameType a) where
161 put (NameType n t) = putLazyByteString n >> put t
163 get = NameType <$> get <*> get
165 -- | Constant pool item
166 data Constant stage =
167 CClass (Link stage B.ByteString)
168 | CField {refClass :: Link stage B.ByteString, fieldNameType :: Link stage (NameType Field)}
169 | CMethod {refClass :: Link stage B.ByteString, nameType :: Link stage (NameType Method)}
170 | CIfaceMethod {refClass :: Link stage B.ByteString, nameType :: Link stage (NameType Method)}
171 | CString (Link stage B.ByteString)
176 | CNameType (Link stage B.ByteString) (Link stage B.ByteString)
177 | CUTF8 {getString :: B.ByteString}
178 | CUnicode {getString :: B.ByteString}
180 -- | Name of the CClass. Error on any other constant.
181 className :: Constant Direct -> B.ByteString
182 className (CClass s) = s
183 className x = error $ "Not a class: " ++ show x
185 instance Show (Constant Direct) where
186 show (CClass name) = "class " ++ toString name
187 show (CField cls nt) = "field " ++ toString cls ++ "." ++ show nt
188 show (CMethod cls nt) = "method " ++ toString cls ++ "." ++ show nt
189 show (CIfaceMethod cls nt) = "interface method " ++ toString cls ++ "." ++ show nt
190 show (CString s) = "String \"" ++ toString s ++ "\""
191 show (CInteger x) = show x
192 show (CFloat x) = show x
193 show (CLong x) = show x
194 show (CDouble x) = show x
195 show (CNameType name tp) = toString name ++ ": " ++ toString tp
196 show (CUTF8 s) = "UTF8 \"" ++ toString s ++ "\""
197 show (CUnicode s) = "Unicode \"" ++ toString s ++ "\""
200 type Pool stage = M.Map Word16 (Constant stage)
202 -- | Generic .class file format
203 data Class stage = Class {
204 magic :: Word32, -- ^ Magic value: 0xCAFEBABE
205 minorVersion :: Word16,
206 majorVersion :: Word16,
207 constsPoolSize :: Word16, -- ^ Number of items in constants pool
208 constsPool :: Pool stage, -- ^ Constants pool itself
209 accessFlags :: AccessFlags stage, -- ^ See @JVM.Types.AccessFlag@
210 thisClass :: Link stage B.ByteString, -- ^ Constants pool item index for this class
211 superClass :: Link stage B.ByteString, -- ^ --/-- for super class, zero for java.lang.Object
212 interfacesCount :: Word16, -- ^ Number of implemented interfaces
213 interfaces :: [Link stage B.ByteString], -- ^ Constants pool item indexes for implemented interfaces
214 classFieldsCount :: Word16, -- ^ Number of class fileds
215 classFields :: [Field stage], -- ^ Class fields
216 classMethodsCount :: Word16, -- ^ Number of class methods
217 classMethods :: [Method stage], -- ^ Class methods
218 classAttributesCount :: Word16, -- ^ Number of class attributes
219 classAttributes :: Attributes stage -- ^ Class attributes
222 deriving instance Eq (Class File)
223 deriving instance Eq (Class Direct)
224 deriving instance Show (Class File)
225 deriving instance Show (Class Direct)
227 deriving instance Eq (Constant File)
228 deriving instance Eq (Constant Direct)
229 deriving instance Show (Constant File)
231 -- | Default (empty) class file definition.
232 defaultClass :: (Default (AccessFlags stage), Default (Link stage B.ByteString), Default (Attributes stage))
234 defaultClass = Class {
245 classFieldsCount = 0,
247 classMethodsCount = 0,
249 classAttributesCount = 0,
250 classAttributes = def }
252 instance Binary (Class File) where
253 put (Class {..}) = do
258 forM_ (M.elems constsPool) put
265 forM_ classFields put
266 put classMethodsCount
267 forM_ classMethods put
268 put classAttributesCount
269 forM_ (attributesList classAttributes) put
276 pool <- replicateM (fromIntegral poolsize - 1) get
280 interfacesCount <- get
281 ifaces <- replicateM (fromIntegral interfacesCount) get
282 classFieldsCount <- get
283 classFields <- replicateM (fromIntegral classFieldsCount) get
284 classMethodsCount <- get
285 classMethods <- replicateM (fromIntegral classMethodsCount) get
287 as <- replicateM (fromIntegral $ asCount) get
288 let pool' = M.fromList $ zip [1..] pool
289 return $ Class magic minor major poolsize pool' af this super
290 interfacesCount ifaces classFieldsCount classFields
291 classMethodsCount classMethods asCount (AP as)
293 -- | Field signature format
303 | ObjectType String -- ^ L @{class name}@
304 | Array (Maybe Int) FieldType -- ^ @[{type}@
307 instance Show FieldType where
308 show SignedByte = "byte"
309 show CharByte = "char"
310 show DoubleType = "double"
311 show FloatType = "float"
313 show LongInt = "long"
314 show ShortInt = "short"
315 show BoolType = "bool"
316 show (ObjectType s) = "Object " ++ s
317 show (Array Nothing t) = show t ++ "[]"
318 show (Array (Just n) t) = show t ++ "[" ++ show n ++ "]"
320 -- | Class field signature
321 type FieldSignature = FieldType
323 -- | Try to read integer value from decimal representation
324 getInt :: Get (Maybe Int)
329 else return $ Just (read s)
331 getDigits :: Get [Char]
333 c <- lookAhead getChar8
341 putString :: String -> Put
342 putString str = forM_ str put
344 instance Binary FieldType where
345 put SignedByte = put 'B'
346 put CharByte = put 'C'
347 put DoubleType = put 'D'
348 put FloatType = put 'F'
349 put IntType = put 'I'
350 put LongInt = put 'J'
351 put ShortInt = put 'S'
352 put BoolType = put 'Z'
353 put (ObjectType name) = put 'L' >> putString name >> put ';'
354 put (Array Nothing sig) = put '[' >> put sig
355 put (Array (Just n) sig) = put '[' >> put (show n) >> put sig
360 'B' -> return SignedByte
361 'C' -> return CharByte
362 'D' -> return DoubleType
363 'F' -> return FloatType
364 'I' -> return IntType
365 'J' -> return LongInt
366 'S' -> return ShortInt
367 'Z' -> return BoolType
369 name <- getToSemicolon
370 return (ObjectType name)
374 return (Array mbSize sig)
375 _ -> fail $ "Unknown signature opening symbol: " ++ [b]
377 -- | Read string up to `;'
378 getToSemicolon :: Get String
384 next <- getToSemicolon
387 -- | Return value signature
388 data ReturnSignature =
393 instance Show ReturnSignature where
394 show (Returns t) = show t
395 show ReturnsVoid = "Void"
397 instance Binary ReturnSignature where
398 put (Returns sig) = put sig
399 put ReturnsVoid = put 'V'
402 x <- lookAhead getChar8
404 'V' -> skip 1 >> return ReturnsVoid
407 -- | Method argument signature
408 type ArgumentSignature = FieldType
410 -- | Class method argument signature
411 data MethodSignature =
412 MethodSignature [ArgumentSignature] ReturnSignature
415 instance Show MethodSignature where
416 show (MethodSignature args ret) = "(" ++ intercalate ", " (map show args) ++ ") returns " ++ show ret
418 instance Binary MethodSignature where
419 put (MethodSignature args ret) = do
428 fail "Cannot parse method signature: no starting `(' !"
432 fail "Internal error: method signature without `)' !?"
434 return (MethodSignature args ret)
436 -- | Read arguments signatures (up to `)')
437 getArgs :: Get [ArgumentSignature]
438 getArgs = whileJust getArg
440 getArg :: Get (Maybe ArgumentSignature)
442 x <- lookAhead getChar8
447 whileJust :: (Monad m) => m (Maybe a) -> m [a]
456 instance Binary (Constant File) where
457 put (CClass i) = putWord8 7 >> put i
458 put (CField i j) = putWord8 9 >> put i >> put j
459 put (CMethod i j) = putWord8 10 >> put i >> put j
460 put (CIfaceMethod i j) = putWord8 11 >> put i >> put j
461 put (CString i) = putWord8 8 >> put i
462 put (CInteger x) = putWord8 3 >> put x
463 put (CFloat x) = putWord8 4 >> putFloat32be x
464 put (CLong x) = putWord8 5 >> put x
465 put (CDouble x) = putWord8 6 >> putFloat64be x
466 put (CNameType i j) = putWord8 12 >> put i >> put j
469 put (fromIntegral (B.length bs) :: Word16)
471 put (CUnicode bs) = do
473 put (fromIntegral (B.length bs) :: Word16)
482 bs <- getLazyByteString (fromIntegral (l :: Word16))
486 bs <- getLazyByteString (fromIntegral (l :: Word16))
488 3 -> CInteger <$> get
489 4 -> CFloat <$> getFloat32be
491 6 -> CDouble <$> getFloat64be
494 9 -> CField <$> get <*> get
495 10 -> CMethod <$> get <*> get
496 11 -> CIfaceMethod <$> get <*> get
497 12 -> CNameType <$> get <*> get
498 _ -> fail $ "Unknown constants pool entry tag: " ++ show tag
500 -- | Class field format
501 data Field stage = Field {
502 fieldAccessFlags :: AccessFlags stage,
503 fieldName :: Link stage B.ByteString,
504 fieldSignature :: Link stage FieldSignature,
505 fieldAttributesCount :: Word16,
506 fieldAttributes :: Attributes stage }
508 deriving instance Eq (Field File)
509 deriving instance Eq (Field Direct)
510 deriving instance Show (Field File)
511 deriving instance Show (Field Direct)
513 instance Binary (Field File) where
514 put (Field {..}) = do
518 put fieldAttributesCount
519 forM_ (attributesList fieldAttributes) put
526 as <- replicateM (fromIntegral n) get
527 return $ Field af ni si n (AP as)
529 -- | Class method format
530 data Method stage = Method {
531 methodAccessFlags :: AccessFlags stage,
532 methodName :: Link stage B.ByteString,
533 methodSignature :: Link stage MethodSignature,
534 methodAttributesCount :: Word16,
535 methodAttributes :: Attributes stage }
537 deriving instance Eq (Method File)
538 deriving instance Eq (Method Direct)
539 deriving instance Show (Method File)
540 deriving instance Show (Method Direct)
542 instance Binary (Method File) where
543 put (Method {..}) = do
544 put methodAccessFlags
547 put methodAttributesCount
548 forM_ (attributesList methodAttributes) put
556 as <- replicateM (fromIntegral n) get
558 methodAccessFlags = af,
560 methodSignature = si,
561 methodAttributesCount = n,
562 methodAttributes = AP as }
564 -- | Any (class/ field/ method/ ...) attribute format.
565 -- Some formats specify special formats for @attributeValue@.
566 data Attribute = Attribute {
567 attributeName :: Word16,
568 attributeLength :: Word32,
569 attributeValue :: B.ByteString }
572 instance Binary Attribute where
573 put (Attribute {..}) = do
575 putWord32be attributeLength
576 putLazyByteString attributeValue
582 value <- getLazyByteString (fromIntegral len)
583 return $ Attribute name len value
585 class HasAttributes a where
586 attributes :: a stage -> Attributes stage
588 instance HasAttributes Class where
589 attributes = classAttributes
591 instance HasAttributes Field where
592 attributes = fieldAttributes
594 instance HasAttributes Method where
595 attributes = methodAttributes