1 {-# LANGUAGE RecordWildCards, BangPatterns, TypeFamilies, StandaloneDeriving, FlexibleContexts, FlexibleInstances, UndecidableInstances, TypeSynonymInstances #-}
2 -- | This module declares (low-level) data types for Java .class files
3 -- structures, and Binary instances to read/write them.
7 FieldSignature, MethodSignature (..), ReturnSignature (..),
8 ArgumentSignature (..),
10 Method (..), Field (..), Class (..),
14 HasSignature (..), HasAttributes (..),
15 AccessFlag (..), AccessFlags,
18 apsize, arsize, arlist
23 import Control.Applicative
25 import Data.Binary.IEEE754
26 import Data.Binary.Get
27 import Data.Binary.Put
30 import qualified Data.Set as S
31 import qualified Data.Map as M
32 import qualified Data.ByteString.Lazy as B
33 import Codec.Binary.UTF8.String hiding (encode, decode)
35 -- | Read one-byte Char
39 return $ chr (fromIntegral x)
41 toString :: B.ByteString -> String
42 toString bstr = decodeString $ map (chr . fromIntegral) $ B.unpack bstr
46 data Pointers = Pointers
48 data Resolved = Resolved
50 type instance Link Pointers a = Word16
52 type instance Link Resolved a = a
54 type family AccessFlags stage
56 type instance AccessFlags Pointers = Word16
58 type instance AccessFlags Resolved = S.Set AccessFlag
60 data family Attributes stage
62 data instance Attributes Pointers = AP {attributesList :: [Attribute]}
64 data instance Attributes Resolved = AR (M.Map B.ByteString B.ByteString)
67 arsize :: Attributes Resolved -> Int
68 arsize (AR m) = M.size m
70 arlist :: Attributes Resolved -> [(B.ByteString, B.ByteString)]
71 arlist (AR m) = M.assocs m
73 apsize :: Attributes Pointers -> Int
74 apsize (AP list) = length list
76 -- | Access flags. Used for classess, methods, variables.
78 ACC_PUBLIC -- ^ 0x0001 Visible for all
79 | ACC_PRIVATE -- ^ 0x0002 Visible only for defined class
80 | ACC_PROTECTED -- ^ 0x0004 Visible only for subclasses
81 | ACC_STATIC -- ^ 0x0008 Static method or variable
82 | ACC_FINAL -- ^ 0x0010 No further subclassing or assignments
83 | ACC_SYNCHRONIZED -- ^ 0x0020 Uses monitors
84 | ACC_VOLATILE -- ^ 0x0040 Could not be cached
85 | ACC_TRANSIENT -- ^ 0x0080
86 | ACC_NATIVE -- ^ 0x0100 Implemented in other language
87 | ACC_INTERFACE -- ^ 0x0200 Class is interface
88 | ACC_ABSTRACT -- ^ 0x0400
89 deriving (Eq, Show, Ord, Enum)
91 class HasSignature a where
94 instance HasSignature Field where
95 type Signature Field = FieldSignature
97 instance HasSignature Method where
98 type Signature Method = MethodSignature
100 -- | Name and signature pair. Used for methods and fields.
101 data NameType a = NameType {
102 ntName :: B.ByteString,
103 ntSignature :: Signature a }
105 instance Show (Signature a) => Show (NameType a) where
106 show (NameType n t) = toString n ++ ": " ++ show t
108 deriving instance Eq (Signature a) => Eq (NameType a)
110 instance (Binary (Signature a)) => Binary (NameType a) where
111 put (NameType n t) = putLazyByteString n >> put t
113 get = NameType <$> get <*> get
115 -- | Constant pool item
116 data Constant stage =
117 CClass (Link stage B.ByteString)
118 | CField {refClass :: Link stage B.ByteString, fieldNameType :: Link stage (NameType Field)}
119 | CMethod {refClass :: Link stage B.ByteString, nameType :: Link stage (NameType Method)}
120 | CIfaceMethod {refClass :: Link stage B.ByteString, nameType :: Link stage (NameType Method)}
121 | CString (Link stage B.ByteString)
126 | CNameType (Link stage B.ByteString) (Link stage B.ByteString)
127 | CUTF8 {getString :: B.ByteString}
128 | CUnicode {getString :: B.ByteString}
130 className :: Constant Resolved -> B.ByteString
131 className (CClass s) = s
132 className x = error $ "Not a class: " ++ show x
134 instance Show (Constant Resolved) where
135 show (CClass name) = "class " ++ toString name
136 show (CField cls nt) = "field " ++ toString cls ++ "." ++ show nt
137 show (CMethod cls nt) = "method " ++ toString cls ++ "." ++ show nt
138 show (CIfaceMethod cls nt) = "interface method " ++ toString cls ++ "." ++ show nt
139 show (CString s) = "String \"" ++ toString s ++ "\""
140 show (CInteger x) = show x
141 show (CFloat x) = show x
142 show (CLong x) = show x
143 show (CDouble x) = show x
144 show (CNameType name tp) = toString name ++ ": " ++ toString tp
145 show (CUTF8 s) = "UTF8 \"" ++ toString s ++ "\""
146 show (CUnicode s) = "Unicode \"" ++ toString s ++ "\""
149 type Pool stage = M.Map Word16 (Constant stage)
151 -- | Generic .class file format
152 data Class stage = Class {
153 magic :: Word32, -- ^ Magic value: 0xCAFEBABE
154 minorVersion :: Word16,
155 majorVersion :: Word16,
156 constsPoolSize :: Word16, -- ^ Number of items in constants pool
157 constsPool :: Pool stage, -- ^ Constants pool itself
158 accessFlags :: AccessFlags stage, -- ^ See @JVM.Types.AccessFlag@
159 thisClass :: Link stage B.ByteString, -- ^ Constants pool item index for this class
160 superClass :: Link stage B.ByteString, -- ^ --/-- for super class, zero for java.lang.Object
161 interfacesCount :: Word16, -- ^ Number of implemented interfaces
162 interfaces :: [Link stage B.ByteString], -- ^ Constants pool item indexes for implemented interfaces
163 classFieldsCount :: Word16, -- ^ Number of class fileds
164 classFields :: [Field stage], -- ^ Class fields
165 classMethodsCount :: Word16, -- ^ Number of class methods
166 classMethods :: [Method stage], -- ^ Class methods
167 classAttributesCount :: Word16, -- ^ Number of class attributes
168 classAttributes :: Attributes stage -- ^ Class attributes
171 deriving instance Eq (Constant Pointers)
172 deriving instance Eq (Constant Resolved)
173 deriving instance Show (Constant Pointers)
175 instance Binary (Class Pointers) where
176 put (Class {..}) = do
181 forM_ (M.elems constsPool) put
188 forM_ classFields put
189 put classMethodsCount
190 forM_ classMethods put
191 put classAttributesCount
192 forM_ (attributesList classAttributes) put
199 pool <- replicateM (fromIntegral poolsize - 1) get
203 interfacesCount <- get
204 ifaces <- replicateM (fromIntegral interfacesCount) get
205 classFieldsCount <- get
206 classFields <- replicateM (fromIntegral classFieldsCount) get
207 classMethodsCount <- get
208 classMethods <- replicateM (fromIntegral classMethodsCount) get
210 as <- replicateM (fromIntegral $ asCount) get
211 let pool' = M.fromList $ zip [1..] pool
212 return $ Class magic minor major poolsize pool' af this super
213 interfacesCount ifaces classFieldsCount classFields
214 classMethodsCount classMethods asCount (AP as)
216 -- | Field signature format
226 | ObjectType String -- ^ L @{class name}@
227 | Array (Maybe Int) FieldType -- ^ @[{type}@
230 instance Show FieldType where
231 show SignedByte = "byte"
232 show CharByte = "char"
233 show DoubleType = "double"
234 show FloatType = "float"
236 show LongInt = "long"
237 show ShortInt = "short"
238 show BoolType = "bool"
239 show (ObjectType s) = "Object " ++ s
240 show (Array Nothing t) = show t ++ "[]"
241 show (Array (Just n) t) = show t ++ "[" ++ show n ++ "]"
243 -- | Class field signature
244 type FieldSignature = FieldType
246 -- | Try to read integer value from decimal representation
247 getInt :: Get (Maybe Int)
252 else return $ Just (read s)
254 getDigits :: Get [Char]
256 c <- lookAhead getChar8
264 putString :: String -> Put
265 putString str = forM_ str put
267 instance Binary FieldType where
268 put SignedByte = put 'B'
269 put CharByte = put 'C'
270 put DoubleType = put 'D'
271 put FloatType = put 'F'
272 put IntType = put 'I'
273 put LongInt = put 'J'
274 put ShortInt = put 'S'
275 put BoolType = put 'Z'
276 put (ObjectType name) = put 'L' >> putString name >> put ';'
277 put (Array Nothing sig) = put '[' >> put sig
278 put (Array (Just n) sig) = put '[' >> put (show n) >> put sig
283 'B' -> return SignedByte
284 'C' -> return CharByte
285 'D' -> return DoubleType
286 'F' -> return FloatType
287 'I' -> return IntType
288 'J' -> return LongInt
289 'S' -> return ShortInt
290 'Z' -> return BoolType
292 name <- getToSemicolon
293 return (ObjectType name)
297 return (Array mbSize sig)
298 _ -> fail $ "Unknown signature opening symbol: " ++ [b]
300 -- | Read string up to `;'
301 getToSemicolon :: Get String
307 next <- getToSemicolon
310 -- | Return value signature
311 data ReturnSignature =
316 instance Show ReturnSignature where
317 show (Returns t) = show t
318 show ReturnsVoid = "Void"
320 instance Binary ReturnSignature where
321 put (Returns sig) = put sig
322 put ReturnsVoid = put 'V'
325 x <- lookAhead getChar8
327 'V' -> skip 1 >> return ReturnsVoid
330 -- | Method argument signature
331 type ArgumentSignature = FieldType
333 -- | Class method argument signature
334 data MethodSignature =
335 MethodSignature [ArgumentSignature] ReturnSignature
338 instance Show MethodSignature where
339 show (MethodSignature args ret) = "(" ++ intercalate ", " (map show args) ++ ") returns " ++ show ret
341 instance Binary MethodSignature where
342 put (MethodSignature args ret) = do
351 fail "Cannot parse method signature: no starting `(' !"
355 fail "Internal error: method signature without `)' !?"
357 return (MethodSignature args ret)
359 -- | Read arguments signatures (up to `)')
360 getArgs :: Get [ArgumentSignature]
361 getArgs = whileJust getArg
363 getArg :: Get (Maybe ArgumentSignature)
365 x <- lookAhead getChar8
370 whileJust :: (Monad m) => m (Maybe a) -> m [a]
379 instance Binary (Constant Pointers) where
380 put (CClass i) = putWord8 7 >> put i
381 put (CField i j) = putWord8 9 >> put i >> put j
382 put (CMethod i j) = putWord8 10 >> put i >> put j
383 put (CIfaceMethod i j) = putWord8 11 >> put i >> put j
384 put (CString i) = putWord8 8 >> put i
385 put (CInteger x) = putWord8 3 >> put x
386 put (CFloat x) = putWord8 4 >> putFloat32be x
387 put (CLong x) = putWord8 5 >> put x
388 put (CDouble x) = putWord8 6 >> putFloat64be x
389 put (CNameType i j) = putWord8 12 >> put i >> put j
392 put (fromIntegral (B.length bs) :: Word16)
394 put (CUnicode bs) = do
396 put (fromIntegral (B.length bs) :: Word16)
405 bs <- getLazyByteString (fromIntegral (l :: Word16))
409 bs <- getLazyByteString (fromIntegral (l :: Word16))
411 3 -> CInteger <$> get
412 4 -> CFloat <$> getFloat32be
414 6 -> CDouble <$> getFloat64be
417 9 -> CField <$> get <*> get
418 10 -> CMethod <$> get <*> get
419 11 -> CIfaceMethod <$> get <*> get
420 12 -> CNameType <$> get <*> get
421 _ -> fail $ "Unknown constants pool entry tag: " ++ show tag
423 -- | Class field format
424 data Field stage = Field {
425 fieldAccessFlags :: AccessFlags stage,
426 fieldName :: Link stage B.ByteString,
427 fieldSignature :: Link stage FieldSignature,
428 fieldAttributesCount :: Word16,
429 fieldAttributes :: Attributes stage }
431 deriving instance Eq (Field Pointers)
432 deriving instance Eq (Field Resolved)
433 deriving instance Show (Field Pointers)
434 deriving instance Show (Field Resolved)
436 instance Binary (Field Pointers) where
437 put (Field {..}) = do
441 put fieldAttributesCount
442 forM_ (attributesList fieldAttributes) put
449 as <- replicateM (fromIntegral n) get
450 return $ Field af ni si n (AP as)
452 -- | Class method format
453 data Method stage = Method {
454 methodAccessFlags :: AccessFlags stage,
455 methodName :: Link stage B.ByteString,
456 methodSignature :: Link stage MethodSignature,
457 methodAttributesCount :: Word16,
458 methodAttributes :: Attributes stage }
460 deriving instance Eq (Method Pointers)
461 deriving instance Eq (Method Resolved)
462 deriving instance Show (Method Pointers)
463 deriving instance Show (Method Resolved)
465 instance Binary (Method Pointers) where
466 put (Method {..}) = do
467 put methodAccessFlags
470 put methodAttributesCount
471 forM_ (attributesList methodAttributes) put
479 as <- replicateM (fromIntegral n) get
481 methodAccessFlags = af,
483 methodSignature = si,
484 methodAttributesCount = n,
485 methodAttributes = AP as }
487 -- | Any (class/ field/ method/ ...) attribute format.
488 -- Some formats specify special formats for @attributeValue@.
489 data Attribute = Attribute {
490 attributeName :: Word16,
491 attributeLength :: Word32,
492 attributeValue :: B.ByteString }
495 instance Binary Attribute where
496 put (Attribute {..}) = do
498 putWord32be attributeLength
499 putLazyByteString attributeValue
505 value <- getLazyByteString (fromIntegral len)
506 return $ Attribute name len value
508 class HasAttributes a where
509 attributes :: a stage -> Attributes stage
511 instance HasAttributes Class where
512 attributes = classAttributes
514 instance HasAttributes Field where
515 attributes = fieldAttributes
517 instance HasAttributes Method where
518 attributes = methodAttributes