{-# LANGUAGE RecordWildCards, BangPatterns #-} module JVM.ClassFile where import Control.Monad import Control.Applicative import Data.Binary import Data.Binary.IEEE754 import Data.Binary.Get import Data.Binary.Put import Data.Word import Data.Char import Data.List import qualified Data.ByteString.Lazy as B import Debug.Trace traceS :: (Show a) => String -> a -> a traceS msg x = trace (msg ++ ": " ++ show x) x char :: Word8 -> Char char n = chr (fromIntegral n) getChar8 :: Get Char getChar8 = do x <- getWord8 return (char x) data ClassFile = ClassFile { magic :: Word32, minorVersion :: Word16, majorVersion :: Word16, constsPoolSize :: Word16, constsPool :: [CpInfo], accessFlags :: Word16, thisClass :: Word16, superClass :: Word16, interfacesCount :: Word16, interfaces :: [Word16], classFieldsCount :: Word16, classFields :: [FieldInfo], classMethodsCount :: Word16, classMethods :: [MethodInfo], classAttributesCount :: Word16, classAttributes :: [AttributeInfo] } deriving (Eq, Show) traceM msg x = do r <- x return $ traceS msg r replicateMT n m = replicateM n (traceM ">" m) instance Binary ClassFile where put (ClassFile {..}) = do put magic put minorVersion put majorVersion put constsPoolSize forM_ constsPool put put accessFlags put thisClass put superClass put interfacesCount forM_ interfaces put put classFieldsCount forM_ classFields put put classMethodsCount forM_ classMethods put put classAttributesCount forM_ classAttributes put get = do magic <- get minor <- get major <- get poolsize <- get pool <- replicateM (fromIntegral poolsize - 1) get af <- get this <- get super <- get interfacesCount <- get ifaces <- replicateM (fromIntegral interfacesCount) get classFieldsCount <- get classFields <- replicateM (fromIntegral classFieldsCount) get classMethodsCount <- get classMethods <- replicateM (fromIntegral classMethodsCount) get asCount <- get as <- replicateM (fromIntegral $ asCount - 1) get return $ ClassFile magic minor major poolsize pool af this super interfacesCount ifaces classFieldsCount classFields classMethodsCount classMethods asCount as data FieldType = SignedByte -- B | CharByte -- C | DoubleType -- D | FloatType -- F | IntType -- I | LongInt -- J | ShortInt -- S | BoolType -- Z | ObjectType String -- L | Array (Maybe Int) FieldType deriving (Eq) instance Show FieldType where show SignedByte = "byte" show CharByte = "char" show DoubleType = "double" show FloatType = "float" show IntType = "int" show LongInt = "long" show ShortInt = "short" show BoolType = "bool" show (ObjectType s) = "Object " ++ s show (Array Nothing t) = show t ++ "[]" show (Array (Just n) t) = show t ++ "[" ++ show n ++ "]" type FieldSignature = FieldType getInt :: Get (Maybe Int) getInt = do s <- getDigits if null s then return Nothing else return $ Just (read s) where getDigits :: Get [Char] getDigits = do c <- lookAhead getChar8 if isDigit c then do skip 1 next <- getDigits return (c: next) else return [] instance Binary FieldType where put SignedByte = put 'B' put CharByte = put 'C' put DoubleType = put 'D' put FloatType = put 'F' put IntType = put 'I' put LongInt = put 'J' put ShortInt = put 'S' put BoolType = put 'Z' put (ObjectType name) = put 'L' >> put name put (Array Nothing sig) = put '[' >> put sig put (Array (Just n) sig) = put '[' >> put (show n) >> put sig get = do b <- getChar8 case b of 'B' -> return SignedByte 'C' -> return CharByte 'D' -> return DoubleType 'F' -> return FloatType 'I' -> return IntType 'J' -> return LongInt 'S' -> return ShortInt 'Z' -> return BoolType 'L' -> do name <- getToSemicolon return (ObjectType name) '[' -> do mbSize <- getInt sig <- get return (Array mbSize sig) _ -> fail $ "Unknown signature opening symbol: " ++ [b] getToSemicolon :: Get String getToSemicolon = do x <- get if x == ';' then return [] else do next <- getToSemicolon return (x: next) data ReturnSignature = Returns FieldType | ReturnsVoid deriving (Eq) instance Show ReturnSignature where show (Returns t) = show t show ReturnsVoid = "Void" instance Binary ReturnSignature where put (Returns sig) = put sig put ReturnsVoid = put 'V' get = do x <- lookAhead getChar8 case x of 'V' -> skip 1 >> return ReturnsVoid _ -> Returns <$> get type ArgumentSignature = FieldType data MethodSignature = MethodSignature [ArgumentSignature] ReturnSignature deriving (Eq) instance Show MethodSignature where show (MethodSignature args ret) = "(" ++ intercalate ", " (map show args) ++ ") returns " ++ show ret instance Binary MethodSignature where put (MethodSignature args ret) = do put '(' forM_ args put put ')' put ret get = do x <- getChar8 when (x /= '(') $ fail "Cannot parse method signature: no starting `(' !" args <- getArgs y <- getChar8 when (y /= ')') $ fail "Internal error: method signature without `)' !?" ret <- get return (MethodSignature args ret) getArgs :: Get [ArgumentSignature] getArgs = whileJust getArg where getArg :: Get (Maybe ArgumentSignature) getArg = do x <- lookAhead getChar8 if x == ')' then return Nothing else Just <$> get whileJust :: (Monad m) => m (Maybe a) -> m [a] whileJust m = do r <- m case r of Just x -> do next <- whileJust m return (x: next) Nothing -> return [] data CpInfo = CONSTANT_Class {nameIndex :: Word16} -- 7 | CONSTANT_Fieldref {classIndex :: Word16, nameAndTypeIndex :: Word16} -- 9 | CONSTANT_Methodref {classIndex :: Word16, nameAndTypeIndex :: Word16} -- 10 | CONSTANT_InterfaceMethodref {classIndex :: Word16, nameAndTypeIndex :: Word16}-- 11 | CONSTANT_String {stringIndex :: Word16} -- 8 | CONSTANT_Integer {fourBytes :: Word32} -- 3 | CONSTANT_Float Float -- 4 | CONSTANT_Long Word64 -- 5 | CONSTANT_Double Double -- 6 | CONSTANT_NameAndType {nameIndex :: Word16, signatureIndex :: Word16} -- 12 | CONSTANT_Utf8 {stringLength :: Word16, stringBytes :: B.ByteString} -- 1 | CONSTANT_Unicode {stringLength :: Word16, stringBytes :: B.ByteString} -- 2 deriving (Eq, Show) instance Binary CpInfo where put (CONSTANT_Class i) = putWord8 7 >> put i put (CONSTANT_Fieldref i j) = putWord8 9 >> put i >> put j put (CONSTANT_Methodref i j) = putWord8 10 >> put i >> put j put (CONSTANT_InterfaceMethodref i j) = putWord8 11 >> put i >> put j put (CONSTANT_String i) = putWord8 8 >> put i put (CONSTANT_Integer x) = putWord8 3 >> put x put (CONSTANT_Float x) = putWord8 4 >> putFloat32be x put (CONSTANT_Long x) = putWord8 5 >> put x put (CONSTANT_Double x) = putWord8 6 >> putFloat64be x put (CONSTANT_NameAndType i j) = putWord8 12 >> put i >> put j put (CONSTANT_Utf8 l bs) = putWord8 1 >> put l >> putLazyByteString bs put (CONSTANT_Unicode l bs) = putWord8 2 >> put l >> putLazyByteString bs get = do !offset <- bytesRead tag <- getWord8 case tag of 1 -> do l <- get bs <- getLazyByteString (fromIntegral l) return $ CONSTANT_Utf8 l bs 2 -> do l <- get bs <- getLazyByteString (fromIntegral l) return $ CONSTANT_Unicode l bs 3 -> CONSTANT_Integer <$> get 4 -> CONSTANT_Float <$> getFloat32be 5 -> CONSTANT_Long <$> get 6 -> CONSTANT_Double <$> getFloat64be 7 -> CONSTANT_Class <$> get 8 -> CONSTANT_String <$> get 9 -> CONSTANT_Fieldref <$> get <*> get 10 -> CONSTANT_Methodref <$> get <*> get 11 -> CONSTANT_InterfaceMethodref <$> get <*> get 12 -> CONSTANT_NameAndType <$> get <*> get _ -> fail $ "Unknown constants pool entry tag: " ++ show tag data FieldInfo = FieldInfo { fieldAccessFlags :: Word16, fieldNameIndex :: Word16, fieldSignatureIndex :: Word16, fieldAttributesCount :: Word16, fieldAttributes :: [AttributeInfo] } deriving (Eq, Show) instance Binary FieldInfo where put (FieldInfo {..}) = do put fieldAccessFlags put fieldNameIndex put fieldSignatureIndex put fieldAttributesCount forM_ fieldAttributes put get = do af <- get ni <- get si <- get n <- get as <- replicateM (fromIntegral n) get return $ FieldInfo af ni si n as data MethodInfo = MethodInfo { methodAccessFlags :: Word16, methodNameIndex :: Word16, methodSignatureIndex :: Word16, methodAttributesCount :: Word16, methodAttributes :: [AttributeInfo] } deriving (Eq, Show) instance Binary MethodInfo where put (MethodInfo {..}) = do put methodAccessFlags put methodNameIndex put methodSignatureIndex put methodAttributesCount forM_ methodAttributes put get = do offset <- bytesRead af <- get ni <- get si <- get n <- get as <- replicateM (fromIntegral n) get return $ MethodInfo af ni si n as data AttributeInfo = AttributeInfo { attributeName :: Word16, attributeLength :: Word32, attributeValue :: B.ByteString } deriving (Eq, Show) instance Binary AttributeInfo where put (AttributeInfo {..}) = do put attributeName putWord32be attributeLength putLazyByteString attributeValue get = do offset <- bytesRead name <- get len <- getWord32be value <- getLazyByteString (fromIntegral len) return $ AttributeInfo name len value