From d47b4af2d4cf72352782e8c88a6e03670ca15737 Mon Sep 17 00:00:00 2001 From: Ilya Portnov Date: Sun, 2 Oct 2011 19:07:15 +0600 Subject: [PATCH] More docs. --- JVM/Assembler.hs | 2 +- JVM/Builder/Instructions.hs | 28 +++++----- JVM/Builder/Monad.hs | 20 +++---- JVM/ClassFile.hs | 107 +++++++++++++++++++++++++----------- JVM/Converter.hs | 50 ++++++++--------- JVM/Dump.hs | 2 +- TestGen.hs | 2 +- dump-class.hs | 2 +- rebuild-class.hs | 2 +- 9 files changed, 128 insertions(+), 87 deletions(-) diff --git a/JVM/Assembler.hs b/JVM/Assembler.hs index b47c4cc..7ac59c6 100644 --- a/JVM/Assembler.hs +++ b/JVM/Assembler.hs @@ -54,7 +54,7 @@ data Code = Code { codeExceptionsN :: Word16, codeExceptions :: [CodeException], codeAttrsN :: Word16, - codeAttributes :: Attributes Pointers } + codeAttributes :: Attributes File } deriving (Eq, Show) -- | Exception descriptor diff --git a/JVM/Builder/Instructions.hs b/JVM/Builder/Instructions.hs index 78e3545..bc4948a 100644 --- a/JVM/Builder/Instructions.hs +++ b/JVM/Builder/Instructions.hs @@ -48,21 +48,21 @@ bipush x = i0 (BIPUSH x) sipush :: Word16 -> Generate () sipush x = i0 (SIPUSH x) -ldc1 :: Constant Resolved -> Generate () +ldc1 :: Constant Direct -> Generate () ldc1 x = i8 LDC1 x -ldc2 :: Constant Resolved -> Generate () +ldc2 :: Constant Direct -> Generate () ldc2 x = i1 LDC2 x -ldc2w :: Constant Resolved -> Generate () +ldc2w :: Constant Direct -> Generate () ldc2w x = i1 LDC2W x -iload :: Constant Resolved -> Generate () +iload :: Constant Direct -> Generate () iload x = i8 ILOAD x -lload :: Constant Resolved -> Generate () +lload :: Constant Direct -> Generate () lload x = i8 LLOAD x -fload :: Constant Resolved -> Generate () +fload :: Constant Direct -> Generate () fload x = i8 FLOAD x -dload :: Constant Resolved -> Generate () +dload :: Constant Direct -> Generate () dload x = i8 DLOAD x -aload :: Constant Resolved -> Generate () +aload :: Constant Direct -> Generate () aload x = i8 ALOAD x iload_ :: IMM -> Generate () @@ -91,15 +91,15 @@ caload = i0 CALOAD saload :: Generate () saload = i0 SALOAD -istore :: Constant Resolved -> Generate () +istore :: Constant Direct -> Generate () istore x = i8 ISTORE x -lstore :: Constant Resolved -> Generate () +lstore :: Constant Direct -> Generate () lstore x = i8 LSTORE x -fstore :: Constant Resolved -> Generate () +fstore :: Constant Direct -> Generate () fstore x = i8 FSTORE x -dstore :: Constant Resolved -> Generate () +dstore :: Constant Direct -> Generate () dstore x = i8 DSTORE x -astore :: Constant Resolved -> Generate () +astore :: Constant Direct -> Generate () astore x = i8 ASTORE x istore_ :: Word8 -> Generate () @@ -258,7 +258,7 @@ lcmp :: Generate () lcmp = i0 LCMP -- | Wide instruction -wide :: (Word8 -> Instruction) -> Constant Resolved -> Generate () +wide :: (Word8 -> Instruction) -> Constant Direct -> Generate () wide fn c = do ix <- addToPool c let ix0 = fromIntegral (ix `div` 0x100) :: Word8 diff --git a/JVM/Builder/Monad.hs b/JVM/Builder/Monad.hs index 2d4a290..85aaa5e 100644 --- a/JVM/Builder/Monad.hs +++ b/JVM/Builder/Monad.hs @@ -27,9 +27,9 @@ import JVM.Assembler -- | Generator state data GState = GState { generated :: [Instruction], -- ^ Already generated code (in current method) - currentPool :: Pool Resolved, -- ^ Already generated constants pool - doneMethods :: [Method Resolved], -- ^ Already generated class methods - currentMethod :: Maybe (Method Resolved), -- ^ Current method + currentPool :: Pool Direct, -- ^ Already generated constants pool + doneMethods :: [Method Direct], -- ^ Already generated class methods + currentMethod :: Maybe (Method Direct), -- ^ Current method stackSize :: Word16, -- ^ Maximum stack size for current method locals :: Word16 -- ^ Maximum number of local variables for current method } @@ -49,14 +49,14 @@ emptyGState = GState { type Generate a = State GState a -- | Append a constant to pool -appendPool :: Constant Resolved -> Pool Resolved -> (Pool Resolved, Word16) +appendPool :: Constant Direct -> Pool Direct -> (Pool Direct, Word16) appendPool c pool = let size = fromIntegral (M.size pool) pool' = M.insert size c pool in (pool', size) -- | Add a constant to pool -addItem :: Constant Resolved -> Generate Word16 +addItem :: Constant Direct -> Generate Word16 addItem c = do pool <- St.gets currentPool case lookupPool c pool of @@ -68,7 +68,7 @@ addItem c = do return (i+1) -- | Lookup in a pool -lookupPool :: Constant Resolved -> Pool Resolved -> Maybe Word16 +lookupPool :: Constant Direct -> Pool Direct -> Maybe Word16 lookupPool c pool = fromIntegral `fmap` findIndex (== c) (M.elems pool) @@ -86,7 +86,7 @@ addSig c@(MethodSignature args ret) = do addItem (CUTF8 bsig) -- | Add a constant into pool -addToPool :: Constant Resolved -> Generate Word16 +addToPool :: Constant Direct -> Generate Word16 addToPool c@(CClass str) = do addItem (CUTF8 str) addItem c @@ -122,13 +122,13 @@ i0 :: Instruction -> Generate () i0 = putInstruction -- | Generate one one-argument instruction -i1 :: (Word16 -> Instruction) -> Constant Resolved -> Generate () +i1 :: (Word16 -> Instruction) -> Constant Direct -> Generate () i1 fn c = do ix <- addToPool c i0 (fn ix) -- | Generate one one-argument instruction -i8 :: (Word8 -> Instruction) -> Constant Resolved -> Generate () +i8 :: (Word8 -> Instruction) -> Constant Direct -> Generate () i8 fn c = do ix <- addToPool c i0 (fn $ fromIntegral ix) @@ -213,7 +213,7 @@ initClass name = do addToPool (CString "Code") -- | Generate a class -generate :: B.ByteString -> Generate () -> Class Resolved +generate :: B.ByteString -> Generate () -> Class Direct generate name gen = let generator = do initClass name diff --git a/JVM/ClassFile.hs b/JVM/ClassFile.hs index 18c2526..0359320 100644 --- a/JVM/ClassFile.hs +++ b/JVM/ClassFile.hs @@ -2,18 +2,26 @@ -- | This module declares (low-level) data types for Java .class files -- structures, and Binary instances to read/write them. module JVM.ClassFile - (Attribute (..), + (-- * About + -- $about + -- + -- * Internal class file structures + Attribute (..), FieldType (..), + -- * Signatures FieldSignature, MethodSignature (..), ReturnSignature (..), ArgumentSignature (..), + -- * Stage types + File, Direct, + -- * Staged structures Pool, Link, Method (..), Field (..), Class (..), Constant (..), - Pointers, Resolved, - NameType (..), - HasSignature (..), HasAttributes (..), AccessFlag (..), AccessFlags, Attributes (..), + -- * Misc + HasSignature (..), HasAttributes (..), + NameType (..), className, apsize, arsize, arlist ) @@ -32,6 +40,22 @@ import qualified Data.Map as M import qualified Data.ByteString.Lazy as B import Codec.Binary.UTF8.String hiding (encode, decode) +-- $about +-- +-- Java .class file uses constants pool, which stores almost all source-code-level +-- constants (strings, integer literals etc), and also all identifiers (class, +-- method, field names etc). All other structures contain indexes of constants in +-- the pool instead of constants theirself. +-- +-- It's not convient to use that indexes programmatically. So, .class file is represented +-- at two stages: File and Direct. At File stage, all data structures contain only indexes, +-- not constants theirself. When we read a class from a file, we get structure at File stage. +-- We only can write File stage structure to file. +-- +-- At Direct stage, structures conain constants, not indexes. Convertion functions (File <-> Direct) +-- are located in the JVM.Converter module. +-- + -- | Read one-byte Char getChar8 :: Get Char getChar8 = do @@ -41,36 +65,51 @@ getChar8 = do toString :: B.ByteString -> String toString bstr = decodeString $ map (chr . fromIntegral) $ B.unpack bstr -type family Link s a +-- | File stage +data File = File -data Pointers = Pointers +-- | Direct representation stage +data Direct = Direct -data Resolved = Resolved +-- | Link to some object +type family Link stage a -type instance Link Pointers a = Word16 +-- | At File stage, Link contain index of object in the constants pool. +type instance Link File a = Word16 -type instance Link Resolved a = a +-- | At Direct stage, Link contain object itself. +type instance Link Direct a = a +-- | Object (class, method, field …) access flags type family AccessFlags stage -type instance AccessFlags Pointers = Word16 +-- | At File stage, access flags are represented as Word16 +type instance AccessFlags File = Word16 -type instance AccessFlags Resolved = S.Set AccessFlag +-- | At Direct stage, access flags are represented as set of flags. +type instance AccessFlags Direct = S.Set AccessFlag +-- | Object (class, method, field) attributes data family Attributes stage -data instance Attributes Pointers = AP {attributesList :: [Attribute]} +-- | At File stage, attributes are represented as list of Attribute structures. +data instance Attributes File = AP {attributesList :: [Attribute]} deriving (Eq, Show) -data instance Attributes Resolved = AR (M.Map B.ByteString B.ByteString) + +-- | At Direct stage, attributes are represented as a Map. +data instance Attributes Direct = AR (M.Map B.ByteString B.ByteString) deriving (Eq, Show) -arsize :: Attributes Resolved -> Int +-- | Size of attributes set at Direct stage +arsize :: Attributes Direct -> Int arsize (AR m) = M.size m -arlist :: Attributes Resolved -> [(B.ByteString, B.ByteString)] +-- | Associative list of attributes at Direct stage +arlist :: Attributes Direct -> [(B.ByteString, B.ByteString)] arlist (AR m) = M.assocs m -apsize :: Attributes Pointers -> Int +-- | Size of attributes set at File stage +apsize :: Attributes File -> Int apsize (AP list) = length list -- | Access flags. Used for classess, methods, variables. @@ -88,6 +127,7 @@ data AccessFlag = | ACC_ABSTRACT -- ^ 0x0400 deriving (Eq, Show, Ord, Enum) +-- | Fields and methods have signatures. class HasSignature a where type Signature a @@ -127,11 +167,12 @@ data Constant stage = | CUTF8 {getString :: B.ByteString} | CUnicode {getString :: B.ByteString} -className :: Constant Resolved -> B.ByteString +-- | Name of the CClass. Error on any other constant. +className :: Constant Direct -> B.ByteString className (CClass s) = s className x = error $ "Not a class: " ++ show x -instance Show (Constant Resolved) where +instance Show (Constant Direct) where show (CClass name) = "class " ++ toString name show (CField cls nt) = "field " ++ toString cls ++ "." ++ show nt show (CMethod cls nt) = "method " ++ toString cls ++ "." ++ show nt @@ -168,11 +209,11 @@ data Class stage = Class { classAttributes :: Attributes stage -- ^ Class attributes } -deriving instance Eq (Constant Pointers) -deriving instance Eq (Constant Resolved) -deriving instance Show (Constant Pointers) +deriving instance Eq (Constant File) +deriving instance Eq (Constant Direct) +deriving instance Show (Constant File) -instance Binary (Class Pointers) where +instance Binary (Class File) where put (Class {..}) = do put magic put minorVersion @@ -376,7 +417,7 @@ whileJust m = do return (x: next) Nothing -> return [] -instance Binary (Constant Pointers) where +instance Binary (Constant File) where put (CClass i) = putWord8 7 >> put i put (CField i j) = putWord8 9 >> put i >> put j put (CMethod i j) = putWord8 10 >> put i >> put j @@ -428,12 +469,12 @@ data Field stage = Field { fieldAttributesCount :: Word16, fieldAttributes :: Attributes stage } -deriving instance Eq (Field Pointers) -deriving instance Eq (Field Resolved) -deriving instance Show (Field Pointers) -deriving instance Show (Field Resolved) +deriving instance Eq (Field File) +deriving instance Eq (Field Direct) +deriving instance Show (Field File) +deriving instance Show (Field Direct) -instance Binary (Field Pointers) where +instance Binary (Field File) where put (Field {..}) = do put fieldAccessFlags put fieldName @@ -457,12 +498,12 @@ data Method stage = Method { methodAttributesCount :: Word16, methodAttributes :: Attributes stage } -deriving instance Eq (Method Pointers) -deriving instance Eq (Method Resolved) -deriving instance Show (Method Pointers) -deriving instance Show (Method Resolved) +deriving instance Eq (Method File) +deriving instance Eq (Method Direct) +deriving instance Show (Method File) +deriving instance Show (Method Direct) -instance Binary (Method Pointers) where +instance Binary (Method File) where put (Method {..}) = do put methodAccessFlags put methodName diff --git a/JVM/Converter.hs b/JVM/Converter.hs index b780d0d..777bac6 100644 --- a/JVM/Converter.hs +++ b/JVM/Converter.hs @@ -25,17 +25,17 @@ import JVM.Common import JVM.Exceptions -- | Parse .class file data -parseClass :: B.ByteString -> Class Resolved +parseClass :: B.ByteString -> Class Direct parseClass bstr = convertClass $ decode bstr -- | Parse class data from file -parseClassFile :: FilePath -> IO (Class Resolved) +parseClassFile :: FilePath -> IO (Class Direct) parseClassFile path = convertClass `fmap` decodeFile path -encodeClass :: (Class Resolved) -> B.ByteString +encodeClass :: (Class Direct) -> B.ByteString encodeClass cls = encode $ classFile cls -convertClass :: Class Pointers -> Class Resolved +convertClass :: Class File -> Class Direct convertClass (Class {..}) = let pool = constantPoolArray constsPool superName = className $ pool ! superClass @@ -57,7 +57,7 @@ convertClass (Class {..}) = classAttributesCount = classAttributesCount, classAttributes = convertAttrs pool classAttributes } -classFile :: Class Resolved -> Class Pointers +classFile :: Class Direct -> Class File classFile (Class {..}) = Class { magic = 0xCAFEBABE, minorVersion = 0, @@ -77,15 +77,15 @@ classFile (Class {..}) = Class { classAttributes = to (arlist classAttributes) } where poolInfo = toCPInfo constsPool - to :: [(B.ByteString, B.ByteString)] -> Attributes Pointers + to :: [(B.ByteString, B.ByteString)] -> Attributes File to pairs = AP (map (attrInfo poolInfo) pairs) -toCPInfo :: Pool Resolved -> Pool Pointers +toCPInfo :: Pool Direct -> Pool File toCPInfo pool = result where result = M.map cpInfo pool - cpInfo :: Constant Resolved -> Constant Pointers + cpInfo :: Constant Direct -> Constant File cpInfo (CClass name) = CClass (force "class" $ poolIndex result name) cpInfo (CField cls name) = CField (force "field a" $ poolClassIndex result cls) (force "field b" $ poolNTIndex result name) @@ -104,7 +104,7 @@ toCPInfo pool = result cpInfo (CUnicode s) = CUnicode s -- | Find index of given string in the list of constants -poolIndex :: (Throws NoItemInPool e) => Pool Pointers -> B.ByteString -> EM e Word16 +poolIndex :: (Throws NoItemInPool e) => Pool File -> B.ByteString -> EM e Word16 poolIndex list name = case findIndex test (M.elems list) of Nothing -> throw (NoItemInPool name) Just i -> return $ fromIntegral $ i+1 @@ -114,7 +114,7 @@ poolIndex list name = case findIndex test (M.elems list) of test _ = False -- | Find index of given string in the list of constants -poolClassIndex :: (Throws NoItemInPool e) => Pool Pointers -> B.ByteString -> EM e Word16 +poolClassIndex :: (Throws NoItemInPool e) => Pool File -> B.ByteString -> EM e Word16 poolClassIndex list name = case findIndex checkString (M.elems list) of Nothing -> throw (NoItemInPool name) Just i -> case findIndex (checkClass $ fromIntegral $ i+1) (M.elems list) of @@ -139,7 +139,7 @@ poolNTIndex list x@(NameType n t) = do | (ni == n') && (ti == t') = True check _ _ _ = False -fieldInfo :: Pool Pointers -> Field Resolved -> Field Pointers +fieldInfo :: Pool File -> Field Direct -> Field File fieldInfo pool (Field {..}) = Field { fieldAccessFlags = access2word16 fieldAccessFlags, fieldName = force "field name" $ poolIndex pool fieldName, @@ -147,10 +147,10 @@ fieldInfo pool (Field {..}) = Field { fieldAttributesCount = fromIntegral (arsize fieldAttributes), fieldAttributes = to (arlist fieldAttributes) } where - to :: [(B.ByteString, B.ByteString)] -> Attributes Pointers + to :: [(B.ByteString, B.ByteString)] -> Attributes File to pairs = AP (map (attrInfo pool) pairs) -methodInfo :: Pool Pointers -> Method Resolved -> Method Pointers +methodInfo :: Pool File -> Method Direct -> Method File methodInfo pool (Method {..}) = Method { methodAccessFlags = access2word16 methodAccessFlags, methodName = force "method name" $ poolIndex pool methodName, @@ -158,19 +158,19 @@ methodInfo pool (Method {..}) = Method { methodAttributesCount = fromIntegral (arsize methodAttributes), methodAttributes = to (arlist methodAttributes) } where - to :: [(B.ByteString, B.ByteString)] -> Attributes Pointers + to :: [(B.ByteString, B.ByteString)] -> Attributes File to pairs = AP (map (attrInfo pool) pairs) -attrInfo :: Pool Pointers -> (B.ByteString, B.ByteString) -> Attribute +attrInfo :: Pool File -> (B.ByteString, B.ByteString) -> Attribute attrInfo pool (name, value) = Attribute { attributeName = force "attr name" $ poolIndex pool name, attributeLength = fromIntegral (B.length value), attributeValue = value } -constantPoolArray :: Pool Pointers -> Pool Resolved +constantPoolArray :: Pool File -> Pool Direct constantPoolArray ps = pool where - pool :: Pool Resolved + pool :: Pool Direct pool = M.map convert ps n = fromIntegral $ M.size ps @@ -193,7 +193,7 @@ constantPoolArray ps = pool convert (CUTF8 bs) = CUTF8 bs convert (CUnicode bs) = CUnicode bs -convertAccess :: AccessFlags Pointers -> AccessFlags Resolved +convertAccess :: AccessFlags File -> AccessFlags Direct convertAccess w = S.fromList $ concat $ zipWith (\i f -> if testBit w i then [f] else []) [0..] $ [ ACC_PUBLIC, ACC_PRIVATE, @@ -207,13 +207,13 @@ convertAccess w = S.fromList $ concat $ zipWith (\i f -> if testBit w i then [f] ACC_INTERFACE, ACC_ABSTRACT ] -access2word16 :: AccessFlags Resolved -> AccessFlags Pointers +access2word16 :: AccessFlags Direct -> AccessFlags File access2word16 fs = bitsOr $ map toBit $ S.toList fs where bitsOr = foldl (.|.) 0 toBit f = 1 `shiftL` (fromIntegral $ fromEnum f) -convertField :: Pool Resolved -> Field Pointers -> Field Resolved +convertField :: Pool Direct -> Field File -> Field Direct convertField pool (Field {..}) = Field { fieldAccessFlags = convertAccess fieldAccessFlags, fieldName = getString $ pool ! fieldName, @@ -221,7 +221,7 @@ convertField pool (Field {..}) = Field { fieldAttributesCount = fromIntegral (apsize fieldAttributes), fieldAttributes = convertAttrs pool fieldAttributes } -convertMethod :: Pool Resolved -> Method Pointers -> Method Resolved +convertMethod :: Pool Direct -> Method File -> Method Direct convertMethod pool (Method {..}) = Method { methodAccessFlags = convertAccess methodAccessFlags, methodName = getString $ pool ! methodName, @@ -229,7 +229,7 @@ convertMethod pool (Method {..}) = Method { methodAttributesCount = fromIntegral (apsize methodAttributes), methodAttributes = convertAttrs pool methodAttributes } -convertAttrs :: Pool Resolved -> Attributes Pointers -> Attributes Resolved +convertAttrs :: Pool Direct -> Attributes File -> Attributes Direct convertAttrs pool (AP attrs) = AR (M.fromList $ map go attrs) where go :: Attribute -> (B.ByteString, B.ByteString) @@ -237,18 +237,18 @@ convertAttrs pool (AP attrs) = AR (M.fromList $ map go attrs) attributeValue) -- | Try to get class method by name -methodByName :: Class Resolved -> B.ByteString -> Maybe (Method Resolved) +methodByName :: Class Direct -> B.ByteString -> Maybe (Method Direct) methodByName cls name = find (\m -> methodName m == name) (classMethods cls) -- | Try to get object attribute by name -attrByName :: (HasAttributes a) => a Resolved -> B.ByteString -> Maybe B.ByteString +attrByName :: (HasAttributes a) => a Direct -> B.ByteString -> Maybe B.ByteString attrByName x name = let (AR m) = attributes x in M.lookup name m -- | Try to get Code for class method (no Code for interface methods) -methodCode :: Class Resolved +methodCode :: Class Direct -> B.ByteString -- ^ Method name -> Maybe B.ByteString methodCode cls name = do diff --git a/JVM/Dump.hs b/JVM/Dump.hs index a2c4c2e..53c8a2d 100644 --- a/JVM/Dump.hs +++ b/JVM/Dump.hs @@ -12,7 +12,7 @@ import JVM.Converter import JVM.Assembler -- | Dump a class to console. -dumpClass :: Class Resolved -> IO () +dumpClass :: Class Direct -> IO () dumpClass cls = do putStr "Class: " B.putStrLn (thisClass cls) diff --git a/TestGen.hs b/TestGen.hs index 886751d..69786e9 100644 --- a/TestGen.hs +++ b/TestGen.hs @@ -47,7 +47,7 @@ test = do return () -testClass :: Class Resolved +testClass :: Class Direct testClass = generate "Test" test main = do diff --git a/dump-class.hs b/dump-class.hs index 76fd681..a87c1cb 100644 --- a/dump-class.hs +++ b/dump-class.hs @@ -15,7 +15,7 @@ main = do case args of [clspath] -> do clsFile <- decodeFile clspath - putStrLn $ showListIx $ M.elems $ constsPool (clsFile :: Class Pointers) + putStrLn $ showListIx $ M.elems $ constsPool (clsFile :: Class File) cls <- parseClassFile clspath dumpClass cls _ -> error "Synopsis: dump-class File.class" diff --git a/rebuild-class.hs b/rebuild-class.hs index 45a9843..cb98f24 100644 --- a/rebuild-class.hs +++ b/rebuild-class.hs @@ -15,7 +15,7 @@ main = do case args of [clspath,outpath] -> do cls <- parseClassFile clspath - clsfile <- decodeFile clspath :: IO (Class Pointers) + clsfile <- decodeFile clspath :: IO (Class File) dumpClass cls putStrLn $ "Source pool:\n" ++ showListIx (M.elems $ constsPool clsfile) let result = classFile cls -- 2.25.1