From: Ilya V. Portnov Date: Wed, 5 Oct 2011 09:15:55 +0000 (+0600) Subject: Enhace constants pool handling. X-Git-Tag: v0.3.2~10^2~6 X-Git-Url: http://wien.tomnetworks.com/gitweb/?p=hs-java.git;a=commitdiff_plain;h=743c0f2dca8434e69cce8389630091d37c28bc25 Enhace constants pool handling. --- diff --git a/.gitignore b/.gitignore index f64c10b..5d0ca25 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,9 @@ *.swp .*.swp dump-class +rebuild-class +TestGen dist/ +MANIFEST.MF +META-INF +*.jar diff --git a/JVM/Builder/Instructions.hs b/JVM/Builder/Instructions.hs index 8577a44..f74429c 100644 --- a/JVM/Builder/Instructions.hs +++ b/JVM/Builder/Instructions.hs @@ -5,6 +5,8 @@ module JVM.Builder.Instructions where import Data.Word import qualified Data.ByteString.Lazy as B +import Codec.Binary.UTF8.String (encodeString) +import Data.String import JVM.ClassFile import JVM.Assembler @@ -293,9 +295,9 @@ getStaticField :: Generator e g => B.ByteString -> NameType Field -> g e () getStaticField cls sig = i1 GETSTATIC (CField cls sig) -loadString :: Generator e g => B.ByteString -> g e () +loadString :: Generator e g => String -> g e () loadString str = - i8 LDC1 (CString str) + i8 LDC1 (CString $ fromString $ encodeString $ str) allocArray :: Generator e g => B.ByteString -> g e () allocArray cls = diff --git a/JVM/Builder/Monad.hs b/JVM/Builder/Monad.hs index f283eac..b61b6f3 100644 --- a/JVM/Builder/Monad.hs +++ b/JVM/Builder/Monad.hs @@ -19,13 +19,12 @@ import Prelude hiding (catch) import Control.Monad.State as St import Control.Monad.Exception import Data.Word -import Data.List import Data.Binary import qualified Data.Map as M import qualified Data.Set as S import qualified Data.ByteString.Lazy as B -import JVM.Common () -- import instances only +import JVM.Common import JVM.ClassFile import JVM.Assembler import JVM.Exceptions @@ -102,26 +101,26 @@ withClassPath cp = do -- | Append a constant to pool appendPool :: Constant Direct -> Pool Direct -> (Pool Direct, Word16) appendPool c pool = - let size = fromIntegral (M.size pool) - pool' = M.insert size c pool - in (pool', size) + let ix = if M.null pool then 1 else maximum (M.keys pool) + 1 + pool' = M.insert ix c pool + in (pool', ix) -- | Add a constant to pool addItem :: (Generator e g) => Constant Direct -> g e Word16 addItem c = do pool <- St.gets currentPool case lookupPool c pool of - Just i -> return (i+1) + Just i -> return i Nothing -> do let (pool', i) = appendPool c pool st <- St.get St.put $ st {currentPool = pool'} - return (i+1) + return i -- | Lookup in a pool lookupPool :: Constant Direct -> Pool Direct -> Maybe Word16 lookupPool c pool = - fromIntegral `fmap` findIndex (== c) (M.elems pool) + fromIntegral `fmap` mapFindIndex (== c) pool addNT :: (Generator e g, HasSignature a) => NameType a -> g e Word16 addNT (NameType name sig) = do diff --git a/JVM/ClassFile.hs b/JVM/ClassFile.hs index 02a7ada..6171c1b 100644 --- a/JVM/ClassFile.hs +++ b/JVM/ClassFile.hs @@ -32,7 +32,9 @@ module JVM.ClassFile where import Control.Monad +import Control.Monad.Trans (lift) import Control.Applicative +import qualified Control.Monad.State as St import Data.Binary import Data.Binary.IEEE754 import Data.Binary.Get @@ -173,7 +175,7 @@ data Constant stage = | CString (Link stage B.ByteString) | CInteger Word32 | CFloat Float - | CLong Integer + | CLong Word64 | CDouble Double | CNameType (Link stage B.ByteString) (Link stage B.ByteString) | CUTF8 {getString :: B.ByteString} @@ -256,8 +258,7 @@ instance Binary (Class File) where put magic put minorVersion put majorVersion - put constsPoolSize - forM_ (M.elems constsPool) put + putPool constsPool put accessFlags put thisClass put superClass @@ -272,23 +273,26 @@ instance Binary (Class File) where get = do magic <- get + when (magic /= 0xCAFEBABE) $ + fail $ "Invalid .class file MAGIC value: " ++ show magic minor <- get major <- get - poolsize <- get - pool <- replicateM (fromIntegral poolsize - 1) get - af <- get + when (major > 50) $ + fail $ "Too new .class file format: " ++ show major + poolsize <- getWord16be + pool <- getPool (poolsize - 1) + af <- get this <- get super <- get interfacesCount <- get ifaces <- replicateM (fromIntegral interfacesCount) get - classFieldsCount <- get + classFieldsCount <- getWord16be classFields <- replicateM (fromIntegral classFieldsCount) get classMethodsCount <- get classMethods <- replicateM (fromIntegral classMethodsCount) get asCount <- get as <- replicateM (fromIntegral $ asCount) get - let pool' = M.fromList $ zip [1..] pool - return $ Class magic minor major poolsize pool' af this super + return $ Class magic minor major poolsize pool af this super interfacesCount ifaces classFieldsCount classFields classMethodsCount classMethods asCount (AP as) @@ -455,49 +459,79 @@ whileJust m = do return (x: next) Nothing -> return [] -instance Binary (Constant File) where - put (CClass i) = putWord8 7 >> put i - put (CField i j) = putWord8 9 >> put i >> put j - put (CMethod i j) = putWord8 10 >> put i >> put j - put (CIfaceMethod i j) = putWord8 11 >> put i >> put j - put (CString i) = putWord8 8 >> put i - put (CInteger x) = putWord8 3 >> put x - put (CFloat x) = putWord8 4 >> putFloat32be x - put (CLong x) = putWord8 5 >> put x - put (CDouble x) = putWord8 6 >> putFloat64be x - put (CNameType i j) = putWord8 12 >> put i >> put j - put (CUTF8 bs) = do - putWord8 1 - put (fromIntegral (B.length bs) :: Word16) - putLazyByteString bs - put (CUnicode bs) = do - putWord8 2 - put (fromIntegral (B.length bs) :: Word16) - putLazyByteString bs +long (CLong _) = True +long (CDouble _) = True +long _ = False - get = do - !offset <- bytesRead - tag <- getWord8 - case tag of - 1 -> do - l <- get - bs <- getLazyByteString (fromIntegral (l :: Word16)) - return $ CUTF8 bs - 2 -> do - l <- get - bs <- getLazyByteString (fromIntegral (l :: Word16)) - return $ CUnicode bs - 3 -> CInteger <$> get - 4 -> CFloat <$> getFloat32be - 5 -> CLong <$> get - 6 -> CDouble <$> getFloat64be - 7 -> CClass <$> get - 8 -> CString <$> get - 9 -> CField <$> get <*> get - 10 -> CMethod <$> get <*> get - 11 -> CIfaceMethod <$> get <*> get - 12 -> CNameType <$> get <*> get - _ -> fail $ "Unknown constants pool entry tag: " ++ show tag +putPool :: Pool File -> Put +putPool pool = do + let list = M.elems pool + d = length $ filter long list + putWord16be $ fromIntegral (M.size pool + d + 1) + forM_ list putC + where + putC (CClass i) = putWord8 7 >> put i + putC (CField i j) = putWord8 9 >> put i >> put j + putC (CMethod i j) = putWord8 10 >> put i >> put j + putC (CIfaceMethod i j) = putWord8 11 >> put i >> put j + putC (CString i) = putWord8 8 >> put i + putC (CInteger x) = putWord8 3 >> put x + putC (CFloat x) = putWord8 4 >> putFloat32be x + putC (CLong x) = putWord8 5 >> put x + putC (CDouble x) = putWord8 6 >> putFloat64be x + putC (CNameType i j) = putWord8 12 >> put i >> put j + putC (CUTF8 bs) = do + putWord8 1 + put (fromIntegral (B.length bs) :: Word16) + putLazyByteString bs + putC (CUnicode bs) = do + putWord8 2 + put (fromIntegral (B.length bs) :: Word16) + putLazyByteString bs + +getPool :: Word16 -> Get (Pool File) +getPool n = do + items <- St.evalStateT go 1 + return $ M.fromList items + where + go :: St.StateT Word16 Get [(Word16, Constant File)] + go = do + i <- St.get + if i > n + then return [] + else do + c <- lift getC + let i' = if long c + then i+2 + else i+1 + St.put i' + next <- go + return $ (i,c): next + + getC = do + !offset <- bytesRead + tag <- getWord8 + case tag of + 1 -> do + l <- get + bs <- getLazyByteString (fromIntegral (l :: Word16)) + return $ CUTF8 bs + 2 -> do + l <- get + bs <- getLazyByteString (fromIntegral (l :: Word16)) + return $ CUnicode bs + 3 -> CInteger <$> get + 4 -> CFloat <$> getFloat32be + 5 -> CLong <$> get + 6 -> CDouble <$> getFloat64be + 7 -> CClass <$> get + 8 -> CString <$> get + 9 -> CField <$> get <*> get + 10 -> CMethod <$> get <*> get + 11 -> CIfaceMethod <$> get <*> get + 12 -> CNameType <$> get <*> get + _ -> fail $ "Unknown constants pool entry tag: " ++ show tag +-- _ -> return $ CInteger 0 -- | Class field format data Field stage = Field { @@ -533,9 +567,9 @@ instance Binary (Field File) where get = do af <- get - ni <- get + ni <- getWord16be si <- get - n <- get + n <- getWord16be as <- replicateM (fromIntegral n) get return $ Field af ni si n (AP as) @@ -601,7 +635,7 @@ instance Binary Attribute where get = do offset <- bytesRead - name <- get + name <- getWord16be len <- getWord32be value <- getLazyByteString (fromIntegral len) return $ Attribute name len value diff --git a/JVM/Common.hs b/JVM/Common.hs index 60eb9f2..422cc8b 100644 --- a/JVM/Common.hs +++ b/JVM/Common.hs @@ -5,23 +5,19 @@ module JVM.Common poolSize, (!), showListIx, + mapFindIndex, byteString ) where -import Codec.Binary.UTF8.String (encodeString) import Data.Binary import Data.Binary.Put import qualified Data.ByteString.Lazy as B import qualified Data.Map as M import Data.Default -import Data.Char -import Data.String +import Data.List import JVM.ClassFile -instance IsString B.ByteString where - fromString s = B.pack $ map (fromIntegral . ord) $ encodeString s - instance Default B.ByteString where def = B.empty @@ -37,10 +33,16 @@ poolSize = M.size (!) :: (Ord k) => M.Map k a -> k -> a (!) = (M.!) -showListIx :: (Show a) => [a] -> String -showListIx list = unlines $ zipWith s [1..] list - where s i x = show i ++ ":\t" ++ show x +showListIx :: (Show i, Show a) => [(i,a)] -> String +showListIx list = unlines $ map s list + where s (i, x) = show i ++ ":\t" ++ show x byteString :: (Binary t) => t -> B.ByteString byteString x = runPut (put x) +mapFindIndex :: (Num k) => (v -> Bool) -> M.Map k v -> Maybe k +mapFindIndex check m = + case find (check . snd) (M.assocs m) of + Nothing -> Nothing + Just (k,_) -> Just k + diff --git a/JVM/Converter.hs b/JVM/Converter.hs index 52b3483..6380168 100644 --- a/JVM/Converter.hs +++ b/JVM/Converter.hs @@ -18,6 +18,7 @@ import Data.Bits import Data.Binary import Data.Default () -- import instances only import qualified Data.ByteString.Lazy as B +import qualified Data.ByteString.Lazy.Char8 () import qualified Data.Set as S import qualified Data.Map as M @@ -103,9 +104,9 @@ poolDirect2File pool = result -- | Find index of given string in the list of constants poolIndex :: (Throws NoItemInPool e) => Pool File -> B.ByteString -> EM e Word16 -poolIndex list name = case findIndex test (M.elems list) of +poolIndex list name = case mapFindIndex test list of Nothing -> throw (NoItemInPool name) - Just i -> return $ fromIntegral $ i+1 + Just i -> return $ fromIntegral i where test (CUTF8 s) | s == name = True test (CUnicode s) | s == name = True @@ -113,11 +114,11 @@ poolIndex list name = case findIndex test (M.elems list) of -- | Find index of given string in the list of constants poolClassIndex :: (Throws NoItemInPool e) => Pool File -> B.ByteString -> EM e Word16 -poolClassIndex list name = case findIndex checkString (M.elems list) of +poolClassIndex list name = case mapFindIndex checkString list of Nothing -> throw (NoItemInPool name) - Just i -> case findIndex (checkClass $ fromIntegral $ i+1) (M.elems list) of - Nothing -> throw (NoItemInPool $ i+1) - Just j -> return $ fromIntegral $ j+1 + Just i -> case mapFindIndex (checkClass $ fromIntegral i) list of + Nothing -> throw (NoItemInPool i) + Just j -> return $ fromIntegral j where checkString (CUTF8 s) | s == name = True checkString (CUnicode s) | s == name = True @@ -129,9 +130,9 @@ poolClassIndex list name = case findIndex checkString (M.elems list) of poolNTIndex list x@(NameType n t) = do ni <- poolIndex list n ti <- poolIndex list (byteString t) - case findIndex (check ni ti) (M.elems list) of + case mapFindIndex (check ni ti) list of Nothing -> throw (NoItemInPool x) - Just i -> return $ fromIntegral (i+1) + Just i -> return $ fromIntegral i where check ni ti (CNameType n' t') | (ni == n') && (ti == t') = True @@ -175,10 +176,13 @@ poolFile2Direct ps = pool convertNameType :: (HasSignature a) => Word16 -> NameType a convertNameType i = - let (CNameType n s) = pool ! i - in NameType n (decode s) + case pool ! i of + CNameType n s -> NameType n (decode s) + x -> error $ "Unexpected: " ++ show i - convert (CClass i) = CClass $ getString $ pool ! i + convert (CClass i) = case pool ! i of + CUTF8 name -> CClass name + x -> error $ "Unexpected class name: " ++ show x ++ " at " ++ show i convert (CField i j) = CField (className $ pool ! i) (convertNameType j) convert (CMethod i j) = CMethod (className $ pool ! i) (convertNameType j) convert (CIfaceMethod i j) = CIfaceMethod (className $ pool ! i) (convertNameType j) diff --git a/Java/ClassPath/Common.hs b/Java/ClassPath/Common.hs index 3c61a35..e20286f 100644 --- a/Java/ClassPath/Common.hs +++ b/Java/ClassPath/Common.hs @@ -57,6 +57,7 @@ merge1 [] x = [x] merge1 (x@(File e): es) y@(File e') | e == e' = x: es | otherwise = x: merge1 es y merge1 (d@(Directory _ _):es) f@(File _) = d: merge1 es f +merge1 (f@(File _):es) d@(Directory _ _) = f: merge1 es d merge1 (x@(Directory dir f):es) y@(Directory dir' f') | dir == dir' = Directory dir (merge $ f ++ f'): es | otherwise = x: merge1 es y diff --git a/Java/JAR.hs b/Java/JAR.hs index 1c92f50..93128b2 100644 --- a/Java/JAR.hs +++ b/Java/JAR.hs @@ -31,7 +31,7 @@ readOne jarfile str = do files <- Zip.fileNames [] return $ mapF (NotLoadedJAR jarfile) (buildTree $ filter good files) where - good name = str `isPrefixOf` name + good name = (str `isPrefixOf` name) && (".class" `isSuffixOf` name) -- | Read entries from JAR file, using MANIFEST.MF if it exists. readJAR :: FilePath -> IO [Tree CPEntry] diff --git a/Java/JAR/Archive.hs b/Java/JAR/Archive.hs index 0963a15..a4dd026 100644 --- a/Java/JAR/Archive.hs +++ b/Java/JAR/Archive.hs @@ -3,6 +3,7 @@ module Java.JAR.Archive where import qualified Codec.Archive.LibZip as Zip import Data.Binary +import Data.List import qualified Data.ByteString.Lazy as B import Java.ClassPath.Types @@ -18,8 +19,10 @@ readJAREntry jarfile path = do -- | Read all entires from JAR file readAllJAR :: FilePath -> IO [Tree CPEntry] readAllJAR jarfile = do - files <- Zip.withArchive [] jarfile $ Zip.fileNames [] - return $ mapF (NotLoadedJAR jarfile) (buildTree files) + files <- Zip.withArchive [] jarfile $ Zip.fileNames [] + return $ mapF (NotLoadedJAR jarfile) (buildTree $ filter good files) + where + good file = ".class" `isSuffixOf` file -- | Read one class from JAR file readFromJAR :: FilePath -> FilePath -> IO (Class Direct) diff --git a/TestGen.hs b/TestGen.hs index e0af50b..9d1bcd8 100644 --- a/TestGen.hs +++ b/TestGen.hs @@ -20,7 +20,7 @@ test = do addDirectory "." -- Load method signature: Hello.hello() from Hello.class - helloJava <- getClassMethod "Hello" "hello" + helloJava <- getClassMethod "./Hello" "hello" -- Initializer method. Just calls java.lang.Object. newMethod [ACC_PUBLIC] "" [] ReturnsVoid $ do diff --git a/dump-class.hs b/dump-class.hs index a87c1cb..f62ea1e 100644 --- a/dump-class.hs +++ b/dump-class.hs @@ -15,7 +15,7 @@ main = do case args of [clspath] -> do clsFile <- decodeFile clspath - putStrLn $ showListIx $ M.elems $ constsPool (clsFile :: Class File) + putStrLn $ showListIx $ M.assocs $ constsPool (clsFile :: Class File) cls <- parseClassFile clspath dumpClass cls _ -> error "Synopsis: dump-class File.class" diff --git a/rebuild-class.hs b/rebuild-class.hs index 4afb954..301e158 100644 --- a/rebuild-class.hs +++ b/rebuild-class.hs @@ -17,9 +17,9 @@ main = do cls <- parseClassFile clspath clsfile <- decodeFile clspath :: IO (Class File) dumpClass cls - putStrLn $ "Source pool:\n" ++ showListIx (M.elems $ constsPool clsfile) + putStrLn $ "Source pool:\n" ++ showListIx (M.assocs $ constsPool clsfile) let result = classDirect2File cls - putStrLn $ "Result pool:\n" ++ showListIx (M.elems $ constsPool result) + putStrLn $ "Result pool:\n" ++ showListIx (M.assocs $ constsPool result) B.writeFile outpath (encodeClass cls) _ -> error "Synopsis: rebuild-class File.class Output.class"