From 2cb914fa072ac52bd375324a7ef9b8fc073cb944 Mon Sep 17 00:00:00 2001 From: Bernhard Urban Date: Mon, 16 Apr 2012 22:01:41 +0200 Subject: [PATCH] methodpool: save information in codegen context use that for on-demand compiling --- Mate.hs | 2 +- Mate/BasicBlocks.hs | 14 +-------- Mate/MethodPool.hs | 65 +++++++++++++++++++++++---------------- Mate/Utilities.hs | 19 ++++++++++++ Mate/X86CodeGen.hs | 75 +++++++++++++++++++++++++++++++-------------- ffi/trap.c | 21 +++++++++++-- 6 files changed, 129 insertions(+), 67 deletions(-) diff --git a/Mate.hs b/Mate.hs index d68c8da..b787c26 100644 --- a/Mate.hs +++ b/Mate.hs @@ -25,7 +25,7 @@ main = do printMapBB hmap case hmap of Just hmap' -> do - entry <- compileBB hmap' "main" + entry <- compileBB hmap' cls "main" printf "executing `main' now:\n" executeFuncPtr entry Nothing -> error "main not found" diff --git a/Mate/BasicBlocks.hs b/Mate/BasicBlocks.hs index 332bd56..de1d11b 100644 --- a/Mate/BasicBlocks.hs +++ b/Mate/BasicBlocks.hs @@ -81,21 +81,9 @@ test_03 = testInstance "./tests/While.class" "g" parseMethod :: Class Resolved -> B.ByteString -> IO (Maybe MapBB) parseMethod cls method = do - -- TODO(bernhard): remove me! just playing around with - -- hs-java interface. - -- we get that index at the INVOKESTATIC insn - putStrLn "via constpool @2:" - let cp = constsPool cls - let (CMethod rc nt) = cp M.! (2 :: Word16) - -- rc :: Link stage B.ByteString - -- nt :: Link stage (NameType Method) - B.putStrLn $ "rc: " `B.append` rc - B.putStrLn $ "nt: " `B.append` (encode $ ntSignature nt) - - putStrLn "via methods:" + putStr "BB: analysing: " let msig = methodSignature $ (classMethods cls) !! 1 B.putStrLn (method `B.append` ": " `B.append` (encode msig)) - return $ testCFG $ lookupMethod method cls diff --git a/Mate/MethodPool.hs b/Mate/MethodPool.hs index 213e44c..0d089ca 100644 --- a/Mate/MethodPool.hs +++ b/Mate/MethodPool.hs @@ -3,7 +3,6 @@ module Mate.MethodPool where import Data.Binary -import Data.String import qualified Data.Map as M import qualified Data.ByteString.Lazy as B @@ -11,10 +10,9 @@ import Text.Printf import Foreign.Ptr import Foreign.C.Types -import Foreign.C.String import Foreign.StablePtr -import JVM.Converter +import JVM.ClassFile import Harpy import Harpy.X86Disassembler @@ -34,30 +32,25 @@ foreign import ccall "set_mmap" -- Word32 = entrypoint of method type MMap = M.Map B.ByteString Word32 --- TODO(bernhard): not in use yet --- Word32 = point of method call --- B.ByteString = name of called method -type CMap = M.Map Word32 B.ByteString - -foreign export ccall getMethodEntry :: Ptr () -> CString -> IO CUInt -getMethodEntry :: Ptr () -> CString -> IO CUInt -getMethodEntry ptr_mmap cstr = do +foreign export ccall getMethodEntry :: CUInt -> Ptr () -> Ptr () -> IO CUInt +getMethodEntry :: CUInt -> Ptr () -> Ptr () -> IO CUInt +getMethodEntry signal_from ptr_mmap ptr_cmap = do mmap <- ptr2mmap ptr_mmap - str' <- peekCString cstr - let method = fromString str' + cmap <- ptr2cmap ptr_cmap + + let w32_from = fromIntegral signal_from + let (method, cls, cpidx) = cmap M.! w32_from case M.lookup method mmap of Nothing -> do - printf "getMethodEntry: no method found. compile it\n" - -- TODO(bernhard): hardcoded... fixme! - cls <- parseClassFile "tests/Fib.class" - hmap <- parseMethod cls method + printf "getMethodEntry(from 0x%08x): no method found. compile it\n" w32_from + -- TODO(bernhard): maybe we have to load the class first? + -- (Or better in X86CodeGen?) + let (CMethod _ nt) = (constsPool cls) M.! cpidx + hmap <- parseMethod cls (ntName nt) case hmap of Just hmap' -> do - entry <- compileBB hmap' method - let w32_entry = ((fromIntegral $ ptrToIntPtr entry) :: Word32) - let mmap' = M.insert method w32_entry mmap - mmap2ptr mmap' >>= set_mmap - return $ fromIntegral w32_entry + entry <- compileBB hmap' cls method + return $ fromIntegral $ ((fromIntegral $ ptrToIntPtr entry) :: Word32) Nothing -> error $ (show method) ++ " not found. abort" Just w32 -> return (fromIntegral w32) @@ -71,26 +64,36 @@ getMethodEntry ptr_mmap cstr = do -- demo_mmap -- access Data.Map from C initMethodPool :: IO () -initMethodPool = mmap2ptr M.empty >>= set_mmap +initMethodPool = do + mmap2ptr M.empty >>= set_mmap + cmap2ptr M.empty >>= set_cmap -compileBB :: MapBB -> B.ByteString -> IO (Ptr Word8) -compileBB hmap name = do +compileBB :: MapBB -> Class Resolved -> B.ByteString -> IO (Ptr Word8) +compileBB hmap cls name = do mmap <- get_mmap >>= ptr2mmap - let ebb = emitFromBB hmap - (_, Right ((entry, bbstarts, end), disasm)) <- runCodeGen ebb () () + cmap <- get_cmap >>= ptr2cmap + + let ebb = emitFromBB cls hmap + (_, Right ((entry, _, _, new_cmap), disasm)) <- runCodeGen ebb () () let w32_entry = ((fromIntegral $ ptrToIntPtr entry) :: Word32) + let mmap' = M.insert name w32_entry mmap + let cmap' = M.union cmap new_cmap -- prefers elements in cmap mmap2ptr mmap' >>= set_mmap + cmap2ptr cmap' >>= set_cmap + printf "disasm:\n" mapM_ (putStrLn . showAtt) disasm return entry + foreign import ccall "dynamic" code_void :: FunPtr (IO ()) -> (IO ()) executeFuncPtr :: Ptr Word8 -> IO () executeFuncPtr entry = code_void $ ((castPtrToFunPtr entry) :: FunPtr (IO ())) +-- TODO(bernhard): make some typeclass magic 'n stuff mmap2ptr :: MMap -> IO (Ptr ()) mmap2ptr mmap = do ptr_mmap <- newStablePtr mmap @@ -98,3 +101,11 @@ mmap2ptr mmap = do ptr2mmap :: Ptr () -> IO MMap ptr2mmap vmap = deRefStablePtr $ ((castPtrToStablePtr vmap) :: StablePtr MMap) + +cmap2ptr :: CMap -> IO (Ptr ()) +cmap2ptr cmap = do + ptr_cmap <- newStablePtr cmap + return $ castStablePtrToPtr ptr_cmap + +ptr2cmap :: Ptr () -> IO CMap +ptr2cmap vmap = deRefStablePtr $ ((castPtrToStablePtr vmap) :: StablePtr cmap) diff --git a/Mate/Utilities.hs b/Mate/Utilities.hs index 2764921..ed92530 100644 --- a/Mate/Utilities.hs +++ b/Mate/Utilities.hs @@ -1,6 +1,13 @@ +{-# LANGUAGE OverloadedStrings #-} module Mate.Utilities where +import Data.Char +import Data.Word +import Data.Binary +import qualified Data.Map as M import qualified Data.ByteString.Lazy as B +-- import qualified Data.ByteString.Lazy.Char8 as B8 +import Codec.Binary.UTF8.String hiding (encode,decode) import JVM.ClassFile @@ -13,3 +20,15 @@ lookupMethod name cls = look (classMethods cls) look (f:fs) | methodName f == name = Just f | otherwise = look fs + +toString :: B.ByteString -> String +toString bstr = decodeString $ map (chr . fromIntegral) $ B.unpack bstr + +buildMethodID :: Class Resolved -> Word16 -> B.ByteString +buildMethodID cls idx = (rc `B.append` dot) `B.append` (ntName nt) `B.append` nt' + where + (CMethod rc nt) = (constsPool cls) M.! idx + nt' = encode $ ntSignature nt + dot :: B.ByteString + -- TODO(bernhard): WTF? why -XOverloadedStrings doesn't apply here? + dot = B.pack $ map (fromIntegral . ord) "." diff --git a/Mate/X86CodeGen.hs b/Mate/X86CodeGen.hs index 608f9bc..cf493f3 100644 --- a/Mate/X86CodeGen.hs +++ b/Mate/X86CodeGen.hs @@ -4,6 +4,7 @@ module Mate.X86CodeGen where import Data.Binary import Data.Int +import Data.Maybe import qualified Data.Map as M import qualified Data.ByteString.Lazy as B @@ -14,12 +15,14 @@ import Text.Printf import qualified JVM.Assembler as J import JVM.Assembler hiding (Instruction) +import JVM.ClassFile import JVM.Converter import Harpy import Harpy.X86Disassembler import Mate.BasicBlocks +import Mate.Utilities foreign import ccall "dynamic" code_int :: FunPtr (CInt -> CInt -> IO CInt) -> (CInt -> CInt -> IO CInt) @@ -33,6 +36,12 @@ foreign import ccall "callertrap" foreign import ccall "register_signal" register_signal :: IO () +foreign import ccall "get_cmap" + get_cmap :: IO (Ptr ()) + +foreign import ccall "set_cmap" + set_cmap :: Ptr () -> IO () + test_01, test_02, test_03 :: IO () test_01 = do register_signal @@ -91,8 +100,8 @@ testCase cf method = do case hmap of Nothing -> error "sorry, no code generation" Just hmap' -> do - let ebb = emitFromBB hmap' - (_, Right ((entry, bbstarts, end), disasm)) <- runCodeGen ebb () () + let ebb = emitFromBB cls hmap' + (_, Right ((entry, bbstarts, end, _), disasm)) <- runCodeGen ebb () () let int_entry = ((fromIntegral $ ptrToIntPtr entry) :: Int) printf "disasm:\n" mapM_ (putStrLn . showAtt) disasm @@ -107,45 +116,73 @@ type PatchInfo = (BlockID, EntryPointOffset) type BBStarts = M.Map BlockID Int -type CompileInfo = (EntryPoint, BBStarts, Int) +type CompileInfo = (EntryPoint, BBStarts, Int, CMap) + +-- B.ByteString: encoded name: . +-- Class Resolved: classfile +-- Word16: index of invoke-instruction +type MethodInfo = (B.ByteString, Class Resolved, Word16) -emitFromBB :: MapBB -> CodeGen e s (CompileInfo, [Instruction]) -emitFromBB hmap = do +-- Word32 = point of method call in generated code +-- MethodInfo = relevant information about callee +type CMap = M.Map Word32 MethodInfo + + +emitFromBB :: Class Resolved -> MapBB -> CodeGen e s (CompileInfo, [Instruction]) +emitFromBB cls hmap = do llmap <- sequence [newNamedLabel ("bb_" ++ show x) | (x,_) <- M.toList hmap] let lmap = zip (Prelude.fst $ unzip $ M.toList hmap) llmap ep <- getEntryPoint push ebp mov ebp esp - bbstarts <- efBB (0,(hmap M.! 0)) M.empty lmap + (calls, bbstarts) <- efBB (0,(hmap M.! 0)) M.empty M.empty lmap d <- disassemble end <- getCodeOffset - return ((ep, bbstarts, end), d) + return ((ep, bbstarts, end, calls), d) where getLabel :: BlockID -> [(BlockID, Label)] -> Label getLabel _ [] = error "label not found!" getLabel i ((x,l):xs) = if i==x then l else getLabel i xs - efBB :: (BlockID, BasicBlock) -> BBStarts -> [(BlockID, Label)] -> CodeGen e s (BBStarts) - efBB (bid, bb) bbstarts lmap = + efBB :: (BlockID, BasicBlock) -> CMap -> BBStarts -> [(BlockID, Label)] -> CodeGen e s (CMap, BBStarts) + efBB (bid, bb) calls bbstarts lmap = if M.member bid bbstarts then - return bbstarts + return (calls, bbstarts) else do bb_offset <- getCodeOffset let bbstarts' = M.insert bid bb_offset bbstarts defineLabel $ getLabel bid lmap - mapM_ emit $ code bb + cs <- mapM emit' $ code bb + let calls' = calls `M.union` (M.fromList $ catMaybes cs) case successor bb of - Return -> return bbstarts' + Return -> return (calls', bbstarts') OneTarget t -> do - efBB (t, hmap M.! t) bbstarts' lmap + efBB (t, hmap M.! t) calls' bbstarts' lmap TwoTarget t1 t2 -> do - bbstarts'' <- efBB (t1, hmap M.! t1) bbstarts' lmap - efBB (t2, hmap M.! t2) bbstarts'' lmap + (calls'', bbstarts'') <- efBB (t1, hmap M.! t1) calls' bbstarts' lmap + efBB (t2, hmap M.! t2) calls'' bbstarts'' lmap -- TODO(bernhard): also use metainformation -- TODO(bernhard): implement `emit' as function which accepts a list of -- instructions, so we can use patterns for optimizations where + emit' :: J.Instruction -> CodeGen e s (Maybe (Word32, MethodInfo)) + emit' (INVOKESTATIC cpidx) = do + ep <- getEntryPoint + let w32_ep = (fromIntegral $ ptrToIntPtr ep) :: Word32 + let l = buildMethodID cls cpidx + calladdr <- getCodeOffset + let w32_calladdr = w32_ep + (fromIntegral calladdr) :: Word32 + newNamedLabel (toString l) >>= defineLabel + -- TODO(bernhard): better try SIGILL instead of SIGSEGV? + mov (Addr 0) eax + -- discard arguments (TODO(bernhard): don't hardcode it) + add esp (4 :: Word32) + -- push result on stack (TODO(bernhard): if any) + push eax + return $ Just $ (w32_calladdr, (l, cls, cpidx)) + emit' insn = emit insn >> return Nothing + emit :: J.Instruction -> CodeGen e s () emit POP = do -- print dropped value ep <- getEntryPoint @@ -193,14 +230,6 @@ emitFromBB hmap = do emit (GOTO _ ) = do let sid = case successor bb of OneTarget t -> t; _ -> error "bad" jmp $ getLabel sid lmap - emit (INVOKESTATIC _) = do - -- TODO(bernhard): get and save information about this call - -- TODO(bernhard): better try SIGILL instead of SIGSEGV? - mov (Addr 0) eax - -- discard arguments (TODO(bernhard): don't hardcode it) - add esp (4 :: Word32) - -- push result on stack (TODO(bernhard): if any) - push eax emit RETURN = do mov esp ebp; pop ebp; ret emit IRETURN = do diff --git a/ffi/trap.c b/ffi/trap.c index 897fa7f..477897c 100644 --- a/ffi/trap.c +++ b/ffi/trap.c @@ -3,8 +3,10 @@ #include #include -unsigned int getMethodEntry(void *, char *); +unsigned int getMethodEntry(unsigned int, void *, void *); + void *method_map = NULL; +void *caller_map = NULL; void set_mmap(void *mmap) { @@ -18,6 +20,18 @@ void *get_mmap() return method_map; } +void set_cmap(void *cmap) +{ + printf("set_cmap: 0x%08x\n", (unsigned int) cmap); + caller_map = cmap; +} + +void *get_cmap() +{ + printf("get_cmap: 0x%08x\n", (unsigned int) caller_map); + return caller_map; +} + void mainresult(unsigned int a) { @@ -27,9 +41,10 @@ void mainresult(unsigned int a) void callertrap(int nSignal, siginfo_t *info, void *ctx) { struct ucontext *uctx = (struct ucontext *) ctx; - unsigned int patchme = getMethodEntry(method_map, "fib"); + unsigned int from = (unsigned int) uctx->uc_mcontext.eip; + unsigned int patchme = getMethodEntry(from, method_map, caller_map); - printf("callertrap(mctx) by 0x%08x\n", (unsigned int) uctx->uc_mcontext.eip); + printf("callertrap(mctx) by 0x%08x\n", from); // printf("callertrap(addr) by 0x%08x\n", info->si_addr); // printf("callertrap(*esp) by 0x%08x\n", * (unsigned int *) uctx->uc_mcontext.esp); -- 2.25.1