1 {-# LANGUAGE ScopedTypeVariables #-}
2 {-# LANGUAGE NoMonomorphismRestriction #-}
3 {-# LANGUAGE OverloadedStrings #-}
4 {-# LANGUAGE TemplateHaskell #-}
6 -- Purpose of this file is just do test some Intermediate representations and stuff ;-)
8 {- Some important material:
10 - Java HotSpotâ„¢ Client Compiler: www.cdl.uni-saarland.de/ssasem/talks/Christian.Wimmer.pdf
11 - http://www.complang.tuwien.ac.at/andi/185A50
13 - [Poletto:1999] http://dl.acm.org/citation.cfm?doid=330249.330250
14 - [Wimmer:2010] http://dl.acm.org/citation.cfm?id=1772954.1772979
19 module ScratchHS where
22 import qualified Data.Set as Set
24 import qualified Data.Map as M
26 import Control.Monad.State
28 import qualified Data.Heap as H
29 --import qualified Data.Heap as M
31 import Harpy hiding(fst,add)
32 import qualified Harpy.X86Disassembler as H
44 import qualified JVM.Assembler as J
48 import Mate.BasicBlocks
50 import qualified Data.ByteString.Lazy as B
53 $(callDecl "callAsWord32" [t|Word32|])
55 data SimpleStack = PushLit Int
61 testP = [PushLit 3, PushLit 2, Mul]
64 data ROp = RMul | RAdd
66 data RegIL = RMov Reg Reg
68 | RBin Reg Reg Reg ROp
70 data MateState = MateState String
72 compileRegIL :: RegIL -> CodeGen (Ptr Int32) MateState ()
73 compileRegIL (RMov t s) = do
75 let (mt,ms) = (eax,eax)
79 entryCode :: CodeGen e s ()
80 entryCode = do push ebp
83 exitCode :: CodeGen e s ()
84 exitCode = do mov esp ebp
90 run :: [RegIL] -> Ptr Int32 -> IO (MateState, Either ErrMsg [H.Instruction])
91 run program env = let compileAndFeedback = mapM_ compileRegIL program >> disassemble
92 in runCodeGen compileAndFeedback env (MateState "none")
95 -- Allocates a buffer with size n. All zero.
96 emptyMemory :: (Storable a, Num a) => Int -> IO (Ptr a)
97 emptyMemory n = mallocArray n
98 >>= (\ptr -> pokeArray ptr (replicate n 0) >> return ptr)
102 ptr <- emptyMemory 26
103 (_, Right code) <- run p' ptr
104 return $ map H.showIntel code
107 simpleTest :: [RegIL]
108 simpleTest = [RMov 0 1]
111 -- Just some class file sand
112 loadMethod methodName classFile = do cls <- parseClassFile classFile
114 return (cls, lookupMethod methodName cls)
117 getFib = do (cls, Just m) <- loadMethod "fac" "../tests/Fac.class"
120 fibBasicBlocks = do (cls,m) <- getFib
121 hmap <- parseMethod cls "facFor"
126 {- Thoughs on types and representations
127 - We start from constructing a CFG. What do we need here
128 - ** Fast traversal which is aware of cycles
129 - ** Fast successor, do we need predecessors?
130 - ** Find all paths to current node (including back references)
131 - ** Generic Node type in order to write mentioned operations
132 - generically. There should be no intermediate language "lock in"
133 - i.e. adding another IR should not kill CFG code
134 - Furthermore operations like SSA construction should
135 - not affect the CFG datastructure. Nodes contents should be
136 - interchangable in a way.
137 - ** Some form of unique naming - we would like to identify blocks
138 - and check whether code should be produced for this node
139 - ** Should be Haskell idiomatic - should be composed with
140 - standard haskell infrastructure
141 - ** Convinient printing
143 - From this a inductive type should be appropriate?
147 data G a = Node a (G a) (G a)
152 testG = [(0,[1,2]),(1,[3]),(2,[0]),(3,[])]
153 nodeLoads :: [(Int,String)]
154 nodeLoads = [(0,"a"),(1,"b"),(2,"c"),(3,"d")]
156 toG :: (Ord k, Show k) => [(k,[k])] -> k -> [(k, a)] -> Maybe (G a)
157 toG xs start pls = let nodeTable = M.fromList pls
158 payload s = fromJust $ M.lookup s nodeTable
160 toBin xs pl = case xs of
161 (x:y:[]) -> Node pl x y
162 (x:[] ) -> Node pl x Nil
165 conn = M.fromList $ map (
166 \(f,ts) -> let pl t = node f (payload t) t
168 in (f, toBin succ (payload f))
171 node f p t = case M.lookup t conn of
173 in M.lookup start conn
175 type LoopCheck a = Set a
178 {- Actually i am not sure about the cata and algebra definition.
180 - check: http://dl.acm.org/citation.cfm?id=128035
183 type TreeAlgebra a r = (a -> r, r -> r -> r)
185 foldG :: TreeAlgebra a r -> r -> G a -> r
186 foldG (f,g) s (Leaf val) = g (f val) s
187 foldG (f,g) s (Node val l r) = g (f val) $ g (foldG (f,g) s l) (foldG (f,g) s r)
189 printG = foldG ((: []), (++)) []
191 loopCheck :: (Ord k) => G a -> (G a -> k) -> State (LoopCheck k) Bool
192 loopCheck g f = do state <- get
193 return $ Set.member (f g) state
195 addNode :: (Ord k) => k -> State (LoopCheck k) ()
196 addNode k = do s <- get
200 foldGM :: (Ord k) => TreeAlgebra a r -> (G a -> k) -> r -> G a -> State (LoopCheck k) r
201 foldGM _ _ s Nil = return s
202 foldGM (f,g) c s k@(Leaf val) = loopCheck k c >>= \p -> if p then return s else return $ g (f val) s
203 foldGM (f,g) c s k@(Node val l r) = loopCheck k c >>= \p -> if p then return s else continue
204 where t = foldGM (f,g) c s
206 continue = do addNode $ c k
209 return $ self $ g left right
211 methodCodeM :: B.ByteString-> Maybe (Class Resolved)-> Maybe (Class Resolved, B.ByteString)
212 methodCodeM name mcls = do cls <- mcls
213 ins <- methodCode cls name
216 getMethod :: FilePath-> B.ByteString-> IO (Maybe (B.ByteString, Class Resolved))
217 getMethod file name = do cls <- parseClassFile file
218 return (methodCode cls name >>= \ins -> return (ins,cls))
220 fullDecode :: (B.ByteString, t) -> ([J.Instruction], t)
221 fullDecode (ins, cls) = (J.codeInstructions $ J.decodeMethod ins, cls)
223 getMethodIO :: FilePath-> B.ByteString-> IO (Maybe ([J.Instruction], Class Resolved))
224 getMethodIO file name = do context <- getMethod file name
225 return $ liftM fullDecode context
233 -- Means NumberedINSruction
235 type NInst = (Source, Size, J.Instruction)
236 -- Putting Source and Target in data types prevents from interchanging ;-)
237 data Source = Source Int deriving (Eq, Ord, Show)
238 data Target = Target Int deriving (Eq, Ord, Show)
239 -- Source, Target Instruction id
240 type SourceTarget = (Source,Target)
241 -- State type for findContinuations
242 type ContState = (H.MinPrioHeap Target Source, H.MinPrioHeap Source Target)
244 findContinuations :: [NInst] -> State ContState ()
245 findContinuations = mapM_ addCont
247 addCont :: NInst -> State ContState ()
248 addCont (s, b, IF_ICMP _ w) = do add s (addTarget s w); jmpNext s b;
249 addCont (s, _, GOTO w) = add s (addTarget s w);
250 addCont _ = return ();
252 jmpNext :: Source -> Int -> State ContState ()
253 jmpNext s@(Source i) o = add s (Target $ i + o)
256 addTarget :: Source -> Word16 -> Target
257 addTarget (Source s) w16 = Target $ fromIntegral result
258 where result = s16 + fromIntegral w16 :: Int16
259 s16 = fromIntegral s :: Int16
262 addW16Signed :: Int -> Word16 -> Int
263 addW16Signed i w16 = i + fromIntegral s16
264 where s16 = fromIntegral w16 :: Int16
267 add :: Source -> Target -> State ContState ()
268 add s t = do (targetSource,sourceTarget) <- get
269 put (H.insert (t, s) targetSource, H.insert (s,t) sourceTarget)
271 getInstOffsets :: [J.Instruction] -> [Int]
272 getInstOffsets = map (\i -> fromIntegral $ B.length $ J.encodeInstructions [i])
274 sumSeries'':: [Int] -> [Int]
275 sumSeries'' = reverse . snd . foldl (\(s,xs) x -> (s+x,s : xs)) (0,[])
279 data Block = BB [NInst] deriving (Show)
282 -- Context of the Method to be compiled
283 type Context = ([J.Instruction],Class Resolved)
284 -- Maybe a context of the Method to be compiled
285 type MContext = Maybe Context
286 -- The state for CFG creation
287 type CFGState = (ContState, M.Map Int CFG)
290 getCFG :: FilePath -> B.ByteString -> IO (Maybe CFG)
291 getCFG file name = do context <- getMethodIO file name
292 return $ buildCFGContext context
294 buildCFGContext :: MContext -> Maybe CFG
295 buildCFGContext = liftM genCFGPure
297 genCFGPure :: Context -> CFG
298 genCFGPure (ins,cls) = let offsets = getInstOffsets ins
299 taggedInst = zip3 (map Source $ sumSeries'' offsets) offsets ins
300 cont = execState (findContinuations taggedInst) (H.empty,H.empty)
301 in evalState (genCFGState taggedInst) (cont, M.empty)
304 joinM :: Maybe a -> Maybe b -> Maybe (a,b)
305 joinM Nothing _ = Nothing
306 joinM _ Nothing = Nothing
307 joinM (Just a) (Just b) = Just (a,b)
309 joinM' = uncurry joinM
311 leq :: Target -> Source -> Bool
312 leq (Target t) (Source s) = t <= s
314 getCont :: State CFGState ContState
315 getCont = do ((ts,st),_) <- get
319 takeTill :: Int -> [NInst] -> ([NInst],[NInst])
320 takeTill ref = span (\(Source loc,s,inst) -> loc < ref)
323 writeBack :: ContState -> State CFGState ()
324 writeBack s = do (_,cls) <- get
327 from :: Int -> [NInst] -> [NInst]
328 from ref = dropWhile (\(Source loc,s,inst) -> loc < ref)
331 data Branch = BackRef Target Source
332 | ForwardRef Source [Target] deriving (Show)
336 getForwardRef ts s' t' restForward restBack =
337 do writeBack (ts, restForward) -- remove forward ref from state
338 -- maybe this one is a twoRef
339 anotherOne <- getNextBranch
341 Just (ForwardRef s'' t'')
342 -- If the next reference has the same source concat them
343 -> if s'' == s' then return $ Just $ ForwardRef s' (t' : t'')
344 else return $ Just $ ForwardRef s' [t']
345 Nothing -> --fail ("bahh" ++ show s' ++ show t')
346 return $ Just $ ForwardRef s' [t']
348 getNextBranch :: State CFGState (Maybe Branch)
349 getNextBranch = do ((ts,st),m) <- get
350 case joinM' (H.view ts, H.view st) of
351 -- Here we found the next front and the next back reference.
352 -- which one is earlier
353 Just ( ( (t,s), restBack), ( (s',t'), restForward) )
355 then -- back reference
356 do writeBack (restBack, st) -- remove back reference from ref state
357 return $ Just $ BackRef t s
358 else -- forward reference
359 --getForwardRef ts s' t' restForward restBack
360 do writeBack (ts,restForward)
361 return $ Just $ ForwardRef s' [t']
362 Nothing -> return Nothing
365 genCFGState :: [NInst] -> State CFGState CFG
366 genCFGState xs = do nextBranch <- getNextBranch
368 Nothing -> return $ Leaf $ BB xs
369 Just (BackRef (Target t) (Source s))
371 let (previousBlock,rest) = takeTill t xs
373 let patched = let p = M.insert s c m
374 c = evalState (genCFGState rest) (refs,p)
377 Just (ForwardRef _ _)
382 Nothing -> return $ Leaf $ BB xs
383 Just b -> return (Node (BB xs) b Nil)
384 _ -> fail (show nextBranch)
385 -- return $ Leaf $ BB xs
387 genCFGState :: [NInst] -> State CFGState (CFG, M.Map Source CFG)
388 genCFGState xs = do (ts,st) <- getCont
389 case joinM' (H.view ts, H.view st) of
390 Just ( ( (t, s), backRef), ( (s', t'), sndBranch) )
391 -> if t `leq` s then do writeBack (backRef,st);
392 runUntilBackRef (t , s )
393 else runUntilBranch (s', t') sndBranch ts
397 -- There is no back reference left. Run block until Control leaves.
398 runUntilExit = fail "run until exit" --Nothing = return takeWhile (\(s,w,inst) -> undefined) undefined
400 -- In the program future a goto lands at this instruction.
401 runUntilBackRef (Target ref,_) =
403 let (block,rest) = takeTill ref xs
404 (followUpBlock,_) <- genCFGState rest
405 let cfg = Node (BB block) followUpBlock Nil
406 return $ (cfg, M.empty)
408 runUntilBranch (Source s,Target t) st ts =
410 let (block,rest) = takeTill (s+1) xs
411 -- check wheter this instruction branches also to another location
413 Just ((Source sndBr, Target t),restBranches)
414 -> if sndBr == s then do writeBack (ts,restBranches)
415 twoBranch sndBr t block
417 Nothing -> oneBranch block
418 where twoBranch sndBranch t' block =
419 do (left, _) <- genCFGState $ from t xs
420 (right,_) <- genCFGState $ from t' xs
421 return $ (Node (BB block) left right, M.empty)
422 oneBranch block = return (Leaf (BB block), M.empty) --fail $ "oneBranch" ++ (show s) ++ " " ++ (show st)
425 fib = do con@(Just (ins,cls)) <- getMethodIO "../tests/Fac.class" "facFor"
426 let offsets = getInstOffsets ins
427 let taggedInst = zip3 (map Source $ sumSeries'' offsets) offsets ins
428 mapM_ print taggedInst
429 let continuations = execState (findContinuations taggedInst) (H.empty, H.empty)
431 let cfg = buildCFGContext con
438 diamant = let start = Node "a" left right
439 left = Node "l" end Nil
440 right = Node "r" end Nil
441 end = Node "g" start Nil
444 dag = Node "a" (Node "b" (Leaf "c") (Leaf "d")) (Node "b" (Leaf "c") (Leaf "d"))
447 value (Node val _ _) = Just val
448 value (Leaf val ) = Just val
452 printG' :: Ord k => G k -> [k]
453 printG' g = evalState (foldGM ((: []), (++)) (\node -> let (Just v) = value node in v) [] g) Set.empty