1 {-# LANGUAGE ScopedTypeVariables #-}
2 {-# LANGUAGE NoMonomorphismRestriction #-}
3 {-# LANGUAGE OverloadedStrings #-}
4 {-# LANGUAGE TemplateHaskell #-}
6 -- Purpose of this file is just do test some Intermediate representations and stuff ;-)
8 {- Some important material:
10 - Java HotSpotâ„¢ Client Compiler: www.cdl.uni-saarland.de/ssasem/talks/Christian.Wimmer.pdf
11 - http://www.complang.tuwien.ac.at/andi/185A50
13 - [Poletto:1999] http://dl.acm.org/citation.cfm?doid=330249.330250
14 - [Wimmer:2010] http://dl.acm.org/citation.cfm?id=1772954.1772979
19 module ScratchHS where
22 import qualified Data.Set as Set
24 import qualified Data.Map as M
26 import Control.Monad.State
28 import qualified Data.Heap as H
29 --import qualified Data.Heap as M
31 import Harpy hiding(fst,add)
32 import qualified Harpy.X86Disassembler as H
44 import qualified JVM.Assembler as J
48 import Mate.BasicBlocks
50 import qualified Data.ByteString.Lazy as B
53 $(callDecl "callAsWord32" [t|Word32|])
55 data SimpleStack = PushLit Int
61 testP = [PushLit 3, PushLit 2, Mul]
64 data ROp = RMul | RAdd
66 data RegIL = RMov Reg Reg
68 | RBin Reg Reg Reg ROp
70 data MateState = MateState String
72 compileRegIL :: RegIL -> CodeGen (Ptr Int32) MateState ()
73 compileRegIL (RMov t s) = do
75 let (mt,ms) = (eax,eax)
79 entryCode :: CodeGen e s ()
80 entryCode = do push ebp
83 exitCode :: CodeGen e s ()
84 exitCode = do mov esp ebp
90 run :: [RegIL] -> Ptr Int32 -> IO (MateState, Either ErrMsg [H.Instruction])
91 run program env = let compileAndFeedback = mapM_ compileRegIL program >> disassemble
92 in runCodeGen compileAndFeedback env (MateState "none")
95 -- Allocates a buffer with size n. All zero.
96 emptyMemory :: (Storable a, Num a) => Int -> IO (Ptr a)
97 emptyMemory n = mallocArray n
98 >>= (\ptr -> pokeArray ptr (replicate n 0) >> return ptr)
102 ptr <- emptyMemory 26
103 (_, Right code) <- run p' ptr
104 return $ map H.showIntel code
107 simpleTest :: [RegIL]
108 simpleTest = [RMov 0 1]
111 -- Just some class file sand
112 loadMethod methodName classFile = do cls <- parseClassFile classFile
114 return (cls, lookupMethod methodName cls)
117 getFib = do (cls, Just m) <- loadMethod "fac" "../tests/Fac.class"
120 fibBasicBlocks = do (cls,m) <- getFib
121 hmap <- parseMethod cls "facFor"
126 {- Thoughs on types and representations
127 - We start from constructing a CFG. What do we need here
128 - ** Fast traversal which is aware of cycles
129 - ** Fast successor, do we need predecessors?
130 - ** Find all paths to current node (including back references)
131 - ** Generic Node type in order to write mentioned operations
132 - generically. There should be no intermediate language "lock in"
133 - i.e. adding another IR should not kill CFG code
134 - Furthermore operations like SSA construction should
135 - not affect the CFG datastructure. Nodes contents should be
136 - interchangable in a way.
137 - ** Some form of unique naming - we would like to identify blocks
138 - and check whether code should be produced for this node
139 - ** Should be Haskell idiomatic - should be composed with
140 - standard haskell infrastructure
141 - ** Convinient printing
143 - From this a inductive type should be appropriate?
147 data G a = Node a (G a) (G a)
151 type LoopCheck a = Set a
154 {- Actually i am not sure about the cata and algebra definition.
156 - check: http://dl.acm.org/citation.cfm?id=128035
159 type TreeAlgebra a r = (a -> r, r -> r -> r)
161 foldG :: TreeAlgebra a r -> r -> G a -> r
162 foldG (f,g) s (Leaf val) = g (f val) s
163 foldG (f,g) s (Node val l r) = g (f val) $ g (foldG (f,g) s l) (foldG (f,g) s r)
165 printG = foldG ((: []), (++)) []
167 loopCheck :: (Ord k) => G a -> (G a -> k) -> State (LoopCheck k) Bool
168 loopCheck g f = do state <- get
169 return $ Set.member (f g) state
171 addNode :: (Ord k) => k -> State (LoopCheck k) ()
172 addNode k = do s <- get
176 foldGM :: (Ord k) => TreeAlgebra a r -> (G a -> k) -> r -> G a -> State (LoopCheck k) r
177 foldGM _ _ s Nil = return s
178 foldGM (f,g) c s k@(Leaf val) = loopCheck k c >>= \p -> if p then return s else return $ g (f val) s
179 foldGM (f,g) c s k@(Node val l r) = loopCheck k c >>= \p -> if p then return s else continue
180 where t = foldGM (f,g) c s
182 continue = do addNode $ c k
185 return $ self $ g left right
187 methodCodeM :: B.ByteString-> Maybe (Class Resolved)-> Maybe (Class Resolved, B.ByteString)
188 methodCodeM name mcls = do cls <- mcls
189 ins <- methodCode cls name
192 getMethod :: FilePath-> B.ByteString-> IO (Maybe (B.ByteString, Class Resolved))
193 getMethod file name = do cls <- parseClassFile file
194 return (methodCode cls name >>= \ins -> return (ins,cls))
196 fullDecode :: (B.ByteString, t) -> ([J.Instruction], t)
197 fullDecode (ins, cls) = (J.codeInstructions $ J.decodeMethod ins, cls)
199 getMethodIO :: FilePath-> B.ByteString-> IO (Maybe ([J.Instruction], Class Resolved))
200 getMethodIO file name = do context <- getMethod file name
201 return $ liftM fullDecode context
209 -- Means NumberedINSruction
211 type NInst = (Source, Size, J.Instruction)
212 -- Putting Source and Target in data types prevents from interchanging ;-)
213 data Source = Source Int deriving (Eq, Ord, Show)
214 data Target = Target Int deriving (Eq, Ord, Show)
215 -- Source, Target Instruction id
216 type SourceTarget = (Source,Target)
217 -- State type for findContinuations
218 type ContState = (H.MinPrioHeap Target Source, H.MinPrioHeap Source Target)
220 findContinuations :: [NInst] -> State ContState ()
221 findContinuations = mapM_ addCont
223 addCont :: NInst -> State ContState ()
224 addCont (s, b, IF_ICMP _ w) = do add s (addTarget s w); jmpNext s b;
225 addCont (s, _, GOTO w) = add s (addTarget s w);
226 addCont _ = return ();
228 jmpNext :: Source -> Int -> State ContState ()
229 jmpNext s@(Source i) o = add s (Target $ i + o)
232 addTarget :: Source -> Word16 -> Target
233 addTarget (Source s) w16 = Target $ fromIntegral result
234 where result = s16 + fromIntegral w16 :: Int16
235 s16 = fromIntegral s :: Int16
238 addW16Signed :: Int -> Word16 -> Int
239 addW16Signed i w16 = i + fromIntegral s16
240 where s16 = fromIntegral w16 :: Int16
243 add :: Source -> Target -> State ContState ()
244 add s t = do (targetSource,sourceTarget) <- get
245 put (H.insert (t, s) targetSource, H.insert (s,t) sourceTarget)
247 getInstOffsets :: [J.Instruction] -> [Int]
248 getInstOffsets = map (\i -> fromIntegral $ B.length $ J.encodeInstructions [i])
250 sumSeries'':: [Int] -> [Int]
251 sumSeries'' = reverse . snd . foldl (\(s,xs) x -> (s+x,s : xs)) (0,[])
255 data Block = BB [NInst] deriving (Show)
258 -- Context of the Method to be compiled
259 type Context = ([J.Instruction],Class Resolved)
260 -- Maybe a context of the Method to be compiled
261 type MContext = Maybe Context
262 -- The state for CFG creation
263 type CFGState = (ContState, M.Map Int CFG)
266 getCFG :: FilePath -> B.ByteString -> IO (Maybe CFG)
267 getCFG file name = do context <- getMethodIO file name
268 return $ buildCFGContext context
270 buildCFGContext :: MContext -> Maybe CFG
271 buildCFGContext = liftM genCFGPure
273 genCFGPure :: Context -> CFG
274 genCFGPure (ins,cls) = let offsets = getInstOffsets ins
275 taggedInst = zip3 (map Source $ sumSeries'' offsets) offsets ins
276 cont = execState (findContinuations taggedInst) (H.empty,H.empty)
277 in evalState (genCFGState taggedInst) (cont, M.empty)
280 joinM :: Maybe a -> Maybe b -> Maybe (a,b)
281 joinM Nothing _ = Nothing
282 joinM _ Nothing = Nothing
283 joinM (Just a) (Just b) = Just (a,b)
285 joinM' = uncurry joinM
287 leq :: Target -> Source -> Bool
288 leq (Target t) (Source s) = t <= s
290 getCont :: State CFGState ContState
291 getCont = do ((ts,st),_) <- get
295 takeTill :: Int -> [NInst] -> ([NInst],[NInst])
296 takeTill ref = span (\(Source loc,s,inst) -> loc < ref)
299 writeBack :: ContState -> State CFGState ()
300 writeBack s = do (_,cls) <- get
303 from :: Int -> [NInst] -> [NInst]
304 from ref = dropWhile (\(Source loc,s,inst) -> loc < ref)
307 data Branch = BackRef Target Source
308 | ForwardRef Source [Target] deriving (Show)
312 getForwardRef ts s' t' restForward restBack =
313 do writeBack (ts, restForward) -- remove forward ref from state
314 -- maybe this one is a twoRef
315 anotherOne <- getNextBranch
317 Just (ForwardRef s'' t'')
318 -- If the next reference has the same source concat them
319 -> if s'' == s' then return $ Just $ ForwardRef s' (t' : t'')
320 else return $ Just $ ForwardRef s' [t']
321 Nothing -> --fail ("bahh" ++ show s' ++ show t')
322 return $ Just $ ForwardRef s' [t']
324 getNextBranch :: State CFGState (Maybe Branch)
325 getNextBranch = do ((ts,st),m) <- get
326 case joinM' (H.view ts, H.view st) of
327 -- Here we found the next front and the next back reference.
328 -- which one is earlier
329 Just ( ( (t,s), restBack), ( (s',t'), restForward) )
331 then -- back reference
332 do writeBack (restBack, st) -- remove back reference from ref state
333 return $ Just $ BackRef t s
334 else -- forward reference
335 --getForwardRef ts s' t' restForward restBack
336 do writeBack (ts,restForward)
337 return $ Just $ ForwardRef s' [t']
338 Nothing -> return Nothing
341 genCFGState :: [NInst] -> State CFGState CFG
342 genCFGState xs = do nextBranch <- getNextBranch
344 Nothing -> return $ Leaf $ BB xs
345 Just (BackRef (Target t) (Source s))
347 let (previousBlock,rest) = takeTill t xs
349 let patched = let p = M.insert s c m
350 c = evalState (genCFGState rest) (refs,p)
353 Just (ForwardRef _ _)
358 Nothing -> return $ Leaf $ BB xs
359 Just b -> return (Node (BB xs) b Nil)
360 _ -> fail (show nextBranch)
361 -- return $ Leaf $ BB xs
363 genCFGState :: [NInst] -> State CFGState (CFG, M.Map Source CFG)
364 genCFGState xs = do (ts,st) <- getCont
365 case joinM' (H.view ts, H.view st) of
366 Just ( ( (t, s), backRef), ( (s', t'), sndBranch) )
367 -> if t `leq` s then do writeBack (backRef,st);
368 runUntilBackRef (t , s )
369 else runUntilBranch (s', t') sndBranch ts
373 -- There is no back reference left. Run block until Control leaves.
374 runUntilExit = fail "run until exit" --Nothing = return takeWhile (\(s,w,inst) -> undefined) undefined
376 -- In the program future a goto lands at this instruction.
377 runUntilBackRef (Target ref,_) =
379 let (block,rest) = takeTill ref xs
380 (followUpBlock,_) <- genCFGState rest
381 let cfg = Node (BB block) followUpBlock Nil
382 return $ (cfg, M.empty)
384 runUntilBranch (Source s,Target t) st ts =
386 let (block,rest) = takeTill (s+1) xs
387 -- check wheter this instruction branches also to another location
389 Just ((Source sndBr, Target t),restBranches)
390 -> if sndBr == s then do writeBack (ts,restBranches)
391 twoBranch sndBr t block
393 Nothing -> oneBranch block
394 where twoBranch sndBranch t' block =
395 do (left, _) <- genCFGState $ from t xs
396 (right,_) <- genCFGState $ from t' xs
397 return $ (Node (BB block) left right, M.empty)
398 oneBranch block = return (Leaf (BB block), M.empty) --fail $ "oneBranch" ++ (show s) ++ " " ++ (show st)
401 fib = do con@(Just (ins,cls)) <- getMethodIO "../tests/Fac.class" "facFor"
402 let offsets = getInstOffsets ins
403 let taggedInst = zip3 (map Source $ sumSeries'' offsets) offsets ins
404 mapM_ print taggedInst
405 let continuations = execState (findContinuations taggedInst) (H.empty, H.empty)
407 let cfg = buildCFGContext con
414 diamant = let start = Node "a" left right
415 left = Node "l" end Nil
416 right = Node "r" end Nil
417 end = Node "g" start Nil
420 dag = Node "a" (Node "b" (Leaf "c") (Leaf "d")) (Node "b" (Leaf "c") (Leaf "d"))
423 value (Node val _ _) = Just val
424 value (Leaf val ) = Just val
428 printG' :: Ord k => G k -> [k]
429 printG' g = evalState (foldGM ((: []), (++)) (\node -> let (Just v) = value node in v) [] g) Set.empty