5 import Control.Monad.State
14 import qualified JVM.Assembler as J
15 import qualified Data.Heap as H
16 import qualified Data.ByteString.Lazy as B
17 import qualified Data.Map as M
24 {- Thoughs on types and representations
25 - We start from constructing a CFG. What do we need here
26 - ** Fast traversal which is aware of cycles
27 - ** Fast successor, do we need predecessors?
28 - ** Find all paths to current node (including back references)
29 - ** Generic Node type in order to write mentioned operations
30 - generically. There should be no intermediate language "lock in"
31 - i.e. adding another IR should not kill CFG code
32 - Furthermore operations like SSA construction should
33 - not affect the CFG datastructure. Nodes contents should be
34 - interchangable in a way.
35 - ** Some form of unique naming - we would like to identify blocks
36 - and check whether code should be produced for this node
37 - ** Should be Haskell idiomatic - should be composed with
38 - standard haskell infrastructure
39 - ** Convinient printing
41 - From this a inductive type should be appropriate?
46 -- Means NumberedINSruction
48 type NInst = (Source, Size, J.Instruction)
49 -- Putting Source and Target in data types prevents from interchanging ;-)
50 data Source = Source Int deriving (Eq, Ord, Show)
51 data Target = Target Int deriving (Eq, Ord, Show)
52 -- Source, Target Instruction id
53 type SourceTarget = (Source,Target)
54 -- State type for findContinuations
55 type ContState = ([SourceTarget], H.MinPrioHeap Target Source)
57 data Block = BB [NInst] deriving (Show)
59 data BlockID = Id Int deriving (Show,Ord,Eq)
61 type IndirectCFG = [([NInst],[Int],BlockID)]
63 data FinalBlock = FinalBlock BlockID [NInst] deriving (Show)
65 instance Eq FinalBlock where
66 FinalBlock (Id x) _ == FinalBlock (Id y) _ = x == y
68 instance Ord FinalBlock where
69 FinalBlock (Id x) _ <= FinalBlock (Id y) _ = x <= y
71 less :: Target -> Source -> Bool
72 less (Target t) (Source s) = t < s
74 addW16Signed :: Int -> Word16 -> Int
75 addW16Signed i w16 = i + fromIntegral s16
76 where s16 = fromIntegral w16 :: Int16
78 getInstOffsets :: [J.Instruction] -> [Int]
79 getInstOffsets = map (\i -> fromIntegral $ B.length $ J.encodeInstructions [i])
81 sumSeries'':: [Int] -> [Int]
82 sumSeries'' = reverse . snd . foldl (\(s,xs) x -> (s+x,s : xs)) (0,[])
84 splitBlocksBackRef :: [NInst] -> [Int]
85 splitBlocksBackRef = concatMap checkSplit
86 where checkSplit inst = case inst of
87 (Source s, size, IF_ICMP _ t) -> []
88 (Source s, size, GOTO t) -> [addW16Signed s t]
92 getInstructions :: [Instruction] -> [(Source, Int, Instruction)]
93 getInstructions ins = zip3 (map Source $ sumSeries'' offsets) offsets ins
94 where offsets = getInstOffsets ins
97 getBranch :: NInst -> [Int]
98 getBranch (Source s, size, IF_ICMP _ t) = [addW16Signed s t, s+size]
99 getBranch (Source s, size, GOTO t) = [addW16Signed s t]
100 getBranch (Source s, size, IRETURN ) = [0] --actually wrong
103 -- a version of Prelude.span whereby the first element
104 -- which does not hold predicate f is also included in
106 spanPlus :: (a -> Bool) -> [a] -> ([a], [a])
107 spanPlus _ [] = ([],[])
108 spanPlus f (x:xs) = if f x then let (b, a) = spanPlus f xs
112 fromNInst :: NInst -> Int
113 fromNInst (Source s,_,_) = s
115 ifJust :: b -> Maybe a -> [b]
116 ifJust _ Nothing = []
117 ifJust a (Just _) = [a]
119 splitBlocks :: [Int] -> [NInst] -> IndirectCFG
120 splitBlocks _ [] = []
121 splitBlocks backRefs xs = (map fst block, block >>= snd, Id id) : splitBlocks backRefs (map fst rest)
122 where getBranchAndRef ins@(Source i,w,_) =
123 (ifJust (i+w) $ find (==i+w) backRefs) ++ getBranch ins
124 branches = zip xs $ map getBranchAndRef xs
125 (block,rest) = spanPlus (null . snd) branches
126 (Source id,_,_) = fst . head $ block -- block guarantted to be non empty
128 getTransitions :: IndirectCFG -> (M.Map BlockID [BlockID])
129 getTransitions = foldr (\(_,targets,id) s -> M.insert id (map Id targets) s) M.empty
131 -- [([NInst],[Int],BlockID)]
132 getNodes :: IndirectCFG -> [(BlockID,FinalBlock)]
133 getNodes = map rearrange
134 where rearrange (insts,_,id) = (id,FinalBlock id insts)
137 --indirectCFGToG :: IndirectCFG -> G [NInst]
138 indirectCFGToG cfg = toG packNodes (M.toList $ getTransitions cfg) (Id 0) (getNodes cfg)
142 findContinuations :: [NInst] -> State ContState ()
143 findContinuations = mapM_ addCont
145 addCont :: NInst -> State ContState ()
146 addCont (s, b, IF_ICMP _ w) = do add s (addTarget s w); jmpNext s b;
147 addCont (s, _, GOTO w) = add s (addTarget s w);
148 addCont (s, b, _ ) = return () --jmpNext s b
150 jmpNext :: Source -> Int -> State ContState ()
151 jmpNext s@(Source i) o = add s (Target $ i + o)
154 addTarget :: Source -> Word16 -> Target
155 addTarget (Source s) w16 = Target $ fromIntegral result
156 where result = s16 + fromIntegral w16 :: Int16
157 s16 = fromIntegral s :: Int16
161 addBranch :: Source -> Target -> State ContState ()
162 addBranch s t = do (branches,backRefs) <- get
163 put ( (s,t) : branches, backRefs)
165 addBackRef :: Source -> Target -> State ContState ()
166 addBackRef s t = do (branches,backRefs) <- get
167 put (branches, H.insert (t,s) backRefs)
169 add :: Source -> Target -> State ContState ()
170 add s t = do addBranch s t
171 -- if back branch - also add branch to back references
172 -- for faster access later on
173 if t `less` s then do trace ("jsadf") addBackRef s t;
178 -- Context of the Method to be compiled
179 type Context = ([J.Instruction],Class Resolved)
180 -- Maybe a context of the Method to be compiled
181 type MContext = Maybe Context
182 -- The state for CFG creation
183 type CFGState = (ContState, M.Map Int CFG)
185 type NodeState = ([(Source,Target)],[(Target,Source)])
186 type IndirectGraph k a = ((M.Map k a),(M.Map k [k]))
192 minNext :: Ord t => [t] -> [t] -> ([t], [t], Maybe t)
193 minNext f@(x:xs) b@(y:ys) = if x < y then (xs, b, Just x)
195 minNext (x:xs) [] = (xs, [], Just x )
196 minNext [] (y:ys) = ([], ys, Just y )
197 minNext [] [] = ([], [], Nothing)
199 unpackst ((Source s), (Target t)) = (s,t)
200 unpackts ((Target t), (Source s)) = (t,s)
202 --createNodes :: [NInst] -> ContState -> IndirectGraph Int Block
203 --createNodes xs (forwardRefs, backwardRefs) = evalState (createNodes' xs) (branches, bRefs)
204 -- where branches = map reverse forwardRefs
205 -- bRefs = map H.toAscList backwardRefs
207 --createNodes' ::[NInst] -> State NodeState (IndirectGraph Int Block)
208 {-createNodes' xs = do (st,ts) <- get
210 -- there are back refs and forward refs
211 ((s,t):st', (t',s'):ts')
213 then do put (st,ts') -- back reference splits block
214 let (Target entry, Source source) = (t',s')
215 return (take entry xs, [])
221 getCFG :: FilePath -> B.ByteString -> IO (Maybe CFG)
222 getCFG file name = do context <- getMethodIO file name
223 return $ buildCFGContext context
225 buildCFGContext :: MContext -> Maybe CFG
226 buildCFGContext = liftM genCFGPure
228 genCFGPure :: Context -> CFG
229 genCFGPure (ins,cls) = let offsets = getInstOffsets ins
230 taggedInst = zip3 (map Source $ sumSeries'' offsets) offsets ins
231 cont = execState (findContinuations taggedInst) ([], H.empty)
232 in evalState (genCFGState taggedInst) (cont, M.empty)
234 methodCodeM :: B.ByteString-> Maybe (Class Resolved)-> Maybe (Class Resolved, B.ByteString)
235 methodCodeM name mcls = do cls <- mcls
236 ins <- methodCode cls name
239 getMethod :: FilePath-> B.ByteString-> IO (Maybe (B.ByteString, Class Resolved))
240 getMethod file name = do cls <- parseClassFile file
241 return (methodCode cls name >>= \ins -> return (ins,cls))
243 fullDecode :: (B.ByteString, t) -> ([J.Instruction], t)
244 fullDecode (ins, cls) = (J.codeInstructions $ J.decodeMethod ins, cls)
246 getMethodIO :: FilePath-> B.ByteString-> IO (Maybe ([J.Instruction], Class Resolved))
247 getMethodIO file name = do context <- getMethod file name
248 return $ liftM fullDecode context
251 genCFGState = undefined