3 --------------------------------------------------------------------------
5 -- Module: Harpy.CodeGenMonad
6 -- Copyright: (c) 2006-2007 Martin Grabmueller and Dirk Kleeblatt
9 -- Maintainer: {magr,klee}@cs.tu-berlin.de
10 -- Stability: provisional
11 -- Portability: portable (but generated code non-portable)
13 -- Monad for generating x86 machine code at runtime.
15 -- This is a combined reader-state-exception monad which handles all
16 -- the details of handling code buffers, emitting binary data,
19 -- All the code generation functions in module "Harpy.X86CodeGen" live
20 -- in this monad and use its error reporting facilities as well as the
21 -- internal state maintained by the monad.
23 -- The library user can pass a user environment and user state through
24 -- the monad. This state is independent from the internal state and
25 -- may be used by higher-level code generation libraries to maintain
26 -- their own state across code generation operations.
27 -- --------------------------------------------------------------------------
29 module Harpy.CodeGenMonad(
40 -- ** General code generator monad operations
42 -- ** Accessing code generation internals
47 -- ** Access to user state and environment
52 -- ** Label management
69 -- ** Executing code generation
72 -- ** Calling generated functions
74 -- ** Interface to disassembler
78 import qualified Harpy.X86Disassembler as Dis
82 import Text.PrettyPrint.HughesPJ
87 import qualified Data.Map as Map
91 import Control.Monad.Trans
93 import Language.Haskell.TH.Syntax
96 -- | An error message produced by a code generation operation.
99 -- | The code generation monad, a combined reader-state-exception
101 newtype CodeGen e s a = CodeGen ((e, CodeGenEnv) -> (s, CodeGenState) -> IO ((s, CodeGenState), Either ErrMsg a))
103 -- | Configuration of the code generator. There are currently two
104 -- configuration options. The first is the number fo bytes to use for
105 -- allocating code buffers (the first as well as additional buffers
106 -- created in calls to 'ensureBufferSize'. The second allows to pass
107 -- in a pre-allocated code buffer and its size. When this option is
108 -- used, Harpy does not perform any code buffer resizing (calls to
109 -- 'ensureBufferSize' will be equivalent to calls to
110 -- 'checkBufferSize').
111 data CodeGenConfig = CodeGenConfig {
112 codeBufferSize :: Int, -- ^ Size of individual code buffer blocks.
113 customCodeBuffer :: Maybe (Ptr Word8, Int) -- ^ Code buffer passed in.
116 -- | Internal state of the code generator
117 data CodeGenState = CodeGenState {
118 buffer :: Ptr Word8, -- ^ Pointer to current code buffer.
119 bufferList :: [(Ptr Word8, Int)], -- ^ List of all other code buffers.
120 firstBuffer :: Ptr Word8, -- ^ Pointer to first buffer.
121 bufferOfs :: Int, -- ^ Current offset into buffer where next instruction will be stored.
122 bufferSize :: Int, -- ^ Size of current buffer.
123 relocEntries :: [Reloc], -- ^ List of all emitted relocation entries.
124 nextLabel :: Int, -- ^ Counter for generating labels.
125 definedLabels :: Map.Map Int (Ptr Word8, Int, String), -- ^ Map of already defined labels.
126 pendingFixups :: Map.Map Int [FixupEntry], -- ^ Map of labels which have been referenced, but not defined.
127 config :: CodeGenConfig -- ^ Configuration record.
130 data FixupEntry = FixupEntry {
131 fueBuffer :: Ptr Word8,
136 -- | Kind of a fixup entry. When a label is emitted with
137 -- 'defineLabel', all prior references to this label must be fixed
138 -- up. This data type tells how to perform the fixup operation.
139 data FixupKind = Fixup8 -- ^ 8-bit relative reference
140 | Fixup16 -- ^ 16-bit relative reference
141 | Fixup32 -- ^ 32-bit relative reference
142 | Fixup32Absolute -- ^ 32-bit absolute reference
145 data CodeGenEnv = CodeGenEnv { tailContext :: Bool }
148 -- | Kind of relocation, for example PC-relative
149 data RelocKind = RelocPCRel -- ^ PC-relative relocation
150 | RelocAbsolute -- ^ Absolute address
153 -- | Relocation entry
154 data Reloc = Reloc { offset :: Int,
155 -- ^ offset in code block which needs relocation
157 -- ^ kind of relocation
164 data Label = Label Int String
167 unCg :: CodeGen e s a -> ((e, CodeGenEnv) -> (s, CodeGenState) -> IO ((s, CodeGenState), Either ErrMsg a))
170 instance Monad (CodeGen e s) where
171 return x = cgReturn x
172 fail err = cgFail err
175 cgReturn :: a -> CodeGen e s a
176 cgReturn x = CodeGen (\_env state -> return (state, Right x))
178 cgFail :: String -> CodeGen e s a
179 cgFail err = CodeGen (\_env state -> return (state, Left (text err)))
181 cgBind :: CodeGen e s a -> (a -> CodeGen e s a1) -> CodeGen e s a1
182 cgBind m k = CodeGen (\env state ->
183 do r1 <- unCg m env state
185 (state', Left err) -> return (state', Left err)
186 (state', Right v) -> unCg (k v) env state')
188 -- | Abort code generation with the given error message.
189 failCodeGen :: Doc -> CodeGen e s a
190 failCodeGen d = CodeGen (\_env state -> return (state, Left d))
192 instance MonadIO (CodeGen e s) where
193 liftIO st = CodeGen (\_env state -> do { r <- st; return (state, Right r) })
195 emptyCodeGenState :: CodeGenState
196 emptyCodeGenState = CodeGenState { buffer = undefined,
198 firstBuffer = undefined,
203 definedLabels = Map.empty,
204 pendingFixups = Map.empty,
205 config = defaultCodeGenConfig}
207 -- | Default code generation configuration. The code buffer size is
208 -- set to 4KB, and code buffer management is automatic. This value is
209 -- intended to be used with record update syntax, for example:
211 -- > runCodeGenWithConfig ... defaultCodeGenConfig{codeBufferSize = 128} ...
212 defaultCodeGenConfig :: CodeGenConfig
213 defaultCodeGenConfig = CodeGenConfig { codeBufferSize = defaultCodeBufferSize,
214 customCodeBuffer = Nothing }
216 defaultCodeBufferSize :: Int
217 defaultCodeBufferSize = 4096
219 -- | Execute code generation, given a user environment and state. The
220 -- result is a tuple of the resulting user state and either an error
221 -- message (when code generation failed) or the result of the code
222 -- generation. This function runs 'runCodeGenWithConfig' with a
223 -- sensible default configuration.
224 runCodeGen :: CodeGen e s a -> e -> s -> IO (s, Either ErrMsg a)
225 runCodeGen cg uenv ustate =
226 runCodeGenWithConfig cg uenv ustate defaultCodeGenConfig
228 -- | Like 'runCodeGen', but allows more control over the code
229 -- generation process. In addition to a code generator and a user
230 -- environment and state, a code generation configuration must be
231 -- provided. A code generation configuration allows control over the
232 -- allocation of code buffers, for example.
233 runCodeGenWithConfig :: CodeGen e s a -> e -> s -> CodeGenConfig -> IO (s, Either ErrMsg a)
234 runCodeGenWithConfig (CodeGen cg) uenv ustate conf =
235 do (buf, sze) <- case customCodeBuffer conf of
236 Nothing -> do let initSize = codeBufferSize conf
237 arr <- mallocBytes initSize
238 return (arr, initSize)
239 Just (buf, sze) -> return (buf, sze)
240 let env = CodeGenEnv {tailContext = True}
241 let state = emptyCodeGenState{buffer = buf,
246 ((ustate', _), res) <- cg (uenv, env) (ustate, state)
247 return (ustate', res)
249 -- | Check whether the code buffer has room for at least the given
250 -- number of bytes. This should be called by code generators
251 -- whenever it cannot be guaranteed that the code buffer is large
252 -- enough to hold all the generated code. Lets the code generation
253 -- monad fail when the buffer overflows.
255 -- /Note:/ Starting with version 0.4, Harpy automatically checks for
256 -- buffer overflow, so you do not need to call this function anymore.
257 checkBufferSize :: Int -> CodeGen e s ()
258 checkBufferSize needed =
259 do state <- getInternalState
260 unless (bufferOfs state + needed <= bufferSize state)
261 (failCodeGen (text "code generation buffer overflow: needed additional" <+>
262 int needed <+> text "bytes (offset =" <+>
263 int (bufferOfs state) <>
264 text ", buffer size =" <+>
265 int (bufferSize state) <> text ")"))
267 -- | Make sure that the code buffer has room for at least the given
268 -- number of bytes. This should be called by code generators whenever
269 -- it cannot be guaranteed that the code buffer is large enough to
270 -- hold all the generated code. Creates a new buffer and places a
271 -- jump to the new buffer when there is not sufficient space
272 -- available. When code generation was invoked with a pre-defined
273 -- code buffer, code generation is aborted on overflow.
275 -- /Note:/ Starting with version 0.4, Harpy automatically checks for
276 -- buffer overflow, so you do not need to call this function anymore.
277 ensureBufferSize :: Int -> CodeGen e s ()
278 ensureBufferSize needed =
279 do state <- getInternalState
280 case (customCodeBuffer (config state)) of
282 unless (bufferOfs state + needed + 5 <= bufferSize state)
283 (do let incrSize = max (needed + 16) (codeBufferSize (config state))
284 arr <- liftIO $ mallocBytes incrSize
286 let buf = buffer state
288 disp = arr `minusPtr` (buf `plusPtr` ofs) - 5
289 emit8 0xe9 -- FIXME: Machine dependent!
290 emit32 (fromIntegral disp)
291 st <- getInternalState
292 setInternalState st{buffer = arr, bufferList = bufferList st ++ [(buffer st, bufferOfs st)], bufferOfs = 0})
293 Just (_, _) -> checkBufferSize needed
295 -- | Return a pointer to the beginning of the first code buffer, which
296 -- is normally the entry point to the generated code.
297 getEntryPoint :: CodeGen e s (Ptr Word8)
299 CodeGen (\ _ (ustate, state) ->
300 return $ ((ustate, state), Right (firstBuffer state)))
302 -- | Return the current offset in the code buffer, e.g. the offset
303 -- at which the next instruction will be emitted.
304 getCodeOffset :: CodeGen e s Int
306 CodeGen (\ _ (ustate, state) ->
307 return $ ((ustate, state), Right (bufferOfs state)))
309 -- | Set the user state to the given value.
310 setState :: s -> CodeGen e s ()
312 CodeGen (\ _ (_, state) ->
313 return $ ((st, state), Right ()))
315 -- | Return the current user state.
316 getState :: CodeGen e s s
318 CodeGen (\ _ (ustate, state) ->
319 return $ ((ustate, state), Right (ustate)))
321 -- | Return the current user environment.
322 getEnv :: CodeGen e s e
324 CodeGen (\ (uenv, _) state ->
325 return $ (state, Right uenv))
327 -- | Set the environment to the given value and execute the given
328 -- code generation in this environment.
329 withEnv :: e -> CodeGen e s r -> CodeGen e s r
330 withEnv e (CodeGen cg) =
331 CodeGen (\ (_, env) state ->
334 -- | Set the user state to the given value.
335 setInternalState :: CodeGenState -> CodeGen e s ()
336 setInternalState st =
337 CodeGen (\ _ (ustate, _) ->
338 return $ ((ustate, st), Right ()))
340 -- | Return the current user state.
341 getInternalState :: CodeGen e s CodeGenState
343 CodeGen (\ _ (ustate, state) ->
344 return $ ((ustate, state), Right (state)))
346 -- | Return the pointer to the start of the code buffer.
347 getBasePtr :: CodeGen e s (Ptr Word8)
349 CodeGen (\ _ (ustate, state) ->
350 return $ ((ustate, state), Right (buffer state)))
352 -- | Return a list of all code buffers and their respective size
353 -- (i.e., actually used space for code, not allocated size).
354 getCodeBufferList :: CodeGen e s [(Ptr Word8, Int)]
355 getCodeBufferList = do st <- getInternalState
356 return $ bufferList st ++ [(buffer st, bufferOfs st)]
358 -- | Generate a new label to be used with the label operations
359 -- 'emitFixup' and 'defineLabel'.
360 newLabel :: CodeGen e s Label
362 do state <- getInternalState
363 let lab = nextLabel state
364 setInternalState state{nextLabel = lab + 1}
365 return (Label lab "")
367 -- | Generate a new label to be used with the label operations
368 -- 'emitFixup' and 'defineLabel'. The given name is used for
369 -- diagnostic purposes, and will appear in the disassembly.
370 newNamedLabel :: String -> CodeGen e s Label
372 do state <- getInternalState
373 let lab = nextLabel state
374 setInternalState state{nextLabel = lab + 1}
375 return (Label lab name)
377 -- | Generate a new label and define it at once
378 setLabel :: CodeGen e s Label
384 -- | Emit a relocation entry for the given offset, relocation kind
385 -- and target address.
386 emitRelocInfo :: Int -> RelocKind -> FunPtr a -> CodeGen e s ()
387 emitRelocInfo ofs knd addr =
388 do state <- getInternalState
389 setInternalState state{relocEntries =
392 address = castFunPtr addr} :
393 (relocEntries state)}
395 -- | Emit a byte value to the code buffer.
396 emit8 :: Word8 -> CodeGen e s ()
398 CodeGen (\ _ (ustate, state) ->
399 do let buf = buffer state
400 ptr = bufferOfs state
401 pokeByteOff buf ptr op
402 return $ ((ustate, state{bufferOfs = ptr + 1}), Right ()))
404 -- | Store a byte value at the given offset into the code buffer.
405 emit8At :: Int -> Word8 -> CodeGen e s ()
407 CodeGen (\ _ (ustate, state) ->
408 do let buf = buffer state
409 pokeByteOff buf pos op
410 return $ ((ustate, state), Right ()))
412 -- | Return the byte value at the given offset in the code buffer.
413 peek8At :: Int -> CodeGen e s Word8
415 CodeGen (\ _ (ustate, state) ->
416 do let buf = buffer state
417 b <- peekByteOff buf pos
418 return $ ((ustate, state), Right b))
420 -- | Like 'emit8', but for a 32-bit value.
421 emit32 :: Word32 -> CodeGen e s ()
423 CodeGen (\ _ (ustate, state) ->
424 do let buf = buffer state
425 ptr = bufferOfs state
426 pokeByteOff buf ptr op
427 return $ ((ustate, state{bufferOfs = ptr + 4}), Right ()))
429 -- | Like 'emit8At', but for a 32-bit value.
430 emit32At :: Int -> Word32 -> CodeGen e s ()
432 CodeGen (\ _ (ustate, state) ->
433 do let buf = buffer state
434 pokeByteOff buf pos op
435 return $ ((ustate, state), Right ()))
437 -- | Emit a label at the current offset in the code buffer. All
438 -- references to the label will be relocated to this offset.
439 defineLabel :: Label -> CodeGen e s ()
440 defineLabel (Label lab name) =
441 do state <- getInternalState
442 case Map.lookup lab (definedLabels state) of
443 Just _ -> failCodeGen $ text "duplicate definition of label" <+>
446 case Map.lookup lab (pendingFixups state) of
447 Just fixups -> do mapM_ (performFixup (buffer state) (bufferOfs state)) fixups
448 setInternalState state{pendingFixups = Map.delete lab (pendingFixups state)}
450 state1 <- getInternalState
451 setInternalState state1{definedLabels = Map.insert lab (buffer state1, bufferOfs state1, name) (definedLabels state1)}
453 performFixup :: Ptr Word8 -> Int -> FixupEntry -> CodeGen e s ()
454 performFixup labBuf labOfs (FixupEntry{fueBuffer = buf, fueOfs = ofs, fueKind = knd}) =
455 do let diff = (labBuf `plusPtr` labOfs) `minusPtr` (buf `plusPtr` ofs)
457 Fixup8 -> pokeByteOff buf ofs (fromIntegral diff - 1 :: Word8)
458 Fixup16 -> pokeByteOff buf ofs (fromIntegral diff - 2 :: Word16)
459 Fixup32 -> pokeByteOff buf ofs (fromIntegral diff - 4 :: Word32)
460 Fixup32Absolute -> pokeByteOff buf ofs (fromIntegral (ptrToWordPtr (labBuf `plusPtr` labOfs)) :: Word32)
464 -- | This operator gives neat syntax for defining labels. When @l@ is a label, the code
466 -- > l @@ mov eax ebx
468 -- associates the label l with the following @mov@ instruction.
469 (@@) :: Label -> CodeGen e s a -> CodeGen e s a
470 (@@) lab gen = do defineLabel lab
473 -- | Emit a fixup entry for the given label at the current offset in
474 -- the code buffer (unless the label is already defined).
475 -- The instruction at this offset will
476 -- be patched to target the address associated with this label when
477 -- it is defined later.
478 emitFixup :: Label -> Int -> FixupKind -> CodeGen e s ()
479 emitFixup (Label lab _) ofs knd =
480 do state <- getInternalState
481 let base = buffer state
482 ptr = bufferOfs state
483 fue = FixupEntry{fueBuffer = base,
486 case Map.lookup lab (definedLabels state) of
487 Just (labBuf, labOfs, _) -> performFixup labBuf labOfs fue
488 Nothing -> setInternalState state{pendingFixups = Map.insertWith (++) lab [fue] (pendingFixups state)}
490 -- | Return the address of a label, fail if the label is not yet defined.
491 labelAddress :: Label -> CodeGen e s (Ptr a)
492 labelAddress (Label lab name) = do
493 state <- getInternalState
494 case Map.lookup lab (definedLabels state) of
495 Just (labBuf, labOfs, _) -> return $ plusPtr labBuf labOfs
496 Nothing -> fail $ "Label " ++ show lab ++ "(" ++ name ++ ") not yet defined"
499 -- | Disassemble all code buffers. The result is a list of
500 -- disassembled instructions which can be converted to strings using
501 -- the 'Dis.showIntel' or 'Dis.showAtt' functions from module
502 -- "Harpy.X86Disassembler".
503 disassemble :: CodeGen e s [Dis.Instruction]
505 s <- getInternalState
506 let buffers = bufferList s
507 r <- mapM (\ (buff, len) -> do
508 r <- liftIO $ Dis.disassembleBlock buff len
510 Left err -> cgFail $ show err
511 Right instr -> return instr
512 ) $ buffers ++ [(buffer s, bufferOfs s)]
513 r' <- insertLabels (concat r)
515 where insertLabels :: [Dis.Instruction] -> CodeGen e s [Dis.Instruction]
516 insertLabels = liftM concat . mapM ins
517 ins :: Dis.Instruction -> CodeGen e s [Dis.Instruction]
518 ins i@(Dis.BadInstruction{}) = return [i]
519 ins i@(Dis.PseudoInstruction{}) = return [i]
520 ins i@(Dis.Instruction{Dis.address = addr}) =
521 do state <- getInternalState
522 let allLabs = Map.toList (definedLabels state)
523 labs = filter (\ (_, (buf, ofs, _)) -> fromIntegral (ptrToWordPtr (buf `plusPtr` ofs)) == addr) allLabs
524 createLabel (l, (buf, ofs, name)) = Dis.PseudoInstruction addr
527 "label " ++ show l ++
529 hex32 (fromIntegral (ptrToWordPtr (buf `plusPtr` ofs))) ++
531 _ -> name ++ ": [" ++
532 hex32 (fromIntegral (ptrToWordPtr (buf `plusPtr` ofs))) ++
534 return $ fmap createLabel labs ++ [i]
535 hex32 :: Int -> String
540 in take (8 - length s) (repeat '0') ++ s
544 callDecl :: String -> Q Type -> Q [Dec]
547 let (tvars, cxt, t) = case t0 of
548 ForallT vs c t' -> (vs, c, t')
551 let funptr = AppT (ConT $ mkName "FunPtr") t
552 let ioresult = addIO t
553 let ty = AppT (AppT ArrowT funptr) ioresult
554 dynName <- newName "conv"
555 let dyn = ForeignD $ ImportF CCall Safe "dynamic" dynName $ ForallT tvars cxt ty
557 cbody <- [| CodeGen (\env (ustate, state) ->
558 do let code = firstBuffer state
561 cast <- [|castPtrToFunPtr|]
562 let f = AppE (VarE dynName)
565 return $ LamE [VarP c] $ foldl AppE f $ map VarE vs
567 return $ ((ustate, state), Right res))|]
568 let call = ValD (VarP name) (NormalB $ LamE (map VarP vs) cbody) []
571 mkArgs (AppT (AppT ArrowT _from) to) = do
577 addIO (AppT t@(AppT ArrowT _from) to) = AppT t $ addIO to
578 addIO t = AppT (ConT $ mkName "IO") t
582 -- | Declare a stub function to call the code buffer. Arguments are the name
583 -- of the generated function, and the type the code buffer is supposed to have.
584 -- The type argument can be given using the [t| ... |] notation of Template Haskell.
585 -- Allowed types are the legal types for FFI functions.
586 callDecl :: String -> Q Type -> Q [Dec]