3 --------------------------------------------------------------------------
5 -- Module: Harpy.CodeGenMonad
6 -- Copyright: (c) 2006-2007 Martin Grabmueller and Dirk Kleeblatt
9 -- Maintainer: {magr,klee}@cs.tu-berlin.de
10 -- Stability: provisional
11 -- Portability: portable (but generated code non-portable)
13 -- Monad for generating x86 machine code at runtime.
15 -- This is a combined reader-state-exception monad which handles all
16 -- the details of handling code buffers, emitting binary data,
19 -- All the code generation functions in module "Harpy.X86CodeGen" live
20 -- in this monad and use its error reporting facilities as well as the
21 -- internal state maintained by the monad.
23 -- The library user can pass a user environment and user state through
24 -- the monad. This state is independent from the internal state and
25 -- may be used by higher-level code generation libraries to maintain
26 -- their own state across code generation operations.
27 -- --------------------------------------------------------------------------
29 module Harpy.CodeGenMonad(
40 -- ** General code generator monad operations
42 -- ** Accessing code generation internals
47 -- ** Access to user state and environment
52 -- ** Label management
69 -- ** Executing code generation
72 -- ** Calling generated functions
74 -- ** Interface to disassembler
78 import qualified Harpy.X86Disassembler as Dis
82 import Text.PrettyPrint.HughesPJ
87 import qualified Data.Map as Map
89 import Foreign.C.Types
92 import Control.Monad.Trans
94 import Language.Haskell.TH.Syntax
97 -- | An error message produced by a code generation operation.
100 -- | The code generation monad, a combined reader-state-exception
102 newtype CodeGen e s a = CodeGen ((e, CodeGenEnv) -> (s, CodeGenState) -> IO ((s, CodeGenState), Either ErrMsg a))
104 -- | Configuration of the code generator. There are currently two
105 -- configuration options. The first is the number fo bytes to use for
106 -- allocating code buffers (the first as well as additional buffers
107 -- created in calls to 'ensureBufferSize'. The second allows to pass
108 -- in a pre-allocated code buffer and its size. When this option is
109 -- used, Harpy does not perform any code buffer resizing (calls to
110 -- 'ensureBufferSize' will be equivalent to calls to
111 -- 'checkBufferSize').
112 data CodeGenConfig = CodeGenConfig {
113 codeBufferSize :: Int, -- ^ Size of individual code buffer blocks.
114 customCodeBuffer :: Maybe (Ptr Word8, Int) -- ^ Code buffer passed in.
117 -- | Internal state of the code generator
118 data CodeGenState = CodeGenState {
119 buffer :: Ptr Word8, -- ^ Pointer to current code buffer.
120 bufferList :: [(Ptr Word8, Int)], -- ^ List of all other code buffers.
121 firstBuffer :: Ptr Word8, -- ^ Pointer to first buffer.
122 bufferOfs :: Int, -- ^ Current offset into buffer where next instruction will be stored.
123 bufferSize :: Int, -- ^ Size of current buffer.
124 relocEntries :: [Reloc], -- ^ List of all emitted relocation entries.
125 nextLabel :: Int, -- ^ Counter for generating labels.
126 definedLabels :: Map.Map Int (Ptr Word8, Int, String), -- ^ Map of already defined labels.
127 pendingFixups :: Map.Map Int [FixupEntry], -- ^ Map of labels which have been referenced, but not defined.
128 config :: CodeGenConfig -- ^ Configuration record.
131 data FixupEntry = FixupEntry {
132 fueBuffer :: Ptr Word8,
137 -- | Kind of a fixup entry. When a label is emitted with
138 -- 'defineLabel', all prior references to this label must be fixed
139 -- up. This data type tells how to perform the fixup operation.
140 data FixupKind = Fixup8 -- ^ 8-bit relative reference
141 | Fixup16 -- ^ 16-bit relative reference
142 | Fixup32 -- ^ 32-bit relative reference
143 | Fixup32Absolute -- ^ 32-bit absolute reference
146 data CodeGenEnv = CodeGenEnv { tailContext :: Bool }
149 -- | Kind of relocation, for example PC-relative
150 data RelocKind = RelocPCRel -- ^ PC-relative relocation
151 | RelocAbsolute -- ^ Absolute address
154 -- | Relocation entry
155 data Reloc = Reloc { offset :: Int,
156 -- ^ offset in code block which needs relocation
158 -- ^ kind of relocation
165 data Label = Label Int String
168 unCg :: CodeGen e s a -> ((e, CodeGenEnv) -> (s, CodeGenState) -> IO ((s, CodeGenState), Either ErrMsg a))
171 instance Monad (CodeGen e s) where
172 return x = cgReturn x
173 fail err = cgFail err
176 cgReturn :: a -> CodeGen e s a
177 cgReturn x = CodeGen (\_env state -> return (state, Right x))
179 cgFail :: String -> CodeGen e s a
180 cgFail err = CodeGen (\_env state -> return (state, Left (text err)))
182 cgBind :: CodeGen e s a -> (a -> CodeGen e s a1) -> CodeGen e s a1
183 cgBind m k = CodeGen (\env state ->
184 do r1 <- unCg m env state
186 (state', Left err) -> return (state', Left err)
187 (state', Right v) -> unCg (k v) env state')
189 -- | Abort code generation with the given error message.
190 failCodeGen :: Doc -> CodeGen e s a
191 failCodeGen d = CodeGen (\_env state -> return (state, Left d))
193 instance MonadIO (CodeGen e s) where
194 liftIO st = CodeGen (\_env state -> do { r <- st; return (state, Right r) })
196 emptyCodeGenState :: CodeGenState
197 emptyCodeGenState = CodeGenState { buffer = undefined,
199 firstBuffer = undefined,
204 definedLabels = Map.empty,
205 pendingFixups = Map.empty,
206 config = defaultCodeGenConfig}
208 -- | Default code generation configuration. The code buffer size is
209 -- set to 4KB, and code buffer management is automatic. This value is
210 -- intended to be used with record update syntax, for example:
212 -- > runCodeGenWithConfig ... defaultCodeGenConfig{codeBufferSize = 128} ...
213 defaultCodeGenConfig :: CodeGenConfig
214 defaultCodeGenConfig = CodeGenConfig { codeBufferSize = defaultCodeBufferSize,
215 customCodeBuffer = Nothing }
217 defaultCodeBufferSize :: Int
218 defaultCodeBufferSize = 4096
220 -- | Execute code generation, given a user environment and state. The
221 -- result is a tuple of the resulting user state and either an error
222 -- message (when code generation failed) or the result of the code
223 -- generation. This function runs 'runCodeGenWithConfig' with a
224 -- sensible default configuration.
225 runCodeGen :: CodeGen e s a -> e -> s -> IO (s, Either ErrMsg a)
226 runCodeGen cg uenv ustate =
227 runCodeGenWithConfig cg uenv ustate defaultCodeGenConfig
229 foreign import ccall "static stdlib.h"
230 memalign :: CUInt -> CUInt -> IO (Ptr a)
232 foreign import ccall "static sys/mman.h"
233 mprotect :: CUInt -> CUInt -> Int -> IO Int
235 -- | Like 'runCodeGen', but allows more control over the code
236 -- generation process. In addition to a code generator and a user
237 -- environment and state, a code generation configuration must be
238 -- provided. A code generation configuration allows control over the
239 -- allocation of code buffers, for example.
240 runCodeGenWithConfig :: CodeGen e s a -> e -> s -> CodeGenConfig -> IO (s, Either ErrMsg a)
241 runCodeGenWithConfig (CodeGen cg) uenv ustate conf =
242 do (buf, sze) <- case customCodeBuffer conf of
243 Nothing -> do let initSize = codeBufferSize conf
244 let size = fromIntegral initSize
245 arr <- memalign 0x1000 size
246 -- 0x7 = PROT_{READ,WRITE,EXEC}
247 _ <- mprotect (fromIntegral $ ptrToIntPtr arr) size 0x7
248 return (arr, initSize)
249 Just (buf, sze) -> return (buf, sze)
250 let env = CodeGenEnv {tailContext = True}
251 let state = emptyCodeGenState{buffer = buf,
256 ((ustate', _), res) <- cg (uenv, env) (ustate, state)
257 return (ustate', res)
259 -- | Check whether the code buffer has room for at least the given
260 -- number of bytes. This should be called by code generators
261 -- whenever it cannot be guaranteed that the code buffer is large
262 -- enough to hold all the generated code. Lets the code generation
263 -- monad fail when the buffer overflows.
265 -- /Note:/ Starting with version 0.4, Harpy automatically checks for
266 -- buffer overflow, so you do not need to call this function anymore.
267 checkBufferSize :: Int -> CodeGen e s ()
268 checkBufferSize needed =
269 do state <- getInternalState
270 unless (bufferOfs state + needed <= bufferSize state)
271 (failCodeGen (text "code generation buffer overflow: needed additional" <+>
272 int needed <+> text "bytes (offset =" <+>
273 int (bufferOfs state) <>
274 text ", buffer size =" <+>
275 int (bufferSize state) <> text ")"))
277 -- | Make sure that the code buffer has room for at least the given
278 -- number of bytes. This should be called by code generators whenever
279 -- it cannot be guaranteed that the code buffer is large enough to
280 -- hold all the generated code. Creates a new buffer and places a
281 -- jump to the new buffer when there is not sufficient space
282 -- available. When code generation was invoked with a pre-defined
283 -- code buffer, code generation is aborted on overflow.
285 -- /Note:/ Starting with version 0.4, Harpy automatically checks for
286 -- buffer overflow, so you do not need to call this function anymore.
287 ensureBufferSize :: Int -> CodeGen e s ()
288 ensureBufferSize needed =
289 do state <- getInternalState
290 case (customCodeBuffer (config state)) of
292 unless (bufferOfs state + needed + 5 <= bufferSize state)
293 (do let incrSize = max (needed + 16) (codeBufferSize (config state))
294 arr <- liftIO $ mallocBytes incrSize
296 let buf = buffer state
298 disp = arr `minusPtr` (buf `plusPtr` ofs) - 5
299 emit8 0xe9 -- FIXME: Machine dependent!
300 emit32 (fromIntegral disp)
301 st <- getInternalState
302 setInternalState st{buffer = arr, bufferList = bufferList st ++ [(buffer st, bufferOfs st)], bufferOfs = 0})
303 Just (_, _) -> checkBufferSize needed
305 -- | Return a pointer to the beginning of the first code buffer, which
306 -- is normally the entry point to the generated code.
307 getEntryPoint :: CodeGen e s (Ptr Word8)
309 CodeGen (\ _ (ustate, state) ->
310 return $ ((ustate, state), Right (firstBuffer state)))
312 -- | Return the current offset in the code buffer, e.g. the offset
313 -- at which the next instruction will be emitted.
314 getCodeOffset :: CodeGen e s Int
316 CodeGen (\ _ (ustate, state) ->
317 return $ ((ustate, state), Right (bufferOfs state)))
319 -- | Set the user state to the given value.
320 setState :: s -> CodeGen e s ()
322 CodeGen (\ _ (_, state) ->
323 return $ ((st, state), Right ()))
325 -- | Return the current user state.
326 getState :: CodeGen e s s
328 CodeGen (\ _ (ustate, state) ->
329 return $ ((ustate, state), Right (ustate)))
331 -- | Return the current user environment.
332 getEnv :: CodeGen e s e
334 CodeGen (\ (uenv, _) state ->
335 return $ (state, Right uenv))
337 -- | Set the environment to the given value and execute the given
338 -- code generation in this environment.
339 withEnv :: e -> CodeGen e s r -> CodeGen e s r
340 withEnv e (CodeGen cg) =
341 CodeGen (\ (_, env) state ->
344 -- | Set the user state to the given value.
345 setInternalState :: CodeGenState -> CodeGen e s ()
346 setInternalState st =
347 CodeGen (\ _ (ustate, _) ->
348 return $ ((ustate, st), Right ()))
350 -- | Return the current user state.
351 getInternalState :: CodeGen e s CodeGenState
353 CodeGen (\ _ (ustate, state) ->
354 return $ ((ustate, state), Right (state)))
356 -- | Return the pointer to the start of the code buffer.
357 getBasePtr :: CodeGen e s (Ptr Word8)
359 CodeGen (\ _ (ustate, state) ->
360 return $ ((ustate, state), Right (buffer state)))
362 -- | Return a list of all code buffers and their respective size
363 -- (i.e., actually used space for code, not allocated size).
364 getCodeBufferList :: CodeGen e s [(Ptr Word8, Int)]
365 getCodeBufferList = do st <- getInternalState
366 return $ bufferList st ++ [(buffer st, bufferOfs st)]
368 -- | Generate a new label to be used with the label operations
369 -- 'emitFixup' and 'defineLabel'.
370 newLabel :: CodeGen e s Label
372 do state <- getInternalState
373 let lab = nextLabel state
374 setInternalState state{nextLabel = lab + 1}
375 return (Label lab "")
377 -- | Generate a new label to be used with the label operations
378 -- 'emitFixup' and 'defineLabel'. The given name is used for
379 -- diagnostic purposes, and will appear in the disassembly.
380 newNamedLabel :: String -> CodeGen e s Label
382 do state <- getInternalState
383 let lab = nextLabel state
384 setInternalState state{nextLabel = lab + 1}
385 return (Label lab name)
387 -- | Generate a new label and define it at once
388 setLabel :: CodeGen e s Label
394 -- | Emit a relocation entry for the given offset, relocation kind
395 -- and target address.
396 emitRelocInfo :: Int -> RelocKind -> FunPtr a -> CodeGen e s ()
397 emitRelocInfo ofs knd addr =
398 do state <- getInternalState
399 setInternalState state{relocEntries =
402 address = castFunPtr addr} :
403 (relocEntries state)}
405 -- | Emit a byte value to the code buffer.
406 emit8 :: Word8 -> CodeGen e s ()
408 CodeGen (\ _ (ustate, state) ->
409 do let buf = buffer state
410 ptr = bufferOfs state
411 pokeByteOff buf ptr op
412 return $ ((ustate, state{bufferOfs = ptr + 1}), Right ()))
414 -- | Store a byte value at the given offset into the code buffer.
415 emit8At :: Int -> Word8 -> CodeGen e s ()
417 CodeGen (\ _ (ustate, state) ->
418 do let buf = buffer state
419 pokeByteOff buf pos op
420 return $ ((ustate, state), Right ()))
422 -- | Return the byte value at the given offset in the code buffer.
423 peek8At :: Int -> CodeGen e s Word8
425 CodeGen (\ _ (ustate, state) ->
426 do let buf = buffer state
427 b <- peekByteOff buf pos
428 return $ ((ustate, state), Right b))
430 -- | Like 'emit8', but for a 32-bit value.
431 emit32 :: Word32 -> CodeGen e s ()
433 CodeGen (\ _ (ustate, state) ->
434 do let buf = buffer state
435 ptr = bufferOfs state
436 pokeByteOff buf ptr op
437 return $ ((ustate, state{bufferOfs = ptr + 4}), Right ()))
439 -- | Like 'emit8At', but for a 32-bit value.
440 emit32At :: Int -> Word32 -> CodeGen e s ()
442 CodeGen (\ _ (ustate, state) ->
443 do let buf = buffer state
444 pokeByteOff buf pos op
445 return $ ((ustate, state), Right ()))
447 -- | Emit a label at the current offset in the code buffer. All
448 -- references to the label will be relocated to this offset.
449 defineLabel :: Label -> CodeGen e s ()
450 defineLabel (Label lab name) =
451 do state <- getInternalState
452 case Map.lookup lab (definedLabels state) of
453 Just _ -> failCodeGen $ text "duplicate definition of label" <+>
456 case Map.lookup lab (pendingFixups state) of
457 Just fixups -> do mapM_ (performFixup (buffer state) (bufferOfs state)) fixups
458 setInternalState state{pendingFixups = Map.delete lab (pendingFixups state)}
460 state1 <- getInternalState
461 setInternalState state1{definedLabels = Map.insert lab (buffer state1, bufferOfs state1, name) (definedLabels state1)}
463 performFixup :: Ptr Word8 -> Int -> FixupEntry -> CodeGen e s ()
464 performFixup labBuf labOfs (FixupEntry{fueBuffer = buf, fueOfs = ofs, fueKind = knd}) =
465 do let diff = (labBuf `plusPtr` labOfs) `minusPtr` (buf `plusPtr` ofs)
467 Fixup8 -> pokeByteOff buf ofs (fromIntegral diff - 1 :: Word8)
468 Fixup16 -> pokeByteOff buf ofs (fromIntegral diff - 2 :: Word16)
469 Fixup32 -> pokeByteOff buf ofs (fromIntegral diff - 4 :: Word32)
470 Fixup32Absolute -> pokeByteOff buf ofs (fromIntegral (ptrToWordPtr (labBuf `plusPtr` labOfs)) :: Word32)
474 -- | This operator gives neat syntax for defining labels. When @l@ is a label, the code
476 -- > l @@ mov eax ebx
478 -- associates the label l with the following @mov@ instruction.
479 (@@) :: Label -> CodeGen e s a -> CodeGen e s a
480 (@@) lab gen = do defineLabel lab
483 -- | Emit a fixup entry for the given label at the current offset in
484 -- the code buffer (unless the label is already defined).
485 -- The instruction at this offset will
486 -- be patched to target the address associated with this label when
487 -- it is defined later.
488 emitFixup :: Label -> Int -> FixupKind -> CodeGen e s ()
489 emitFixup (Label lab _) ofs knd =
490 do state <- getInternalState
491 let base = buffer state
492 ptr = bufferOfs state
493 fue = FixupEntry{fueBuffer = base,
496 case Map.lookup lab (definedLabels state) of
497 Just (labBuf, labOfs, _) -> performFixup labBuf labOfs fue
498 Nothing -> setInternalState state{pendingFixups = Map.insertWith (++) lab [fue] (pendingFixups state)}
500 -- | Return the address of a label, fail if the label is not yet defined.
501 labelAddress :: Label -> CodeGen e s (Ptr a)
502 labelAddress (Label lab name) = do
503 state <- getInternalState
504 case Map.lookup lab (definedLabels state) of
505 Just (labBuf, labOfs, _) -> return $ plusPtr labBuf labOfs
506 Nothing -> fail $ "Label " ++ show lab ++ "(" ++ name ++ ") not yet defined"
509 -- | Disassemble all code buffers. The result is a list of
510 -- disassembled instructions which can be converted to strings using
511 -- the 'Dis.showIntel' or 'Dis.showAtt' functions from module
512 -- "Harpy.X86Disassembler".
513 disassemble :: CodeGen e s [Dis.Instruction]
515 s <- getInternalState
516 let buffers = bufferList s
517 r <- mapM (\ (buff, len) -> do
518 r <- liftIO $ Dis.disassembleBlock buff len
520 Left err -> cgFail $ show err
521 Right instr -> return instr
522 ) $ buffers ++ [(buffer s, bufferOfs s)]
523 r' <- insertLabels (concat r)
525 where insertLabels :: [Dis.Instruction] -> CodeGen e s [Dis.Instruction]
526 insertLabels = liftM concat . mapM ins
527 ins :: Dis.Instruction -> CodeGen e s [Dis.Instruction]
528 ins i@(Dis.BadInstruction{}) = return [i]
529 ins i@(Dis.PseudoInstruction{}) = return [i]
530 ins i@(Dis.Instruction{Dis.address = addr}) =
531 do state <- getInternalState
532 let allLabs = Map.toList (definedLabels state)
533 labs = filter (\ (_, (buf, ofs, _)) -> fromIntegral (ptrToWordPtr (buf `plusPtr` ofs)) == addr) allLabs
534 createLabel (l, (buf, ofs, name)) = Dis.PseudoInstruction addr
537 "label " ++ show l ++
539 hex32 (fromIntegral (ptrToWordPtr (buf `plusPtr` ofs))) ++
541 _ -> name ++ ": [" ++
542 hex32 (fromIntegral (ptrToWordPtr (buf `plusPtr` ofs))) ++
544 return $ fmap createLabel labs ++ [i]
545 hex32 :: Int -> String
550 in take (8 - length s) (repeat '0') ++ s
554 callDecl :: String -> Q Type -> Q [Dec]
557 let (tvars, cxt, t) = case t0 of
558 ForallT vs c t' -> (vs, c, t')
561 let funptr = AppT (ConT $ mkName "FunPtr") t
562 let ioresult = addIO t
563 let ty = AppT (AppT ArrowT funptr) ioresult
564 dynName <- newName "conv"
565 let dyn = ForeignD $ ImportF CCall Safe "dynamic" dynName $ ForallT tvars cxt ty
567 cbody <- [| CodeGen (\env (ustate, state) ->
568 do let code = firstBuffer state
571 cast <- [|castPtrToFunPtr|]
572 let f = AppE (VarE dynName)
575 return $ LamE [VarP c] $ foldl AppE f $ map VarE vs
577 return $ ((ustate, state), Right res))|]
578 let call = ValD (VarP name) (NormalB $ LamE (map VarP vs) cbody) []
581 mkArgs (AppT (AppT ArrowT _from) to) = do
587 addIO (AppT t@(AppT ArrowT _from) to) = AppT t $ addIO to
588 addIO t = AppT (ConT $ mkName "IO") t
592 -- | Declare a stub function to call the code buffer. Arguments are the name
593 -- of the generated function, and the type the code buffer is supposed to have.
594 -- The type argument can be given using the [t| ... |] notation of Template Haskell.
595 -- Allowed types are the legal types for FFI functions.
596 callDecl :: String -> Q Type -> Q [Dec]