3 --------------------------------------------------------------------------
5 -- Module: Harpy.CodeGenMonad
6 -- Copyright: (c) 2006-2007 Martin Grabmueller and Dirk Kleeblatt
9 -- Maintainer: {magr,klee}@cs.tu-berlin.de
10 -- Stability: provisional
11 -- Portability: portable (but generated code non-portable)
13 -- Monad for generating x86 machine code at runtime.
15 -- This is a combined reader-state-exception monad which handles all
16 -- the details of handling code buffers, emitting binary data,
19 -- All the code generation functions in module "Harpy.X86CodeGen" live
20 -- in this monad and use its error reporting facilities as well as the
21 -- internal state maintained by the monad.
23 -- The library user can pass a user environment and user state through
24 -- the monad. This state is independent from the internal state and
25 -- may be used by higher-level code generation libraries to maintain
26 -- their own state across code generation operations.
27 -- --------------------------------------------------------------------------
29 module Harpy.CodeGenMonad(
40 -- ** General code generator monad operations
42 -- ** Accessing code generation internals
47 -- ** Access to user state and environment
52 -- ** Label management
69 -- ** Executing code generation
72 -- ** Calling generated functions
74 -- ** Interface to disassembler
78 import qualified Harpy.X86Disassembler as Dis
82 import Text.PrettyPrint.HughesPJ
87 import qualified Data.Map as Map
89 import Foreign.C.Types
92 import Control.Monad.Trans
94 import Language.Haskell.TH.Syntax
97 -- | An error message produced by a code generation operation.
100 -- | The code generation monad, a combined reader-state-exception
102 newtype CodeGen e s a = CodeGen ((e, CodeGenEnv) -> (s, CodeGenState) -> IO ((s, CodeGenState), Either ErrMsg a))
104 -- | Configuration of the code generator. There are currently two
105 -- configuration options. The first is the number fo bytes to use for
106 -- allocating code buffers (the first as well as additional buffers
107 -- created in calls to 'ensureBufferSize'. The second allows to pass
108 -- in a pre-allocated code buffer and its size. When this option is
109 -- used, Harpy does not perform any code buffer resizing (calls to
110 -- 'ensureBufferSize' will be equivalent to calls to
111 -- 'checkBufferSize').
112 data CodeGenConfig = CodeGenConfig {
113 codeBufferSize :: Int, -- ^ Size of individual code buffer blocks.
114 customCodeBuffer :: Maybe (Ptr Word8, Int) -- ^ Code buffer passed in.
117 -- | Internal state of the code generator
118 data CodeGenState = CodeGenState {
119 buffer :: Ptr Word8, -- ^ Pointer to current code buffer.
120 bufferList :: [(Ptr Word8, Int)], -- ^ List of all other code buffers.
121 firstBuffer :: Ptr Word8, -- ^ Pointer to first buffer.
122 bufferOfs :: Int, -- ^ Current offset into buffer where next instruction will be stored.
123 bufferSize :: Int, -- ^ Size of current buffer.
124 relocEntries :: [Reloc], -- ^ List of all emitted relocation entries.
125 nextLabel :: Int, -- ^ Counter for generating labels.
126 definedLabels :: Map.Map Int (Ptr Word8, Int, String), -- ^ Map of already defined labels.
127 pendingFixups :: Map.Map Int [FixupEntry], -- ^ Map of labels which have been referenced, but not defined.
128 config :: CodeGenConfig -- ^ Configuration record.
131 data FixupEntry = FixupEntry {
132 fueBuffer :: Ptr Word8,
137 -- | Kind of a fixup entry. When a label is emitted with
138 -- 'defineLabel', all prior references to this label must be fixed
139 -- up. This data type tells how to perform the fixup operation.
140 data FixupKind = Fixup8 -- ^ 8-bit relative reference
141 | Fixup16 -- ^ 16-bit relative reference
142 | Fixup32 -- ^ 32-bit relative reference
143 | Fixup32Absolute -- ^ 32-bit absolute reference
146 data CodeGenEnv = CodeGenEnv { tailContext :: Bool }
149 -- | Kind of relocation, for example PC-relative
150 data RelocKind = RelocPCRel -- ^ PC-relative relocation
151 | RelocAbsolute -- ^ Absolute address
154 -- | Relocation entry
155 data Reloc = Reloc { offset :: Int,
156 -- ^ offset in code block which needs relocation
158 -- ^ kind of relocation
165 data Label = Label Int String
168 unCg :: CodeGen e s a -> ((e, CodeGenEnv) -> (s, CodeGenState) -> IO ((s, CodeGenState), Either ErrMsg a))
171 instance Monad (CodeGen e s) where
172 return x = cgReturn x
173 fail err = cgFail err
176 instance Functor (CodeGen e s) where
179 cgReturn :: a -> CodeGen e s a
180 cgReturn x = CodeGen (\_env state -> return (state, Right x))
182 cgFail :: String -> CodeGen e s a
183 cgFail err = CodeGen (\_env state -> return (state, Left (text err)))
185 cgBind :: CodeGen e s a -> (a -> CodeGen e s a1) -> CodeGen e s a1
186 cgBind m k = CodeGen (\env state ->
187 do r1 <- unCg m env state
189 (state', Left err) -> return (state', Left err)
190 (state', Right v) -> unCg (k v) env state')
192 -- | Abort code generation with the given error message.
193 failCodeGen :: Doc -> CodeGen e s a
194 failCodeGen d = CodeGen (\_env state -> return (state, Left d))
196 instance MonadIO (CodeGen e s) where
197 liftIO st = CodeGen (\_env state -> do { r <- st; return (state, Right r) })
199 emptyCodeGenState :: CodeGenState
200 emptyCodeGenState = CodeGenState { buffer = undefined,
202 firstBuffer = undefined,
207 definedLabels = Map.empty,
208 pendingFixups = Map.empty,
209 config = defaultCodeGenConfig}
211 -- | Default code generation configuration. The code buffer size is
212 -- set to 4KB, and code buffer management is automatic. This value is
213 -- intended to be used with record update syntax, for example:
215 -- > runCodeGenWithConfig ... defaultCodeGenConfig{codeBufferSize = 128} ...
216 defaultCodeGenConfig :: CodeGenConfig
217 defaultCodeGenConfig = CodeGenConfig { codeBufferSize = defaultCodeBufferSize,
218 customCodeBuffer = Nothing }
220 defaultCodeBufferSize :: Int
221 defaultCodeBufferSize = 4096
223 -- | Execute code generation, given a user environment and state. The
224 -- result is a tuple of the resulting user state and either an error
225 -- message (when code generation failed) or the result of the code
226 -- generation. This function runs 'runCodeGenWithConfig' with a
227 -- sensible default configuration.
228 runCodeGen :: CodeGen e s a -> e -> s -> IO (s, Either ErrMsg a)
229 runCodeGen cg uenv ustate =
230 runCodeGenWithConfig cg uenv ustate defaultCodeGenConfig
232 foreign import ccall "static stdlib.h"
233 memalign :: CUInt -> CUInt -> IO (Ptr a)
235 foreign import ccall "static sys/mman.h"
236 mprotect :: CUInt -> CUInt -> Int -> IO Int
238 mallocExecBytes :: Int -> IO (Ptr a)
239 mallocExecBytes size' = do
240 arr <- memalign 0x1000 size
241 -- 0x7 = PROT_{READ,WRITE,EXEC}
242 _ <- mprotect (fromIntegral $ ptrToIntPtr arr) size 0x7
244 where size = fromIntegral size'
246 -- | Like 'runCodeGen', but allows more control over the code
247 -- generation process. In addition to a code generator and a user
248 -- environment and state, a code generation configuration must be
249 -- provided. A code generation configuration allows control over the
250 -- allocation of code buffers, for example.
251 runCodeGenWithConfig :: CodeGen e s a -> e -> s -> CodeGenConfig -> IO (s, Either ErrMsg a)
252 runCodeGenWithConfig (CodeGen cg) uenv ustate conf =
253 do (buf, sze) <- case customCodeBuffer conf of
254 Nothing -> do let initSize = codeBufferSize conf
255 arr <- mallocExecBytes initSize
256 return (arr, initSize)
257 Just (buf, sze) -> return (buf, sze)
258 let env = CodeGenEnv {tailContext = True}
259 let state = emptyCodeGenState{buffer = buf,
264 ((ustate', _), res) <- cg (uenv, env) (ustate, state)
265 return (ustate', res)
267 -- | Check whether the code buffer has room for at least the given
268 -- number of bytes. This should be called by code generators
269 -- whenever it cannot be guaranteed that the code buffer is large
270 -- enough to hold all the generated code. Lets the code generation
271 -- monad fail when the buffer overflows.
273 -- /Note:/ Starting with version 0.4, Harpy automatically checks for
274 -- buffer overflow, so you do not need to call this function anymore.
275 checkBufferSize :: Int -> CodeGen e s ()
276 checkBufferSize needed =
277 do state <- getInternalState
278 unless (bufferOfs state + needed <= bufferSize state)
279 (failCodeGen (text "code generation buffer overflow: needed additional" <+>
280 int needed <+> text "bytes (offset =" <+>
281 int (bufferOfs state) <>
282 text ", buffer size =" <+>
283 int (bufferSize state) <> text ")"))
285 -- | Make sure that the code buffer has room for at least the given
286 -- number of bytes. This should be called by code generators whenever
287 -- it cannot be guaranteed that the code buffer is large enough to
288 -- hold all the generated code. Creates a new buffer and places a
289 -- jump to the new buffer when there is not sufficient space
290 -- available. When code generation was invoked with a pre-defined
291 -- code buffer, code generation is aborted on overflow.
293 -- /Note:/ Starting with version 0.4, Harpy automatically checks for
294 -- buffer overflow, so you do not need to call this function anymore.
295 ensureBufferSize :: Int -> CodeGen e s ()
296 ensureBufferSize needed =
297 do state <- getInternalState
298 case (customCodeBuffer (config state)) of
300 unless (bufferOfs state + needed + 5 <= bufferSize state)
301 (do let incrSize = max (needed + 16) (codeBufferSize (config state))
302 arr <- liftIO $ mallocExecBytes incrSize
304 let buf = buffer state
306 disp = arr `minusPtr` (buf `plusPtr` ofs) - 5
307 emit8 0xe9 -- FIXME: Machine dependent!
308 emit32 (fromIntegral disp)
309 st <- getInternalState
310 setInternalState st{buffer = arr, bufferList = bufferList st ++ [(buffer st, bufferOfs st)], bufferOfs = 0})
311 Just (_, _) -> checkBufferSize needed
313 -- | Return a pointer to the beginning of the first code buffer, which
314 -- is normally the entry point to the generated code.
315 getEntryPoint :: CodeGen e s (Ptr Word8)
317 CodeGen (\ _ (ustate, state) ->
318 return $ ((ustate, state), Right (firstBuffer state)))
320 -- | Return the current offset in the code buffer, e.g. the offset
321 -- at which the next instruction will be emitted.
322 getCodeOffset :: CodeGen e s Int
324 CodeGen (\ _ (ustate, state) ->
325 return $ ((ustate, state), Right (bufferOfs state)))
327 -- | Set the user state to the given value.
328 setState :: s -> CodeGen e s ()
330 CodeGen (\ _ (_, state) ->
331 return $ ((st, state), Right ()))
333 -- | Return the current user state.
334 getState :: CodeGen e s s
336 CodeGen (\ _ (ustate, state) ->
337 return $ ((ustate, state), Right (ustate)))
339 -- | Return the current user environment.
340 getEnv :: CodeGen e s e
342 CodeGen (\ (uenv, _) state ->
343 return $ (state, Right uenv))
345 -- | Set the environment to the given value and execute the given
346 -- code generation in this environment.
347 withEnv :: e -> CodeGen e s r -> CodeGen e s r
348 withEnv e (CodeGen cg) =
349 CodeGen (\ (_, env) state ->
352 -- | Set the user state to the given value.
353 setInternalState :: CodeGenState -> CodeGen e s ()
354 setInternalState st =
355 CodeGen (\ _ (ustate, _) ->
356 return $ ((ustate, st), Right ()))
358 -- | Return the current user state.
359 getInternalState :: CodeGen e s CodeGenState
361 CodeGen (\ _ (ustate, state) ->
362 return $ ((ustate, state), Right (state)))
364 -- | Return the pointer to the start of the code buffer.
365 getBasePtr :: CodeGen e s (Ptr Word8)
367 CodeGen (\ _ (ustate, state) ->
368 return $ ((ustate, state), Right (buffer state)))
370 -- | Return a list of all code buffers and their respective size
371 -- (i.e., actually used space for code, not allocated size).
372 getCodeBufferList :: CodeGen e s [(Ptr Word8, Int)]
373 getCodeBufferList = do st <- getInternalState
374 return $ bufferList st ++ [(buffer st, bufferOfs st)]
376 -- | Generate a new label to be used with the label operations
377 -- 'emitFixup' and 'defineLabel'.
378 newLabel :: CodeGen e s Label
380 do state <- getInternalState
381 let lab = nextLabel state
382 setInternalState state{nextLabel = lab + 1}
383 return (Label lab "")
385 -- | Generate a new label to be used with the label operations
386 -- 'emitFixup' and 'defineLabel'. The given name is used for
387 -- diagnostic purposes, and will appear in the disassembly.
388 newNamedLabel :: String -> CodeGen e s Label
390 do state <- getInternalState
391 let lab = nextLabel state
392 setInternalState state{nextLabel = lab + 1}
393 return (Label lab name)
395 -- | Generate a new label and define it at once
396 setLabel :: CodeGen e s Label
402 -- | Emit a relocation entry for the given offset, relocation kind
403 -- and target address.
404 emitRelocInfo :: Int -> RelocKind -> FunPtr a -> CodeGen e s ()
405 emitRelocInfo ofs knd addr =
406 do state <- getInternalState
407 setInternalState state{relocEntries =
410 address = castFunPtr addr} :
411 (relocEntries state)}
413 -- | Emit a byte value to the code buffer.
414 emit8 :: Word8 -> CodeGen e s ()
416 CodeGen (\ _ (ustate, state) ->
417 do let buf = buffer state
418 ptr = bufferOfs state
419 pokeByteOff buf ptr op
420 return $ ((ustate, state{bufferOfs = ptr + 1}), Right ()))
422 -- | Store a byte value at the given offset into the code buffer.
423 emit8At :: Int -> Word8 -> CodeGen e s ()
425 CodeGen (\ _ (ustate, state) ->
426 do let buf = buffer state
427 pokeByteOff buf pos op
428 return $ ((ustate, state), Right ()))
430 -- | Return the byte value at the given offset in the code buffer.
431 peek8At :: Int -> CodeGen e s Word8
433 CodeGen (\ _ (ustate, state) ->
434 do let buf = buffer state
435 b <- peekByteOff buf pos
436 return $ ((ustate, state), Right b))
438 -- | Like 'emit8', but for a 32-bit value.
439 emit32 :: Word32 -> CodeGen e s ()
441 CodeGen (\ _ (ustate, state) ->
442 do let buf = buffer state
443 ptr = bufferOfs state
444 pokeByteOff buf ptr op
445 return $ ((ustate, state{bufferOfs = ptr + 4}), Right ()))
447 -- | Like 'emit8At', but for a 32-bit value.
448 emit32At :: Int -> Word32 -> CodeGen e s ()
450 CodeGen (\ _ (ustate, state) ->
451 do let buf = buffer state
452 pokeByteOff buf pos op
453 return $ ((ustate, state), Right ()))
455 -- | Emit a label at the current offset in the code buffer. All
456 -- references to the label will be relocated to this offset.
457 defineLabel :: Label -> CodeGen e s ()
458 defineLabel (Label lab name) =
459 do state <- getInternalState
460 case Map.lookup lab (definedLabels state) of
461 Just _ -> failCodeGen $ text "duplicate definition of label" <+>
464 case Map.lookup lab (pendingFixups state) of
465 Just fixups -> do mapM_ (performFixup (buffer state) (bufferOfs state)) fixups
466 setInternalState state{pendingFixups = Map.delete lab (pendingFixups state)}
468 state1 <- getInternalState
469 setInternalState state1{definedLabels = Map.insert lab (buffer state1, bufferOfs state1, name) (definedLabels state1)}
471 performFixup :: Ptr Word8 -> Int -> FixupEntry -> CodeGen e s ()
472 performFixup labBuf labOfs (FixupEntry{fueBuffer = buf, fueOfs = ofs, fueKind = knd}) =
473 do let diff = (labBuf `plusPtr` labOfs) `minusPtr` (buf `plusPtr` ofs)
475 Fixup8 -> pokeByteOff buf ofs (fromIntegral diff - 1 :: Word8)
476 Fixup16 -> pokeByteOff buf ofs (fromIntegral diff - 2 :: Word16)
477 Fixup32 -> pokeByteOff buf ofs (fromIntegral diff - 4 :: Word32)
478 Fixup32Absolute -> pokeByteOff buf ofs (fromIntegral (ptrToWordPtr (labBuf `plusPtr` labOfs)) :: Word32)
482 -- | This operator gives neat syntax for defining labels. When @l@ is a label, the code
484 -- > l @@ mov eax ebx
486 -- associates the label l with the following @mov@ instruction.
487 (@@) :: Label -> CodeGen e s a -> CodeGen e s a
488 (@@) lab gen = do defineLabel lab
491 -- | Emit a fixup entry for the given label at the current offset in
492 -- the code buffer (unless the label is already defined).
493 -- The instruction at this offset will
494 -- be patched to target the address associated with this label when
495 -- it is defined later.
496 emitFixup :: Label -> Int -> FixupKind -> CodeGen e s ()
497 emitFixup (Label lab _) ofs knd =
498 do state <- getInternalState
499 let base = buffer state
500 ptr = bufferOfs state
501 fue = FixupEntry{fueBuffer = base,
504 case Map.lookup lab (definedLabels state) of
505 Just (labBuf, labOfs, _) -> performFixup labBuf labOfs fue
506 Nothing -> setInternalState state{pendingFixups = Map.insertWith (++) lab [fue] (pendingFixups state)}
508 -- | Return the address of a label, fail if the label is not yet defined.
509 labelAddress :: Label -> CodeGen e s (Ptr a)
510 labelAddress (Label lab name) = do
511 state <- getInternalState
512 case Map.lookup lab (definedLabels state) of
513 Just (labBuf, labOfs, _) -> return $ plusPtr labBuf labOfs
514 Nothing -> fail $ "Label " ++ show lab ++ "(" ++ name ++ ") not yet defined"
517 -- | Disassemble all code buffers. The result is a list of
518 -- disassembled instructions which can be converted to strings using
519 -- the 'Dis.showIntel' or 'Dis.showAtt' functions from module
520 -- "Harpy.X86Disassembler".
521 disassemble :: CodeGen e s [Dis.Instruction]
523 s <- getInternalState
524 let buffers = bufferList s
525 r <- mapM (\ (buff, len) -> do
526 r <- liftIO $ Dis.disassembleBlock buff len
528 Left err -> cgFail $ show err
529 Right instr -> return instr
530 ) $ buffers ++ [(buffer s, bufferOfs s)]
531 r' <- insertLabels (concat r)
533 where insertLabels :: [Dis.Instruction] -> CodeGen e s [Dis.Instruction]
534 insertLabels = liftM concat . mapM ins
535 ins :: Dis.Instruction -> CodeGen e s [Dis.Instruction]
536 ins i@(Dis.BadInstruction _ _ addr _) = insWithLabel i addr
537 ins i@(Dis.PseudoInstruction{}) = return [i]
538 ins i@(Dis.Instruction{Dis.address = addr}) = insWithLabel i addr
540 insWithLabel :: Dis.Instruction -> Int -> CodeGen e s [Dis.Instruction]
541 insWithLabel i addr =
542 do state <- getInternalState
543 let allLabs = Map.toList (definedLabels state)
544 labs = filter (\ (_, (buf, ofs, _)) -> fromIntegral (ptrToWordPtr (buf `plusPtr` ofs)) == addr) allLabs
545 createLabel (l, (buf, ofs, name)) = Dis.PseudoInstruction addr
548 "label " ++ show l ++
550 hex32 (fromIntegral (ptrToWordPtr (buf `plusPtr` ofs))) ++
552 _ -> name ++ ": [" ++
553 hex32 (fromIntegral (ptrToWordPtr (buf `plusPtr` ofs))) ++
555 return $ fmap createLabel labs ++ [i]
556 hex32 :: Int -> String
561 in take (8 - length s) (repeat '0') ++ s
565 callDecl :: String -> Q Type -> Q [Dec]
568 let (tvars, cxt, t) = case t0 of
569 ForallT vs c t' -> (vs, c, t')
572 let funptr = AppT (ConT $ mkName "FunPtr") t
573 let ioresult = addIO t
574 let ty = AppT (AppT ArrowT funptr) ioresult
575 dynName <- newName "conv"
576 let dyn = ForeignD $ ImportF CCall Safe "dynamic" dynName $ ForallT tvars cxt ty
578 cbody <- [| CodeGen (\env (ustate, state) ->
579 do let code = firstBuffer state
582 cast <- [|castPtrToFunPtr|]
583 let f = AppE (VarE dynName)
586 return $ LamE [VarP c] $ foldl AppE f $ map VarE vs
588 return $ ((ustate, state), Right res))|]
589 let call = ValD (VarP name) (NormalB $ LamE (map VarP vs) cbody) []
592 mkArgs (AppT (AppT ArrowT _from) to) = do
598 addIO (AppT t@(AppT ArrowT _from) to) = AppT t $ addIO to
599 addIO t = AppT (ConT $ mkName "IO") t
603 -- | Declare a stub function to call the code buffer. Arguments are the name
604 -- of the generated function, and the type the code buffer is supposed to have.
605 -- The type argument can be given using the [t| ... |] notation of Template Haskell.
606 -- Allowed types are the legal types for FFI functions.
607 callDecl :: String -> Q Type -> Q [Dec]