3 --------------------------------------------------------------------------
5 -- Module: Harpy.CodeGenMonad
6 -- Copyright: (c) 2006-2007 Martin Grabmueller and Dirk Kleeblatt
9 -- Maintainer: {magr,klee}@cs.tu-berlin.de
10 -- Stability: provisional
11 -- Portability: portable (but generated code non-portable)
13 -- Monad for generating x86 machine code at runtime.
15 -- This is a combined reader-state-exception monad which handles all
16 -- the details of handling code buffers, emitting binary data,
19 -- All the code generation functions in module "Harpy.X86CodeGen" live
20 -- in this monad and use its error reporting facilities as well as the
21 -- internal state maintained by the monad.
23 -- The library user can pass a user environment and user state through
24 -- the monad. This state is independent from the internal state and
25 -- may be used by higher-level code generation libraries to maintain
26 -- their own state across code generation operations.
27 -- --------------------------------------------------------------------------
29 module Harpy.CodeGenMonad(
40 -- ** General code generator monad operations
42 -- ** Accessing code generation internals
47 -- ** Access to user state and environment
52 -- ** Label management
69 -- ** Executing code generation
72 -- ** Calling generated functions
74 -- ** Interface to disassembler
78 import qualified Harpy.X86Disassembler as Dis
82 import Text.PrettyPrint.HughesPJ
87 import qualified Data.Map as Map
89 import Foreign.C.Types
92 import Control.Monad.Trans
94 import Language.Haskell.TH.Syntax
97 -- | An error message produced by a code generation operation.
100 -- | The code generation monad, a combined reader-state-exception
102 newtype CodeGen e s a = CodeGen ((e, CodeGenEnv) -> (s, CodeGenState) -> IO ((s, CodeGenState), Either ErrMsg a))
104 -- | Configuration of the code generator. There are currently two
105 -- configuration options. The first is the number fo bytes to use for
106 -- allocating code buffers (the first as well as additional buffers
107 -- created in calls to 'ensureBufferSize'. The second allows to pass
108 -- in a pre-allocated code buffer and its size. When this option is
109 -- used, Harpy does not perform any code buffer resizing (calls to
110 -- 'ensureBufferSize' will be equivalent to calls to
111 -- 'checkBufferSize').
112 data CodeGenConfig = CodeGenConfig {
113 codeBufferSize :: Int, -- ^ Size of individual code buffer blocks.
114 customCodeBuffer :: Maybe (Ptr Word8, Int) -- ^ Code buffer passed in.
117 -- | Internal state of the code generator
118 data CodeGenState = CodeGenState {
119 buffer :: Ptr Word8, -- ^ Pointer to current code buffer.
120 bufferList :: [(Ptr Word8, Int)], -- ^ List of all other code buffers.
121 firstBuffer :: Ptr Word8, -- ^ Pointer to first buffer.
122 bufferOfs :: Int, -- ^ Current offset into buffer where next instruction will be stored.
123 bufferSize :: Int, -- ^ Size of current buffer.
124 relocEntries :: [Reloc], -- ^ List of all emitted relocation entries.
125 nextLabel :: Int, -- ^ Counter for generating labels.
126 definedLabels :: Map.Map Int (Ptr Word8, Int, String), -- ^ Map of already defined labels.
127 pendingFixups :: Map.Map Int [FixupEntry], -- ^ Map of labels which have been referenced, but not defined.
128 config :: CodeGenConfig -- ^ Configuration record.
131 data FixupEntry = FixupEntry {
132 fueBuffer :: Ptr Word8,
137 -- | Kind of a fixup entry. When a label is emitted with
138 -- 'defineLabel', all prior references to this label must be fixed
139 -- up. This data type tells how to perform the fixup operation.
140 data FixupKind = Fixup8 -- ^ 8-bit relative reference
141 | Fixup16 -- ^ 16-bit relative reference
142 | Fixup32 -- ^ 32-bit relative reference
143 | Fixup32Absolute -- ^ 32-bit absolute reference
146 data CodeGenEnv = CodeGenEnv { tailContext :: Bool }
149 -- | Kind of relocation, for example PC-relative
150 data RelocKind = RelocPCRel -- ^ PC-relative relocation
151 | RelocAbsolute -- ^ Absolute address
154 -- | Relocation entry
155 data Reloc = Reloc { offset :: Int,
156 -- ^ offset in code block which needs relocation
158 -- ^ kind of relocation
165 data Label = Label Int String
168 unCg :: CodeGen e s a -> ((e, CodeGenEnv) -> (s, CodeGenState) -> IO ((s, CodeGenState), Either ErrMsg a))
171 instance Monad (CodeGen e s) where
172 return x = cgReturn x
173 fail err = cgFail err
176 cgReturn :: a -> CodeGen e s a
177 cgReturn x = CodeGen (\_env state -> return (state, Right x))
179 cgFail :: String -> CodeGen e s a
180 cgFail err = CodeGen (\_env state -> return (state, Left (text err)))
182 cgBind :: CodeGen e s a -> (a -> CodeGen e s a1) -> CodeGen e s a1
183 cgBind m k = CodeGen (\env state ->
184 do r1 <- unCg m env state
186 (state', Left err) -> return (state', Left err)
187 (state', Right v) -> unCg (k v) env state')
189 -- | Abort code generation with the given error message.
190 failCodeGen :: Doc -> CodeGen e s a
191 failCodeGen d = CodeGen (\_env state -> return (state, Left d))
193 instance MonadIO (CodeGen e s) where
194 liftIO st = CodeGen (\_env state -> do { r <- st; return (state, Right r) })
196 emptyCodeGenState :: CodeGenState
197 emptyCodeGenState = CodeGenState { buffer = undefined,
199 firstBuffer = undefined,
204 definedLabels = Map.empty,
205 pendingFixups = Map.empty,
206 config = defaultCodeGenConfig}
208 -- | Default code generation configuration. The code buffer size is
209 -- set to 4KB, and code buffer management is automatic. This value is
210 -- intended to be used with record update syntax, for example:
212 -- > runCodeGenWithConfig ... defaultCodeGenConfig{codeBufferSize = 128} ...
213 defaultCodeGenConfig :: CodeGenConfig
214 defaultCodeGenConfig = CodeGenConfig { codeBufferSize = defaultCodeBufferSize,
215 customCodeBuffer = Nothing }
217 defaultCodeBufferSize :: Int
218 defaultCodeBufferSize = 4096
220 -- | Execute code generation, given a user environment and state. The
221 -- result is a tuple of the resulting user state and either an error
222 -- message (when code generation failed) or the result of the code
223 -- generation. This function runs 'runCodeGenWithConfig' with a
224 -- sensible default configuration.
225 runCodeGen :: CodeGen e s a -> e -> s -> IO (s, Either ErrMsg a)
226 runCodeGen cg uenv ustate =
227 runCodeGenWithConfig cg uenv ustate defaultCodeGenConfig
229 foreign import ccall "static stdlib.h"
230 memalign :: CUInt -> CUInt -> IO (Ptr a)
232 foreign import ccall "static sys/mman.h"
233 mprotect :: CUInt -> CUInt -> Int -> IO Int
235 mallocExecBytes :: Int -> IO (Ptr a)
236 mallocExecBytes size' = do
237 arr <- memalign 0x1000 size
238 -- 0x7 = PROT_{READ,WRITE,EXEC}
239 _ <- mprotect (fromIntegral $ ptrToIntPtr arr) size 0x7
241 where size = fromIntegral size'
243 -- | Like 'runCodeGen', but allows more control over the code
244 -- generation process. In addition to a code generator and a user
245 -- environment and state, a code generation configuration must be
246 -- provided. A code generation configuration allows control over the
247 -- allocation of code buffers, for example.
248 runCodeGenWithConfig :: CodeGen e s a -> e -> s -> CodeGenConfig -> IO (s, Either ErrMsg a)
249 runCodeGenWithConfig (CodeGen cg) uenv ustate conf =
250 do (buf, sze) <- case customCodeBuffer conf of
251 Nothing -> do let initSize = codeBufferSize conf
252 arr <- mallocExecBytes initSize
253 return (arr, initSize)
254 Just (buf, sze) -> return (buf, sze)
255 let env = CodeGenEnv {tailContext = True}
256 let state = emptyCodeGenState{buffer = buf,
261 ((ustate', _), res) <- cg (uenv, env) (ustate, state)
262 return (ustate', res)
264 -- | Check whether the code buffer has room for at least the given
265 -- number of bytes. This should be called by code generators
266 -- whenever it cannot be guaranteed that the code buffer is large
267 -- enough to hold all the generated code. Lets the code generation
268 -- monad fail when the buffer overflows.
270 -- /Note:/ Starting with version 0.4, Harpy automatically checks for
271 -- buffer overflow, so you do not need to call this function anymore.
272 checkBufferSize :: Int -> CodeGen e s ()
273 checkBufferSize needed =
274 do state <- getInternalState
275 unless (bufferOfs state + needed <= bufferSize state)
276 (failCodeGen (text "code generation buffer overflow: needed additional" <+>
277 int needed <+> text "bytes (offset =" <+>
278 int (bufferOfs state) <>
279 text ", buffer size =" <+>
280 int (bufferSize state) <> text ")"))
282 -- | Make sure that the code buffer has room for at least the given
283 -- number of bytes. This should be called by code generators whenever
284 -- it cannot be guaranteed that the code buffer is large enough to
285 -- hold all the generated code. Creates a new buffer and places a
286 -- jump to the new buffer when there is not sufficient space
287 -- available. When code generation was invoked with a pre-defined
288 -- code buffer, code generation is aborted on overflow.
290 -- /Note:/ Starting with version 0.4, Harpy automatically checks for
291 -- buffer overflow, so you do not need to call this function anymore.
292 ensureBufferSize :: Int -> CodeGen e s ()
293 ensureBufferSize needed =
294 do state <- getInternalState
295 case (customCodeBuffer (config state)) of
297 unless (bufferOfs state + needed + 5 <= bufferSize state)
298 (do let incrSize = max (needed + 16) (codeBufferSize (config state))
299 arr <- liftIO $ mallocExecBytes incrSize
301 let buf = buffer state
303 disp = arr `minusPtr` (buf `plusPtr` ofs) - 5
304 emit8 0xe9 -- FIXME: Machine dependent!
305 emit32 (fromIntegral disp)
306 st <- getInternalState
307 setInternalState st{buffer = arr, bufferList = bufferList st ++ [(buffer st, bufferOfs st)], bufferOfs = 0})
308 Just (_, _) -> checkBufferSize needed
310 -- | Return a pointer to the beginning of the first code buffer, which
311 -- is normally the entry point to the generated code.
312 getEntryPoint :: CodeGen e s (Ptr Word8)
314 CodeGen (\ _ (ustate, state) ->
315 return $ ((ustate, state), Right (firstBuffer state)))
317 -- | Return the current offset in the code buffer, e.g. the offset
318 -- at which the next instruction will be emitted.
319 getCodeOffset :: CodeGen e s Int
321 CodeGen (\ _ (ustate, state) ->
322 return $ ((ustate, state), Right (bufferOfs state)))
324 -- | Set the user state to the given value.
325 setState :: s -> CodeGen e s ()
327 CodeGen (\ _ (_, state) ->
328 return $ ((st, state), Right ()))
330 -- | Return the current user state.
331 getState :: CodeGen e s s
333 CodeGen (\ _ (ustate, state) ->
334 return $ ((ustate, state), Right (ustate)))
336 -- | Return the current user environment.
337 getEnv :: CodeGen e s e
339 CodeGen (\ (uenv, _) state ->
340 return $ (state, Right uenv))
342 -- | Set the environment to the given value and execute the given
343 -- code generation in this environment.
344 withEnv :: e -> CodeGen e s r -> CodeGen e s r
345 withEnv e (CodeGen cg) =
346 CodeGen (\ (_, env) state ->
349 -- | Set the user state to the given value.
350 setInternalState :: CodeGenState -> CodeGen e s ()
351 setInternalState st =
352 CodeGen (\ _ (ustate, _) ->
353 return $ ((ustate, st), Right ()))
355 -- | Return the current user state.
356 getInternalState :: CodeGen e s CodeGenState
358 CodeGen (\ _ (ustate, state) ->
359 return $ ((ustate, state), Right (state)))
361 -- | Return the pointer to the start of the code buffer.
362 getBasePtr :: CodeGen e s (Ptr Word8)
364 CodeGen (\ _ (ustate, state) ->
365 return $ ((ustate, state), Right (buffer state)))
367 -- | Return a list of all code buffers and their respective size
368 -- (i.e., actually used space for code, not allocated size).
369 getCodeBufferList :: CodeGen e s [(Ptr Word8, Int)]
370 getCodeBufferList = do st <- getInternalState
371 return $ bufferList st ++ [(buffer st, bufferOfs st)]
373 -- | Generate a new label to be used with the label operations
374 -- 'emitFixup' and 'defineLabel'.
375 newLabel :: CodeGen e s Label
377 do state <- getInternalState
378 let lab = nextLabel state
379 setInternalState state{nextLabel = lab + 1}
380 return (Label lab "")
382 -- | Generate a new label to be used with the label operations
383 -- 'emitFixup' and 'defineLabel'. The given name is used for
384 -- diagnostic purposes, and will appear in the disassembly.
385 newNamedLabel :: String -> CodeGen e s Label
387 do state <- getInternalState
388 let lab = nextLabel state
389 setInternalState state{nextLabel = lab + 1}
390 return (Label lab name)
392 -- | Generate a new label and define it at once
393 setLabel :: CodeGen e s Label
399 -- | Emit a relocation entry for the given offset, relocation kind
400 -- and target address.
401 emitRelocInfo :: Int -> RelocKind -> FunPtr a -> CodeGen e s ()
402 emitRelocInfo ofs knd addr =
403 do state <- getInternalState
404 setInternalState state{relocEntries =
407 address = castFunPtr addr} :
408 (relocEntries state)}
410 -- | Emit a byte value to the code buffer.
411 emit8 :: Word8 -> CodeGen e s ()
413 CodeGen (\ _ (ustate, state) ->
414 do let buf = buffer state
415 ptr = bufferOfs state
416 pokeByteOff buf ptr op
417 return $ ((ustate, state{bufferOfs = ptr + 1}), Right ()))
419 -- | Store a byte value at the given offset into the code buffer.
420 emit8At :: Int -> Word8 -> CodeGen e s ()
422 CodeGen (\ _ (ustate, state) ->
423 do let buf = buffer state
424 pokeByteOff buf pos op
425 return $ ((ustate, state), Right ()))
427 -- | Return the byte value at the given offset in the code buffer.
428 peek8At :: Int -> CodeGen e s Word8
430 CodeGen (\ _ (ustate, state) ->
431 do let buf = buffer state
432 b <- peekByteOff buf pos
433 return $ ((ustate, state), Right b))
435 -- | Like 'emit8', but for a 32-bit value.
436 emit32 :: Word32 -> CodeGen e s ()
438 CodeGen (\ _ (ustate, state) ->
439 do let buf = buffer state
440 ptr = bufferOfs state
441 pokeByteOff buf ptr op
442 return $ ((ustate, state{bufferOfs = ptr + 4}), Right ()))
444 -- | Like 'emit8At', but for a 32-bit value.
445 emit32At :: Int -> Word32 -> CodeGen e s ()
447 CodeGen (\ _ (ustate, state) ->
448 do let buf = buffer state
449 pokeByteOff buf pos op
450 return $ ((ustate, state), Right ()))
452 -- | Emit a label at the current offset in the code buffer. All
453 -- references to the label will be relocated to this offset.
454 defineLabel :: Label -> CodeGen e s ()
455 defineLabel (Label lab name) =
456 do state <- getInternalState
457 case Map.lookup lab (definedLabels state) of
458 Just _ -> failCodeGen $ text "duplicate definition of label" <+>
461 case Map.lookup lab (pendingFixups state) of
462 Just fixups -> do mapM_ (performFixup (buffer state) (bufferOfs state)) fixups
463 setInternalState state{pendingFixups = Map.delete lab (pendingFixups state)}
465 state1 <- getInternalState
466 setInternalState state1{definedLabels = Map.insert lab (buffer state1, bufferOfs state1, name) (definedLabels state1)}
468 performFixup :: Ptr Word8 -> Int -> FixupEntry -> CodeGen e s ()
469 performFixup labBuf labOfs (FixupEntry{fueBuffer = buf, fueOfs = ofs, fueKind = knd}) =
470 do let diff = (labBuf `plusPtr` labOfs) `minusPtr` (buf `plusPtr` ofs)
472 Fixup8 -> pokeByteOff buf ofs (fromIntegral diff - 1 :: Word8)
473 Fixup16 -> pokeByteOff buf ofs (fromIntegral diff - 2 :: Word16)
474 Fixup32 -> pokeByteOff buf ofs (fromIntegral diff - 4 :: Word32)
475 Fixup32Absolute -> pokeByteOff buf ofs (fromIntegral (ptrToWordPtr (labBuf `plusPtr` labOfs)) :: Word32)
479 -- | This operator gives neat syntax for defining labels. When @l@ is a label, the code
481 -- > l @@ mov eax ebx
483 -- associates the label l with the following @mov@ instruction.
484 (@@) :: Label -> CodeGen e s a -> CodeGen e s a
485 (@@) lab gen = do defineLabel lab
488 -- | Emit a fixup entry for the given label at the current offset in
489 -- the code buffer (unless the label is already defined).
490 -- The instruction at this offset will
491 -- be patched to target the address associated with this label when
492 -- it is defined later.
493 emitFixup :: Label -> Int -> FixupKind -> CodeGen e s ()
494 emitFixup (Label lab _) ofs knd =
495 do state <- getInternalState
496 let base = buffer state
497 ptr = bufferOfs state
498 fue = FixupEntry{fueBuffer = base,
501 case Map.lookup lab (definedLabels state) of
502 Just (labBuf, labOfs, _) -> performFixup labBuf labOfs fue
503 Nothing -> setInternalState state{pendingFixups = Map.insertWith (++) lab [fue] (pendingFixups state)}
505 -- | Return the address of a label, fail if the label is not yet defined.
506 labelAddress :: Label -> CodeGen e s (Ptr a)
507 labelAddress (Label lab name) = do
508 state <- getInternalState
509 case Map.lookup lab (definedLabels state) of
510 Just (labBuf, labOfs, _) -> return $ plusPtr labBuf labOfs
511 Nothing -> fail $ "Label " ++ show lab ++ "(" ++ name ++ ") not yet defined"
514 -- | Disassemble all code buffers. The result is a list of
515 -- disassembled instructions which can be converted to strings using
516 -- the 'Dis.showIntel' or 'Dis.showAtt' functions from module
517 -- "Harpy.X86Disassembler".
518 disassemble :: CodeGen e s [Dis.Instruction]
520 s <- getInternalState
521 let buffers = bufferList s
522 r <- mapM (\ (buff, len) -> do
523 r <- liftIO $ Dis.disassembleBlock buff len
525 Left err -> cgFail $ show err
526 Right instr -> return instr
527 ) $ buffers ++ [(buffer s, bufferOfs s)]
528 r' <- insertLabels (concat r)
530 where insertLabels :: [Dis.Instruction] -> CodeGen e s [Dis.Instruction]
531 insertLabels = liftM concat . mapM ins
532 ins :: Dis.Instruction -> CodeGen e s [Dis.Instruction]
533 ins i@(Dis.BadInstruction _ _ addr _) = insWithLabel i addr
534 ins i@(Dis.PseudoInstruction{}) = return [i]
535 ins i@(Dis.Instruction{Dis.address = addr}) = insWithLabel i addr
537 insWithLabel :: Dis.Instruction -> Int -> CodeGen e s [Dis.Instruction]
538 insWithLabel i addr =
539 do state <- getInternalState
540 let allLabs = Map.toList (definedLabels state)
541 labs = filter (\ (_, (buf, ofs, _)) -> fromIntegral (ptrToWordPtr (buf `plusPtr` ofs)) == addr) allLabs
542 createLabel (l, (buf, ofs, name)) = Dis.PseudoInstruction addr
545 "label " ++ show l ++
547 hex32 (fromIntegral (ptrToWordPtr (buf `plusPtr` ofs))) ++
549 _ -> name ++ ": [" ++
550 hex32 (fromIntegral (ptrToWordPtr (buf `plusPtr` ofs))) ++
552 return $ fmap createLabel labs ++ [i]
553 hex32 :: Int -> String
558 in take (8 - length s) (repeat '0') ++ s
562 callDecl :: String -> Q Type -> Q [Dec]
565 let (tvars, cxt, t) = case t0 of
566 ForallT vs c t' -> (vs, c, t')
569 let funptr = AppT (ConT $ mkName "FunPtr") t
570 let ioresult = addIO t
571 let ty = AppT (AppT ArrowT funptr) ioresult
572 dynName <- newName "conv"
573 let dyn = ForeignD $ ImportF CCall Safe "dynamic" dynName $ ForallT tvars cxt ty
575 cbody <- [| CodeGen (\env (ustate, state) ->
576 do let code = firstBuffer state
579 cast <- [|castPtrToFunPtr|]
580 let f = AppE (VarE dynName)
583 return $ LamE [VarP c] $ foldl AppE f $ map VarE vs
585 return $ ((ustate, state), Right res))|]
586 let call = ValD (VarP name) (NormalB $ LamE (map VarP vs) cbody) []
589 mkArgs (AppT (AppT ArrowT _from) to) = do
595 addIO (AppT t@(AppT ArrowT _from) to) = AppT t $ addIO to
596 addIO t = AppT (ConT $ mkName "IO") t
600 -- | Declare a stub function to call the code buffer. Arguments are the name
601 -- of the generated function, and the type the code buffer is supposed to have.
602 -- The type argument can be given using the [t| ... |] notation of Template Haskell.
603 -- Allowed types are the legal types for FFI functions.
604 callDecl :: String -> Q Type -> Q [Dec]