1 -----------------------------------------------------------------------------
3 -- Module : Text.Parsec.Prim
4 -- Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007
5 -- License : BSD-style (see the LICENSE file)
7 -- Maintainer : derek.a.elkins@gmail.com
8 -- Stability : provisional
9 -- Portability : portable
11 -- The primitive parser combinators.
13 -----------------------------------------------------------------------------
15 {-# LANGUAGE MultiParamTypeClasses, FunctionalDependencies, FlexibleContexts,
16 UndecidableInstances #-}
18 module Text.Parsec.Prim where
20 import qualified Control.Applicative as Applicative ( Applicative(..), Alternative(..) )
21 import Control.Monad()
22 import Control.Monad.Trans
23 import Control.Monad.Identity
25 import Control.Monad.Reader.Class
26 import Control.Monad.State.Class
27 import Control.Monad.Cont.Class
28 import Control.Monad.Error.Class
30 import Text.Parsec.Pos
31 import Text.Parsec.Error
33 unknownError :: State s u -> ParseError
34 unknownError state = newErrorUnknown (statePos state)
36 sysUnExpectError :: String -> SourcePos -> Reply s u a
37 sysUnExpectError msg pos = Error (newErrorMessage (SysUnExpect msg) pos)
39 -- | The parser @unexpected msg@ always fails with an unexpected error
40 -- message @msg@ without consuming any input.
42 -- The parsers 'fail', ('<?>') and @unexpected@ are the three parsers
43 -- used to generate error messages. Of these, only ('<?>') is commonly
44 -- used. For an example of the use of @unexpected@, see the definition
45 -- of 'Text.Parsec.Combinator.notFollowedBy'.
47 unexpected :: (Stream s m t) => String -> ParsecT s u m a
49 = ParsecT $ \s -> return $ Empty $ return $
50 Error (newErrorMessage (UnExpect msg) (statePos s))
52 -- | ParserT monad transformer and Parser type
54 -- | @ParsecT s u m a@ is a parser with stream type @s@, user state type @u@,
55 -- underlying monad @m@ and return type @a@
58 = ParsecT { runParsecT :: State s u -> m (Consumed (m (Reply s u a))) }
60 type Parsec s u = ParsecT s u Identity
62 data Consumed a = Consumed a
65 data Reply s u a = Ok !a !(State s u) ParseError
68 data State s u = State {
70 statePos :: !SourcePos,
74 instance Functor Consumed where
75 fmap f (Consumed x) = Consumed (f x)
76 fmap f (Empty x) = Empty (f x)
78 instance Functor (Reply s u) where
79 fmap f (Ok x s e) = Ok (f x) s e
80 fmap _ (Error e) = Error e -- XXX
82 instance (Monad m) => Functor (ParsecT s u m) where
83 fmap f p = parsecMap f p
85 parsecMap :: (Monad m) => (a -> b) -> ParsecT s u m a -> ParsecT s u m b
87 = ParsecT $ \s -> liftM (fmap (liftM (fmap f))) (runParsecT p s)
89 instance (Monad m) => Applicative.Applicative (ParsecT s u m) where
91 (<*>) = ap -- TODO: Can this be optimized?
93 instance (Monad m) => Applicative.Alternative (ParsecT s u m) where
97 instance (Monad m) => Monad (ParsecT s u m) where
98 return x = parserReturn x
99 p >>= f = parserBind p f
100 fail msg = parserFail msg
102 instance (MonadIO m) => MonadIO (ParsecT s u m) where
103 liftIO = lift . liftIO
105 instance (MonadReader r m) => MonadReader r (ParsecT s u m) where
107 local f p = ParsecT $ \s -> local f (runParsecT p s)
109 -- I'm presuming the user might want a separate, non-backtracking
110 -- state aside from the Parsec user state.
111 instance (MonadState s m) => MonadState s (ParsecT s' u m) where
115 instance (MonadCont m) => MonadCont (ParsecT s u m) where
116 callCC f = ParsecT $ \s ->
118 runParsecT (f (\a -> ParsecT $ \s' -> c (pack s' a))) s
120 where pack s a= Empty $ return (Ok a s (unknownError s))
122 instance (MonadError e m) => MonadError e (ParsecT s u m) where
123 throwError = lift . throwError
124 p `catchError` h = ParsecT $ \s ->
125 runParsecT p s `catchError` \e ->
128 parserReturn :: (Monad m) => a -> ParsecT s u m a
130 = ParsecT $ \s -> return $ Empty $ return (Ok x s (unknownError s))
132 parserBind :: (Monad m)
133 => ParsecT s u m a -> (a -> ParsecT s u m b) -> ParsecT s u m b
136 = ParsecT $ \s -> do -- TODO: This was \s@(State _ u _) ???
137 res1 <- runParsecT p s
141 -> do reply1 <- mReply1
144 res2 <- runParsecT (f x) s'
147 -> do reply2 <- mReply2
149 return $ mergeErrorReply err1 reply2
152 Error err1 -> return $ Empty $ return $ Error err1
155 -> do reply1 <- mReply1
156 return $ Consumed $ -- `early' returning
159 res2 <- runParsecT (f x) s'
162 -> do reply2 <- mReply2
163 return $ mergeErrorReply err1 reply2
164 Consumed reply2 -> reply2
165 Error err1 -> return $ Error err1
168 mergeErrorReply :: ParseError -> Reply s u a -> Reply s u a
169 mergeErrorReply err1 reply -- XXX where to put it?
171 Ok x state err2 -> Ok x state (mergeError err1 err2)
172 Error err2 -> Error (mergeError err1 err2)
174 parserFail :: (Monad m) => String -> ParsecT s u m a
176 = ParsecT $ \s -> return $ Empty $ return $
177 Error (newErrorMessage (Message msg) (statePos s))
179 instance (Monad m) => MonadPlus (ParsecT s u m) where
181 mplus p1 p2 = parserPlus p1 p2
183 -- | @parserZero@ always fails without consuming any input. @parserZero@ is defined
184 -- equal to the 'mzero' member of the 'MonadPlus' class and to the 'Control.Applicative.empty' member
185 -- of the 'Control.Applicative.Applicative' class.
187 parserZero :: (Monad m) => ParsecT s u m a
189 = ParsecT $ \s -> return $ Empty $ return $ Error (unknownError s)
191 parserPlus :: (Monad m)
192 => ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
193 parserPlus (ParsecT p1) (ParsecT p2)
204 -> do reply2 <- mReply2
205 return $ Empty $ return (mergeErrorReply err reply2)
208 other -> return $ Empty $ return $ other
209 other -> return $ other
211 instance MonadTrans (ParsecT s u) where
212 lift amb = ParsecT $ \s -> do
214 return $ Empty $ return $ Ok a s (unknownError s)
219 -- | The parser @p <?> msg@ behaves as parser @p@, but whenever the
220 -- parser @p@ fails /without consuming any input/, it replaces expect
221 -- error messages with the expect error message @msg@.
223 -- This is normally used at the end of a set alternatives where we want
224 -- to return an error message in terms of a higher level construct
225 -- rather than returning all possible characters. For example, if the
226 -- @expr@ parser from the 'try' example would fail, the error
227 -- message is: '...: expecting expression'. Without the @(\<?>)@
228 -- combinator, the message would be like '...: expecting \"let\" or
229 -- letter', which is less friendly.
232 => (ParsecT s u m a) -> String -> (ParsecT s u m a)
233 p <?> msg = label p msg
235 -- | This combinator implements choice. The parser @p \<|> q@ first
236 -- applies @p@. If it succeeds, the value of @p@ is returned. If @p@
237 -- fails /without consuming any input/, parser @q@ is tried. This
238 -- combinator is defined equal to the 'mplus' member of the 'MonadPlus'
239 -- class and the ('Control.Applicative.<|>') member of 'Control.Applicative.Alternative'.
241 -- The parser is called /predictive/ since @q@ is only tried when
242 -- parser @p@ didn't consume any input (i.e.. the look ahead is 1).
243 -- This non-backtracking behaviour allows for both an efficient
244 -- implementation of the parser combinators and the generation of good
248 => (ParsecT s u m a) -> (ParsecT s u m a) -> (ParsecT s u m a)
249 p1 <|> p2 = mplus p1 p2
251 label :: (Monad m) => ParsecT s u m a -> String -> ParsecT s u m a
255 labels :: (Monad m) => ParsecT s u m a -> [String] -> ParsecT s u m a
262 return $ Empty $ case reply of
264 -> return $ Error (setExpectErrors err msgs)
266 | errorIsUnknown err -> return $ reply
267 | otherwise -> return (Ok x s' (setExpectErrors err msgs))
268 other -> return $ other
270 setExpectErrors err [] = setErrorMessage (Expect "") err
271 setExpectErrors err [msg] = setErrorMessage (Expect msg) err
272 setExpectErrors err (msg:msgs)
273 = foldr (\msg' err' -> addErrorMessage (Expect msg') err')
274 (setErrorMessage (Expect msg) err) msgs
276 -- TODO: There should be a stronger statement that can be made about this
278 -- | An instance of @Stream@ has stream type @s@, underlying monad @m@ and token type @t@ determined by the stream
280 -- Some rough guidelines for a \"correct\" instance of Stream:
282 -- * unfoldM uncons gives the [t] corresponding to the stream
284 -- * A @Stream@ instance is responsible for maintaining the \"position within the stream\" in the stream state @s@. This is trivial unless you are using the monad in a non-trivial way.
286 class (Monad m) => Stream s m t | s -> t where
287 uncons :: s -> m (Maybe (t,s))
289 tokens :: (Stream s m t, Eq t)
290 => ([t] -> String) -- Pretty print a list of tokens
291 -> (SourcePos -> [t] -> SourcePos)
292 -> [t] -- List of tokens to parse
295 = ParsecT $ \s -> return $ Empty $ return $ Ok [] s (unknownError s)
296 tokens showTokens nextposs tts@(tok:toks)
297 = ParsecT $ \(State input pos u) ->
299 errEof = return $ Error (setErrorMessage (Expect (showTokens tts))
300 (newErrorMessage (SysUnExpect "") pos))
301 errExpect x = return $ Error (setErrorMessage (Expect (showTokens tts))
302 (newErrorMessage (SysUnExpect (showTokens [x])) pos))
303 walk [] rs = return (ok rs)
308 Just (x,xs) | t == x -> walk ts xs
309 | otherwise -> errExpect x
310 ok rs = let pos' = nextposs pos tts
312 in Ok tts s' (newErrorUnknown pos')
316 Nothing -> Empty $ errEof
318 | tok == x -> Consumed $ walk toks xs
319 | otherwise -> Empty $ errExpect x
321 -- | The parser @try p@ behaves like parser @p@, except that it
322 -- pretends that it hasn't consumed any input when an error occurs.
324 -- This combinator is used whenever arbitrary look ahead is needed.
325 -- Since it pretends that it hasn't consumed any input when @p@ fails,
326 -- the ('<|>') combinator will try its second alternative even when the
327 -- first parser failed while consuming input.
329 -- The @try@ combinator can for example be used to distinguish
330 -- identifiers and reserved words. Both reserved words and identifiers
331 -- are a sequence of letters. Whenever we expect a certain reserved
332 -- word where we can also expect an identifier we have to use the @try@
333 -- combinator. Suppose we write:
335 -- > expr = letExpr <|> identifier <?> "expression"
337 -- > letExpr = do{ string "let"; ... }
338 -- > identifier = many1 letter
340 -- If the user writes \"lexical\", the parser fails with: @unexpected
341 -- \'x\', expecting \'t\' in \"let\"@. Indeed, since the ('<|>') combinator
342 -- only tries alternatives when the first alternative hasn't consumed
343 -- input, the @identifier@ parser is never tried (because the prefix
344 -- \"le\" of the @string \"let\"@ parser is already consumed). The
345 -- right behaviour can be obtained by adding the @try@ combinator:
347 -- > expr = letExpr <|> identifier <?> "expression"
349 -- > letExpr = do{ try (string "let"); ... }
350 -- > identifier = many1 letter
352 try :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m a
354 = ParsecT $ \s@(State _ pos _) -> do
357 Consumed rep -> do r <- rep
359 Error err -> return $ Empty $ return $ Error
360 (setErrorPos pos err)
361 ok -> return $ Consumed $ return $ ok
362 empty -> return $ empty
364 -- | The parser @token showTok posFromTok testTok@ accepts a token @t@
365 -- with result @x@ when the function @testTok t@ returns @'Just' x@. The
366 -- source position of the @t@ should be returned by @posFromTok t@ and
367 -- the token can be shown using @showTok t@.
369 -- This combinator is expressed in terms of 'tokenPrim'.
370 -- It is used to accept user defined token streams. For example,
371 -- suppose that we have a stream of basic tokens tupled with source
372 -- positions. We can than define a parser that accepts single tokens as:
375 -- > = token showTok posFromTok testTok
377 -- > showTok (pos,t) = show t
378 -- > posFromTok (pos,t) = pos
379 -- > testTok (pos,t) = if x == t then Just t else Nothing
381 token :: (Stream s Identity t)
382 => (t -> String) -- ^ Token pretty-printing function.
383 -> (t -> SourcePos) -- ^ Computes the position of a token.
384 -> (t -> Maybe a) -- ^ Matching function for the token to parse.
386 token showToken tokpos test = tokenPrim showToken nextpos test
388 nextpos _ tok ts = case runIdentity (uncons ts) of
389 Nothing -> tokpos tok
390 Just (tok',_) -> tokpos tok'
392 -- | The parser @token showTok nextPos testTok@ accepts a token @t@
393 -- with result @x@ when the function @testTok t@ returns @'Just' x@. The
394 -- token can be shown using @showTok t@. The position of the /next/
395 -- token should be returned when @nextPos@ is called with the current
396 -- source position @pos@, the current token @t@ and the rest of the
397 -- tokens @toks@, @nextPos pos t toks@.
399 -- This is the most primitive combinator for accepting tokens. For
400 -- example, the 'Text.Parsec.Char.char' parser could be implemented as:
403 -- > = tokenPrim showChar nextPos testChar
405 -- > showChar x = "'" ++ x ++ "'"
406 -- > testChar x = if x == c then Just x else Nothing
407 -- > nextPos pos x xs = updatePosChar pos x
409 tokenPrim :: (Stream s m t)
410 => (t -> String) -- ^ Token pretty-printing function.
411 -> (SourcePos -> t -> s -> SourcePos) -- ^ Next position calculating function.
412 -> (t -> Maybe a) -- ^ Matching function for the token to parse.
414 tokenPrim showToken nextpos test = tokenPrimEx showToken nextpos Nothing test
416 tokenPrimEx :: (Stream s m t)
418 -> (SourcePos -> t -> s -> SourcePos)
419 -> Maybe (SourcePos -> t -> s -> u -> u)
422 tokenPrimEx showToken nextpos mbNextState test
423 = case mbNextState of
425 -> ParsecT $ \(State input pos user) -> do
428 Nothing -> return $ Empty $ return (sysUnExpectError "" pos)
431 Just x -> let newpos = nextpos pos c cs
432 newstate = State cs newpos user
433 in seq newpos $ seq newstate $
434 return $ Consumed $ return $
435 (Ok x newstate (newErrorUnknown newpos))
436 Nothing -> return $ Empty $ return $
437 (sysUnExpectError (showToken c) pos)
439 -> ParsecT $ \(State input pos user) -> do
442 Nothing -> return $ Empty $ return (sysUnExpectError "" pos)
445 Just x -> let newpos = nextpos pos c cs
446 newuser = nextState pos c cs user
447 newstate = State cs newpos newuser
448 in seq newpos $ seq newstate $
449 return $ Consumed $ return $
450 (Ok x newstate (newErrorUnknown newpos))
451 Nothing -> return $ Empty $ return $
452 (sysUnExpectError (showToken c) pos)
454 -- | @many p@ applies the parser @p@ /zero/ or more times. Returns a
455 -- list of the returned values of @p@.
457 -- > identifier = do{ c <- letter
458 -- > ; cs <- many (alphaNum <|> char '_')
462 many :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m [a]
464 = do xs <- manyAccum (:) p
467 -- | @skipMany p@ applies the parser @p@ /zero/ or more times, skipping
470 -- > spaces = skipMany space
472 skipMany :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m ()
474 = do manyAccum (\_ _ -> []) p
477 manyAccum :: (Stream s m t)
487 -> do reply <- mReply
489 Error err -> return $ Ok xs state err
490 _ -> error "Text.Parsec.Prim.many: combinator 'many' is applied to a parser that accepts an empty string."
492 -> do reply <- mReply
495 -> return $ Error err
497 -> let ys = accum x xs
498 in seq ys (walk ys s' (runParsecT p s'))
499 in do r <- runParsecT p s
502 -> do reply <- mReply
505 -> error "Text.ParserCombinators.Parsec.Prim.many: combinator 'many' is applied to a parser that accepts an empty string."
507 -> return $ Empty $ return (Ok [] s err)
509 -> return $ Consumed $ walk [] s (return consumed)
512 -- < Running a parser: monadic (runPT) and pure (runP)
514 runPT :: (Stream s m t)
515 => ParsecT s u m a -> u -> SourceName -> s -> m (Either ParseError a)
517 = do res <- runParsecT p (State s (initialPos name) u)
520 Ok x _ _ -> return (Right x)
521 Error err -> return (Left err)
528 runP :: (Stream s Identity t)
529 => Parsec s u a -> u -> SourceName -> s -> Either ParseError a
530 runP p u name s = runIdentity $ runPT p u name s
532 -- | The most general way to run a parser. @runParserT p state filePath
533 -- input@ runs parser @p@ on the input list of tokens @input@,
534 -- obtained from source @filePath@ with the initial user state @st@.
535 -- The @filePath@ is only used in error messages and may be the empty
536 -- string. Returns a computation in the underlying monad @m@ that return either a 'ParseError' ('Left') or a
537 -- value of type @a@ ('Right').
539 runParserT :: (Stream s m t)
540 => ParsecT s u m a -> u -> SourceName -> s -> m (Either ParseError a)
543 -- | The most general way to run a parser over the Identity monad. @runParser p state filePath
544 -- input@ runs parser @p@ on the input list of tokens @input@,
545 -- obtained from source @filePath@ with the initial user state @st@.
546 -- The @filePath@ is only used in error messages and may be the empty
547 -- string. Returns either a 'ParseError' ('Left') or a
548 -- value of type @a@ ('Right').
550 -- > parseFromFile p fname
551 -- > = do{ input <- readFile fname
552 -- > ; return (runParser p () fname input)
555 runParser :: (Stream s Identity t)
556 => Parsec s u a -> u -> SourceName -> s -> Either ParseError a
559 -- | @parse p filePath input@ runs a parser @p@ over Identity without user
560 -- state. The @filePath@ is only used in error messages and may be the
561 -- empty string. Returns either a 'ParseError' ('Left')
562 -- or a value of type @a@ ('Right').
564 -- > main = case (parse numbers "" "11, 2, 43") of
565 -- > Left err -> print err
566 -- > Right xs -> print (sum xs)
568 -- > numbers = commaSep integer
570 parse :: (Stream s Identity t)
571 => Parsec s () a -> SourceName -> s -> Either ParseError a
574 -- | The expression @parseTest p input@ applies a parser @p@ against
575 -- input @input@ and prints the result to stdout. Used for testing
578 parseTest :: (Stream s Identity t, Show a)
579 => Parsec s () a -> s -> IO ()
581 = case parse p "" input of
582 Left err -> do putStr "parse error at "
586 -- < Parser state combinators
588 -- | Returns the current source position. See also 'SourcePos'.
590 getPosition :: (Monad m) => ParsecT s u m SourcePos
591 getPosition = do state <- getParserState
592 return (statePos state)
594 -- | Returns the current input
596 getInput :: (Monad m) => ParsecT s u m s
597 getInput = do state <- getParserState
598 return (stateInput state)
600 -- | @setPosition pos@ sets the current source position to @pos@.
602 setPosition :: (Monad m) => SourcePos -> ParsecT s u m ()
604 = do updateParserState (\(State input _ user) -> State input pos user)
607 -- | @setInput input@ continues parsing with @input@. The 'getInput' and
608 -- @setInput@ functions can for example be used to deal with #include
611 setInput :: (Monad m) => s -> ParsecT s u m ()
613 = do updateParserState (\(State _ pos user) -> State input pos user)
616 -- | Returns the full parser state as a 'State' record.
618 getParserState :: (Monad m) => ParsecT s u m (State s u)
619 getParserState = updateParserState id
621 -- | @setParserState st@ set the full parser state to @st@.
623 setParserState :: (Monad m) => State s u -> ParsecT s u m (State s u)
624 setParserState st = updateParserState (const st)
626 -- | @updateParserState f@ applies function @f@ to the parser state.
628 updateParserState :: (Monad m)
629 => (State s u -> State s u) -> ParsecT s u m (State s u)
631 = ParsecT $ \s -> let s' = f s
632 in return $ Empty $ return (Ok s' s' (unknownError s'))
634 -- < User state combinators
636 -- | Returns the current user state.
638 getState :: (Monad m) => ParsecT s u m u
639 getState = stateUser `liftM` getParserState
641 -- | @putState st@ set the user state to @st@.
643 putState :: (Monad m) => u -> ParsecT s u m ()
644 putState u = do updateParserState $ \s -> s { stateUser = u }
647 -- | @updateState f@ applies function @f@ to the user state. Suppose
648 -- that we want to count identifiers in a source, we could use the user
651 -- > expr = do{ x <- identifier
652 -- > ; updateState (+1)
656 modifyState :: (Monad m) => (u -> u) -> ParsecT s u m ()
657 modifyState f = do updateParserState $ \s -> s { stateUser = f (stateUser s) }
662 -- | An alias for putState for backwards compatibility.
664 setState :: (Monad m) => u -> ParsecT s u m ()
667 -- | An alias for modifyState for backwards compatibility.
669 updateState :: (Monad m) => (u -> u) -> ParsecT s u m ()
670 updateState = modifyState