3a_asm/Text/Parsec/Token.hs

   1 -----------------------------------------------------------------------------
   2 -- |
   3 -- Module      :  Text.Parsec.Token
   4 -- Copyright   :  (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007
   5 -- License     :  BSD-style (see the LICENSE file)
   6 --
   7 -- Maintainer  :  derek.a.elkins@gmail.com
   8 -- Stability   :  provisional
   9 -- Portability :  non-portable (uses local universal quantification: PolymorphicComponents)
  10 --
  11 -- A helper module to parse lexical elements (tokens). See 'makeTokenParser'
  12 -- for a description of how to use it.
  13 --
  14 -----------------------------------------------------------------------------
  15
  16 {-# LANGUAGE PolymorphicComponents #-}
  17 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
  18 {-# OPTIONS_GHC -XFlexibleContexts #-}
  19
  20 module Text.Parsec.Token
  21     ( LanguageDef
  22     , GenLanguageDef (..)
  23     , TokenParser
  24     , GenTokenParser (..)
  25     , makeTokenParser
  26     ) where
  27
  28 import Data.Char ( isAlpha, toLower, toUpper, isSpace, digitToInt )
  29 import Data.List ( nub, sort )
  30 import Control.Monad.Identity
  31 import Text.Parsec.Prim
  32 import Text.Parsec.Char
  33 import Text.Parsec.Combinator
  34
  35 -----------------------------------------------------------
  36 -- Language Definition
  37 -----------------------------------------------------------
  38
  39 type LanguageDef st = GenLanguageDef String st Identity
  40
  41 -- | The @GenLanguageDef@ type is a record that contains all parameterizable
  42 -- features of the 'Text.Parsec.Token' module. The module 'Text.Parsec.Language'
  43 -- contains some default definitions.
  44
  45 data GenLanguageDef s u m
  46     = LanguageDef {
  47
  48     -- | Describes the start of a block comment. Use the empty string if the
  49     -- language doesn't support block comments. For example \"\/*\".
  50
  51     commentStart   :: String,
  52
  53     -- | Describes the end of a block comment. Use the empty string if the
  54     -- language doesn't support block comments. For example \"*\/\".
  55
  56     commentEnd     :: String,
  57
  58     -- | Describes the start of a line comment. Use the empty string if the
  59     -- language doesn't support line comments. For example \"\/\/\".
  60
  61     commentLine    :: String,
  62
  63     -- | Set to 'True' if the language supports nested block comments.
  64
  65     nestedComments :: Bool,
  66
  67     -- | This parser should accept any start characters of identifiers. For
  68     -- example @letter \<|> char \"_\"@.
  69
  70     identStart     :: ParsecT s u m Char,
  71
  72     -- | This parser should accept any legal tail characters of identifiers.
  73     -- For example @alphaNum \<|> char \"_\"@.
  74
  75     identLetter    :: ParsecT s u m Char,
  76
  77     -- | This parser should accept any start characters of operators. For
  78     -- example @oneOf \":!#$%&*+.\/\<=>?\@\\\\^|-~\"@
  79
  80     opStart        :: ParsecT s u m Char,
  81
  82     -- | This parser should accept any legal tail characters of operators.
  83     -- Note that this parser should even be defined if the language doesn't
  84     -- support user-defined operators, or otherwise the 'reservedOp'
  85     -- parser won't work correctly.
  86
  87     opLetter       :: ParsecT s u m Char,
  88
  89     -- | The list of reserved identifiers.
  90
  91     reservedNames  :: [String],
  92
  93     -- | The list of reserved operators.
  94
  95     reservedOpNames:: [String],
  96
  97     -- | Set to 'True' if the language is case sensitive.
  98
  99     caseSensitive  :: Bool
 100
 101     }
 102
 103 -----------------------------------------------------------
 104 -- A first class module: TokenParser
 105 -----------------------------------------------------------
 106
 107 type TokenParser st = GenTokenParser String st Identity
 108
 109 -- | The type of the record that holds lexical parsers that work on
 110 -- @s@ streams with state @u@ over a monad @m@.
 111
 112 data GenTokenParser s u m
 113     = TokenParser {
 114
 115         -- | This lexeme parser parses a legal identifier. Returns the identifier
 116         -- string. This parser will fail on identifiers that are reserved
 117         -- words. Legal identifier (start) characters and reserved words are
 118         -- defined in the 'LanguageDef' that is passed to
 119         -- 'makeTokenParser'. An @identifier@ is treated as
 120         -- a single token using 'try'.
 121
 122         identifier       :: ParsecT s u m String,
 123
 124         -- | The lexeme parser @reserved name@ parses @symbol
 125         -- name@, but it also checks that the @name@ is not a prefix of a
 126         -- valid identifier. A @reserved@ word is treated as a single token
 127         -- using 'try'.
 128
 129         reserved         :: String -> ParsecT s u m (),
 130
 131         -- | This lexeme parser parses a legal operator. Returns the name of the
 132         -- operator. This parser will fail on any operators that are reserved
 133         -- operators. Legal operator (start) characters and reserved operators
 134         -- are defined in the 'LanguageDef' that is passed to
 135         -- 'makeTokenParser'. An @operator@ is treated as a
 136         -- single token using 'try'.
 137
 138         operator         :: ParsecT s u m String,
 139
 140         -- |The lexeme parser @reservedOp name@ parses @symbol
 141         -- name@, but it also checks that the @name@ is not a prefix of a
 142         -- valid operator. A @reservedOp@ is treated as a single token using
 143         -- 'try'.
 144
 145         reservedOp       :: String -> ParsecT s u m (),
 146
 147
 148         -- | This lexeme parser parses a single literal character. Returns the
 149         -- literal character value. This parsers deals correctly with escape
 150         -- sequences. The literal character is parsed according to the grammar
 151         -- rules defined in the Haskell report (which matches most programming
 152         -- languages quite closely).
 153
 154         charLiteral      :: ParsecT s u m Char,
 155
 156         -- | This lexeme parser parses a literal string. Returns the literal
 157         -- string value. This parsers deals correctly with escape sequences and
 158         -- gaps. The literal string is parsed according to the grammar rules
 159         -- defined in the Haskell report (which matches most programming
 160         -- languages quite closely).
 161
 162         stringLiteral    :: ParsecT s u m String,
 163
 164         -- | This lexeme parser parses a natural number (a positive whole
 165         -- number). Returns the value of the number. The number can be
 166         -- specified in 'decimal', 'hexadecimal' or
 167         -- 'octal'. The number is parsed according to the grammar
 168         -- rules in the Haskell report.
 169
 170         natural          :: ParsecT s u m Integer,
 171
 172         -- | This lexeme parser parses an integer (a whole number). This parser
 173         -- is like 'natural' except that it can be prefixed with
 174         -- sign (i.e. \'-\' or \'+\'). Returns the value of the number. The
 175         -- number can be specified in 'decimal', 'hexadecimal'
 176         -- or 'octal'. The number is parsed according
 177         -- to the grammar rules in the Haskell report.
 178
 179         integer          :: ParsecT s u m Integer,
 180
 181         -- | This lexeme parser parses a floating point value. Returns the value
 182         -- of the number. The number is parsed according to the grammar rules
 183         -- defined in the Haskell report.
 184
 185         float            :: ParsecT s u m Double,
 186
 187         -- | This lexeme parser parses either 'natural' or a 'float'.
 188         -- Returns the value of the number. This parsers deals with
 189         -- any overlap in the grammar rules for naturals and floats. The number
 190         -- is parsed according to the grammar rules defined in the Haskell report.
 191
 192         naturalOrFloat   :: ParsecT s u m (Either Integer Double),
 193
 194         -- | Parses a positive whole number in the decimal system. Returns the
 195         -- value of the number.
 196
 197         decimal          :: ParsecT s u m Integer,
 198
 199         -- | Parses a positive whole number in the hexadecimal system. The number
 200         -- should be prefixed with \"0x\" or \"0X\". Returns the value of the
 201         -- number.
 202
 203         hexadecimal      :: ParsecT s u m Integer,
 204
 205         -- | Parses a positive whole number in the octal system. The number
 206         -- should be prefixed with \"0o\" or \"0O\". Returns the value of the
 207         -- number.
 208
 209         octal            :: ParsecT s u m Integer,
 210
 211         -- | Lexeme parser @symbol s@ parses 'string' @s@ and skips
 212         -- trailing white space.
 213
 214         symbol           :: String -> ParsecT s u m String,
 215
 216         -- | @lexeme p@ first applies parser @p@ and than the 'whiteSpace'
 217         -- parser, returning the value of @p@. Every lexical
 218         -- token (lexeme) is defined using @lexeme@, this way every parse
 219         -- starts at a point without white space. Parsers that use @lexeme@ are
 220         -- called /lexeme/ parsers in this document.
 221         --
 222         -- The only point where the 'whiteSpace' parser should be
 223         -- called explicitly is the start of the main parser in order to skip
 224         -- any leading white space.
 225         --
 226         -- >    mainParser  = do{ whiteSpace
 227         -- >                     ; ds <- many (lexeme digit)
 228         -- >                     ; eof
 229         -- >                     ; return (sum ds)
 230         -- >                     }
 231
 232         lexeme           :: forall a. ParsecT s u m a -> ParsecT s u m a,
 233
 234         -- | Parses any white space. White space consists of /zero/ or more
 235         -- occurrences of a 'space', a line comment or a block (multi
 236         -- line) comment. Block comments may be nested. How comments are
 237         -- started and ended is defined in the 'LanguageDef'
 238         -- that is passed to 'makeTokenParser'.
 239
 240         whiteSpace       :: ParsecT s u m (),
 241
 242         -- | Lexeme parser @parens p@ parses @p@ enclosed in parenthesis,
 243         -- returning the value of @p@.
 244
 245         parens           :: forall a. ParsecT s u m a -> ParsecT s u m a,
 246
 247         -- | Lexeme parser @braces p@ parses @p@ enclosed in braces (\'{\' and
 248         -- \'}\'), returning the value of @p@.
 249
 250         braces           :: forall a. ParsecT s u m a -> ParsecT s u m a,
 251
 252         -- | Lexeme parser @angles p@ parses @p@ enclosed in angle brackets (\'\<\'
 253         -- and \'>\'), returning the value of @p@.
 254
 255         angles           :: forall a. ParsecT s u m a -> ParsecT s u m a,
 256
 257         -- | Lexeme parser @brackets p@ parses @p@ enclosed in brackets (\'[\'
 258         -- and \']\'), returning the value of @p@.
 259
 260         brackets         :: forall a. ParsecT s u m a -> ParsecT s u m a,
 261
 262         -- | DEPRECATED: Use 'brackets'.
 263
 264         squares          :: forall a. ParsecT s u m a -> ParsecT s u m a,
 265
 266         -- | Lexeme parser |semi| parses the character \';\' and skips any
 267         -- trailing white space. Returns the string \";\".
 268
 269         semi             :: ParsecT s u m String,
 270
 271         -- | Lexeme parser @comma@ parses the character \',\' and skips any
 272         -- trailing white space. Returns the string \",\".
 273
 274         comma            :: ParsecT s u m String,
 275
 276         -- | Lexeme parser @colon@ parses the character \':\' and skips any
 277         -- trailing white space. Returns the string \":\".
 278
 279         colon            :: ParsecT s u m String,
 280
 281         -- | Lexeme parser @dot@ parses the character \'.\' and skips any
 282         -- trailing white space. Returns the string \".\".
 283
 284         dot              :: ParsecT s u m String,
 285
 286         -- | Lexeme parser @semiSep p@ parses /zero/ or more occurrences of @p@
 287         -- separated by 'semi'. Returns a list of values returned by
 288         -- @p@.
 289
 290         semiSep          :: forall a . ParsecT s u m a -> ParsecT s u m [a],
 291
 292         -- | Lexeme parser @semiSep1 p@ parses /one/ or more occurrences of @p@
 293         -- separated by 'semi'. Returns a list of values returned by @p@.
 294
 295         semiSep1         :: forall a . ParsecT s u m a -> ParsecT s u m [a],
 296
 297         -- | Lexeme parser @commaSep p@ parses /zero/ or more occurrences of
 298         -- @p@ separated by 'comma'. Returns a list of values returned
 299         -- by @p@.
 300
 301         commaSep         :: forall a . ParsecT s u m a -> ParsecT s u m [a],
 302
 303         -- | Lexeme parser @commaSep1 p@ parses /one/ or more occurrences of
 304         -- @p@ separated by 'comma'. Returns a list of values returned
 305         -- by @p@.
 306
 307         commaSep1        :: forall a . ParsecT s u m a -> ParsecT s u m [a]
 308     }
 309
 310 -----------------------------------------------------------
 311 -- Given a LanguageDef, create a token parser.
 312 -----------------------------------------------------------
 313
 314 -- | The expression @makeTokenParser language@ creates a 'GenTokenParser'
 315 -- record that contains lexical parsers that are
 316 -- defined using the definitions in the @language@ record.
 317 --
 318 -- The use of this function is quite stylized - one imports the
 319 -- appropiate language definition and selects the lexical parsers that
 320 -- are needed from the resulting 'GenTokenParser'.
 321 --
 322 -- >  module Main where
 323 -- >
 324 -- >  import Text.Parsec
 325 -- >  import qualified Text.Parsec.Token as P
 326 -- >  import Text.Parsec.Language (haskellDef)
 327 -- >
 328 -- >  -- The parser
 329 -- >  ...
 330 -- >
 331 -- >  expr  =   parens expr
 332 -- >        <|> identifier
 333 -- >        <|> ...
 334 -- >
 335 -- >
 336 -- >  -- The lexer
 337 -- >  lexer       = P.makeTokenParser haskellDef
 338 -- >
 339 -- >  parens      = P.parens lexer
 340 -- >  braces      = P.braces lexer
 341 -- >  identifier  = P.identifier lexer
 342 -- >  reserved    = P.reserved lexer
 343 -- >  ...
 344
 345 makeTokenParser :: (Stream s m Char)
 346                 => GenLanguageDef s u m -> GenTokenParser s u m
 347 makeTokenParser languageDef
 348     = TokenParser{ identifier = identifier
 349                  , reserved = reserved
 350                  , operator = operator
 351                  , reservedOp = reservedOp
 352
 353                  , charLiteral = charLiteral
 354                  , stringLiteral = stringLiteral
 355                  , natural = natural
 356                  , integer = integer
 357                  , float = float
 358                  , naturalOrFloat = naturalOrFloat
 359                  , decimal = decimal
 360                  , hexadecimal = hexadecimal
 361                  , octal = octal
 362
 363                  , symbol = symbol
 364                  , lexeme = lexeme
 365                  , whiteSpace = whiteSpace
 366
 367                  , parens = parens
 368                  , braces = braces
 369                  , angles = angles
 370                  , brackets = brackets
 371                  , squares = brackets
 372                  , semi = semi
 373                  , comma = comma
 374                  , colon = colon
 375                  , dot = dot
 376                  , semiSep = semiSep
 377                  , semiSep1 = semiSep1
 378                  , commaSep = commaSep
 379                  , commaSep1 = commaSep1
 380                  }
 381     where
 382
 383     -----------------------------------------------------------
 384     -- Bracketing
 385     -----------------------------------------------------------
 386     parens p        = between (symbol "(") (symbol ")") p
 387     braces p        = between (symbol "{") (symbol "}") p
 388     angles p        = between (symbol "<") (symbol ">") p
 389     brackets p      = between (symbol "[") (symbol "]") p
 390
 391     semi            = symbol ";"
 392     comma           = symbol ","
 393     dot             = symbol "."
 394     colon           = symbol ":"
 395
 396     commaSep p      = sepBy p comma
 397     semiSep p       = sepBy p semi
 398
 399     commaSep1 p     = sepBy1 p comma
 400     semiSep1 p      = sepBy1 p semi
 401
 402
 403     -----------------------------------------------------------
 404     -- Chars & Strings
 405     -----------------------------------------------------------
 406     charLiteral     = lexeme (between (char '\'')
 407                                       (char '\'' <?> "end of character")
 408                                       characterChar )
 409                     <?> "character"
 410
 411     characterChar   = charLetter <|> charEscape
 412                     <?> "literal character"
 413
 414     charEscape      = do{ char '\\'; escapeCode }
 415     charLetter      = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026'))
 416
 417
 418
 419     stringLiteral   = lexeme (
 420                       do{ str <- between (char '"')
 421                                          (char '"' <?> "end of string")
 422                                          (many stringChar)
 423                         ; return (foldr (maybe id (:)) "" str)
 424                         }
 425                       <?> "literal string")
 426
 427     stringChar      =   do{ c <- stringLetter; return (Just c) }
 428                     <|> stringEscape
 429                     <?> "string character"
 430
 431     stringLetter    = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026'))
 432
 433     stringEscape    = do{ char '\\'
 434                         ;     do{ escapeGap  ; return Nothing }
 435                           <|> do{ escapeEmpty; return Nothing }
 436                           <|> do{ esc <- escapeCode; return (Just esc) }
 437                         }
 438
 439     escapeEmpty     = char '&'
 440     escapeGap       = do{ many1 space
 441                         ; char '\\' <?> "end of string gap"
 442                         }
 443
 444
 445
 446     -- escape codes
 447     escapeCode      = charEsc <|> charNum <|> charAscii <|> charControl
 448                     <?> "escape code"
 449
 450     charControl     = do{ char '^'
 451                         ; code <- upper
 452                         ; return (toEnum (fromEnum code - fromEnum 'A'))
 453                         }
 454
 455     charNum         = do{ code <- decimal
 456                                   <|> do{ char 'o'; number 8 octDigit }
 457                                   <|> do{ char 'x'; number 16 hexDigit }
 458                         ; return (toEnum (fromInteger code))
 459                         }
 460
 461     charEsc         = choice (map parseEsc escMap)
 462                     where
 463                       parseEsc (c,code)     = do{ char c; return code }
 464
 465     charAscii       = choice (map parseAscii asciiMap)
 466                     where
 467                       parseAscii (asc,code) = try (do{ string asc; return code })
 468
 469
 470     -- escape code tables
 471     escMap          = zip ("abfnrtv\\\"\'") ("\a\b\f\n\r\t\v\\\"\'")
 472     asciiMap        = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)
 473
 474     ascii2codes     = ["BS","HT","LF","VT","FF","CR","SO","SI","EM",
 475                        "FS","GS","RS","US","SP"]
 476     ascii3codes     = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL",
 477                        "DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB",
 478                        "CAN","SUB","ESC","DEL"]
 479
 480     ascii2          = ['\BS','\HT','\LF','\VT','\FF','\CR','\SO','\SI',
 481                        '\EM','\FS','\GS','\RS','\US','\SP']
 482     ascii3          = ['\NUL','\SOH','\STX','\ETX','\EOT','\ENQ','\ACK',
 483                        '\BEL','\DLE','\DC1','\DC2','\DC3','\DC4','\NAK',
 484                        '\SYN','\ETB','\CAN','\SUB','\ESC','\DEL']
 485
 486
 487     -----------------------------------------------------------
 488     -- Numbers
 489     -----------------------------------------------------------
 490     naturalOrFloat  = lexeme (natFloat) <?> "number"
 491
 492     float           = lexeme floating   <?> "float"
 493     integer         = lexeme int        <?> "integer"
 494     natural         = lexeme nat        <?> "natural"
 495
 496
 497     -- floats
 498     floating        = do{ n <- decimal
 499                         ; fractExponent n
 500                         }
 501
 502
 503     natFloat        = do{ char '0'
 504                         ; zeroNumFloat
 505                         }
 506                       <|> decimalFloat
 507
 508     zeroNumFloat    =  do{ n <- hexadecimal <|> octal
 509                          ; return (Left n)
 510                          }
 511                     <|> decimalFloat
 512                     <|> fractFloat 0
 513                     <|> return (Left 0)
 514
 515     decimalFloat    = do{ n <- decimal
 516                         ; option (Left n)
 517                                  (fractFloat n)
 518                         }
 519
 520     fractFloat n    = do{ f <- fractExponent n
 521                         ; return (Right f)
 522                         }
 523
 524     fractExponent n = do{ fract <- fraction
 525                         ; expo  <- option 1.0 exponent'
 526                         ; return ((fromInteger n + fract)*expo)
 527                         }
 528                     <|>
 529                       do{ expo <- exponent'
 530                         ; return ((fromInteger n)*expo)
 531                         }
 532
 533     fraction        = do{ char '.'
 534                         ; digits <- many1 digit <?> "fraction"
 535                         ; return (foldr op 0.0 digits)
 536                         }
 537                       <?> "fraction"
 538                     where
 539                       op d f    = (f + fromIntegral (digitToInt d))/10.0
 540
 541     exponent'       = do{ oneOf "eE"
 542                         ; f <- sign
 543                         ; e <- decimal <?> "exponent"
 544                         ; return (power (f e))
 545                         }
 546                       <?> "exponent"
 547                     where
 548                        power e  | e < 0      = 1.0/power(-e)
 549                                 | otherwise  = fromInteger (10^e)
 550
 551
 552     -- integers and naturals
 553     int             = do{ f <- lexeme sign
 554                         ; n <- nat
 555                         ; return (f n)
 556                         }
 557
 558     sign            =   (char '-' >> return negate)
 559                     <|> (char '+' >> return id)
 560                     <|> return id
 561
 562     nat             = zeroNumber <|> decimal
 563
 564     zeroNumber      = do{ char '0'
 565                         ; hexadecimal <|> octal <|> decimal <|> return 0
 566                         }
 567                       <?> ""
 568
 569     decimal         = number 10 digit
 570     hexadecimal     = do{ oneOf "xX"; number 16 hexDigit }
 571     octal           = do{ oneOf "oO"; number 8 octDigit  }
 572
 573     number base baseDigit
 574         = do{ digits <- many1 baseDigit
 575             ; let n = foldl (\x d -> base*x + toInteger (digitToInt d)) 0 digits
 576             ; seq n (return n)
 577             }
 578
 579     -----------------------------------------------------------
 580     -- Operators & reserved ops
 581     -----------------------------------------------------------
 582     reservedOp name =
 583         lexeme $ try $
 584         do{ string name
 585           ; notFollowedBy (opLetter languageDef) <?> ("end of " ++ show name)
 586           }
 587
 588     operator =
 589         lexeme $ try $
 590         do{ name <- oper
 591           ; if (isReservedOp name)
 592              then unexpected ("reserved operator " ++ show name)
 593              else return name
 594           }
 595
 596     oper =
 597         do{ c <- (opStart languageDef)
 598           ; cs <- many (opLetter languageDef)
 599           ; return (c:cs)
 600           }
 601         <?> "operator"
 602
 603     isReservedOp name =
 604         isReserved (sort (reservedOpNames languageDef)) name
 605
 606
 607     -----------------------------------------------------------
 608     -- Identifiers & Reserved words
 609     -----------------------------------------------------------
 610     reserved name =
 611         lexeme $ try $
 612         do{ caseString name
 613           ; notFollowedBy (identLetter languageDef) <?> ("end of " ++ show name)
 614           }
 615
 616     caseString name
 617         | caseSensitive languageDef  = string name
 618         | otherwise               = do{ walk name; return name }
 619         where
 620           walk []     = return ()
 621           walk (c:cs) = do{ caseChar c <?> msg; walk cs }
 622
 623           caseChar c  | isAlpha c  = char (toLower c) <|> char (toUpper c)
 624                       | otherwise  = char c
 625
 626           msg         = show name
 627
 628
 629     identifier =
 630         lexeme $ try $
 631         do{ name <- ident
 632           ; if (isReservedName name)
 633              then unexpected ("reserved word " ++ show name)
 634              else return name
 635           }
 636
 637
 638     ident
 639         = do{ c <- identStart languageDef
 640             ; cs <- many (identLetter languageDef)
 641             ; return (c:cs)
 642             }
 643         <?> "identifier"
 644
 645     isReservedName name
 646         = isReserved theReservedNames caseName
 647         where
 648           caseName      | caseSensitive languageDef  = name
 649                         | otherwise               = map toLower name
 650
 651
 652     isReserved names name
 653         = scan names
 654         where
 655           scan []       = False
 656           scan (r:rs)   = case (compare r name) of
 657                             LT  -> scan rs
 658                             EQ  -> True
 659                             GT  -> False
 660
 661     theReservedNames
 662         | caseSensitive languageDef  = sortedNames
 663         | otherwise               = map (map toLower) sortedNames
 664         where
 665           sortedNames   = sort (reservedNames languageDef)
 666
 667
 668
 669     -----------------------------------------------------------
 670     -- White space & symbols
 671     -----------------------------------------------------------
 672     symbol name
 673         = lexeme (string name)
 674
 675     lexeme p
 676         = do{ x <- p; whiteSpace; return x  }
 677
 678
 679     --whiteSpace
 680     whiteSpace
 681         | noLine && noMulti  = skipMany (simpleSpace <?> "")
 682         | noLine             = skipMany (simpleSpace <|> multiLineComment <?> "")
 683         | noMulti            = skipMany (simpleSpace <|> oneLineComment <?> "")
 684         | otherwise          = skipMany (simpleSpace <|> oneLineComment <|> multiLineComment <?> "")
 685         where
 686           noLine  = null (commentLine languageDef)
 687           noMulti = null (commentStart languageDef)
 688
 689
 690     simpleSpace =
 691         skipMany1 (satisfy isSpace)
 692
 693     oneLineComment =
 694         do{ try (string (commentLine languageDef))
 695           ; skipMany (satisfy (/= '\n'))
 696           ; return ()
 697           }
 698
 699     multiLineComment =
 700         do { try (string (commentStart languageDef))
 701            ; inComment
 702            }
 703
 704     inComment
 705         | nestedComments languageDef  = inCommentMulti
 706         | otherwise                = inCommentSingle
 707
 708     inCommentMulti
 709         =   do{ try (string (commentEnd languageDef)) ; return () }
 710         <|> do{ multiLineComment                     ; inCommentMulti }
 711         <|> do{ skipMany1 (noneOf startEnd)          ; inCommentMulti }
 712         <|> do{ oneOf startEnd                       ; inCommentMulti }
 713         <?> "end of comment"
 714         where
 715           startEnd   = nub (commentEnd languageDef ++ commentStart languageDef)
 716
 717     inCommentSingle
 718         =   do{ try (string (commentEnd languageDef)); return () }
 719         <|> do{ skipMany1 (noneOf startEnd)         ; inCommentSingle }
 720         <|> do{ oneOf startEnd                      ; inCommentSingle }
 721         <?> "end of comment"
 722         where
 723           startEnd   = nub (commentEnd languageDef ++ commentStart languageDef)