3a_asm/Text/Parsec/Token.hs

   1 -----------------------------------------------------------------------------
   2 -- |
   3 -- Module      :  Text.Parsec.Token
   4 -- Copyright   :  (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007
   5 -- License     :  BSD-style (see the LICENSE file)
   6 --
   7 -- Maintainer  :  derek.a.elkins@gmail.com
   8 -- Stability   :  provisional
   9 -- Portability :  non-portable (uses local universal quantification: PolymorphicComponents)
  10 --
  11 -- A helper module to parse lexical elements (tokens). See 'makeTokenParser'
  12 -- for a description of how to use it.
  13 --
  14 -----------------------------------------------------------------------------
  15
  16 {-# LANGUAGE PolymorphicComponents #-}
  17 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
  18
  19 module Text.Parsec.Token
  20     ( LanguageDef
  21     , GenLanguageDef (..)
  22     , TokenParser
  23     , GenTokenParser (..)
  24     , makeTokenParser
  25     ) where
  26
  27 import Data.Char ( isAlpha, toLower, toUpper, isSpace, digitToInt )
  28 import Data.List ( nub, sort )
  29 import Control.Monad.Identity
  30 import Text.Parsec.Prim
  31 import Text.Parsec.Char
  32 import Text.Parsec.Combinator
  33
  34 -----------------------------------------------------------
  35 -- Language Definition
  36 -----------------------------------------------------------
  37
  38 type LanguageDef st = GenLanguageDef String st Identity
  39
  40 -- | The @GenLanguageDef@ type is a record that contains all parameterizable
  41 -- features of the 'Text.Parsec.Token' module. The module 'Text.Parsec.Language'
  42 -- contains some default definitions.
  43
  44 data GenLanguageDef s u m
  45     = LanguageDef {
  46
  47     -- | Describes the start of a block comment. Use the empty string if the
  48     -- language doesn't support block comments. For example \"\/*\".
  49
  50     commentStart   :: String,
  51
  52     -- | Describes the end of a block comment. Use the empty string if the
  53     -- language doesn't support block comments. For example \"*\/\".
  54
  55     commentEnd     :: String,
  56
  57     -- | Describes the start of a line comment. Use the empty string if the
  58     -- language doesn't support line comments. For example \"\/\/\".
  59
  60     commentLine    :: String,
  61
  62     -- | Set to 'True' if the language supports nested block comments.
  63
  64     nestedComments :: Bool,
  65
  66     -- | This parser should accept any start characters of identifiers. For
  67     -- example @letter \<|> char \"_\"@.
  68
  69     identStart     :: ParsecT s u m Char,
  70
  71     -- | This parser should accept any legal tail characters of identifiers.
  72     -- For example @alphaNum \<|> char \"_\"@.
  73
  74     identLetter    :: ParsecT s u m Char,
  75
  76     -- | This parser should accept any start characters of operators. For
  77     -- example @oneOf \":!#$%&*+.\/\<=>?\@\\\\^|-~\"@
  78
  79     opStart        :: ParsecT s u m Char,
  80
  81     -- | This parser should accept any legal tail characters of operators.
  82     -- Note that this parser should even be defined if the language doesn't
  83     -- support user-defined operators, or otherwise the 'reservedOp'
  84     -- parser won't work correctly.
  85
  86     opLetter       :: ParsecT s u m Char,
  87
  88     -- | The list of reserved identifiers.
  89
  90     reservedNames  :: [String],
  91
  92     -- | The list of reserved operators.
  93
  94     reservedOpNames:: [String],
  95
  96     -- | Set to 'True' if the language is case sensitive.
  97
  98     caseSensitive  :: Bool
  99
 100     }
 101
 102 -----------------------------------------------------------
 103 -- A first class module: TokenParser
 104 -----------------------------------------------------------
 105
 106 type TokenParser st = GenTokenParser String st Identity
 107
 108 -- | The type of the record that holds lexical parsers that work on
 109 -- @s@ streams with state @u@ over a monad @m@.
 110
 111 data GenTokenParser s u m
 112     = TokenParser {
 113
 114         -- | This lexeme parser parses a legal identifier. Returns the identifier
 115         -- string. This parser will fail on identifiers that are reserved
 116         -- words. Legal identifier (start) characters and reserved words are
 117         -- defined in the 'LanguageDef' that is passed to
 118         -- 'makeTokenParser'. An @identifier@ is treated as
 119         -- a single token using 'try'.
 120
 121         identifier       :: ParsecT s u m String,
 122
 123         -- | The lexeme parser @reserved name@ parses @symbol
 124         -- name@, but it also checks that the @name@ is not a prefix of a
 125         -- valid identifier. A @reserved@ word is treated as a single token
 126         -- using 'try'.
 127
 128         reserved         :: String -> ParsecT s u m (),
 129
 130         -- | This lexeme parser parses a legal operator. Returns the name of the
 131         -- operator. This parser will fail on any operators that are reserved
 132         -- operators. Legal operator (start) characters and reserved operators
 133         -- are defined in the 'LanguageDef' that is passed to
 134         -- 'makeTokenParser'. An @operator@ is treated as a
 135         -- single token using 'try'.
 136
 137         operator         :: ParsecT s u m String,
 138
 139         -- |The lexeme parser @reservedOp name@ parses @symbol
 140         -- name@, but it also checks that the @name@ is not a prefix of a
 141         -- valid operator. A @reservedOp@ is treated as a single token using
 142         -- 'try'.
 143
 144         reservedOp       :: String -> ParsecT s u m (),
 145
 146
 147         -- | This lexeme parser parses a single literal character. Returns the
 148         -- literal character value. This parsers deals correctly with escape
 149         -- sequences. The literal character is parsed according to the grammar
 150         -- rules defined in the Haskell report (which matches most programming
 151         -- languages quite closely).
 152
 153         charLiteral      :: ParsecT s u m Char,
 154
 155         -- | This lexeme parser parses a literal string. Returns the literal
 156         -- string value. This parsers deals correctly with escape sequences and
 157         -- gaps. The literal string is parsed according to the grammar rules
 158         -- defined in the Haskell report (which matches most programming
 159         -- languages quite closely).
 160
 161         stringLiteral    :: ParsecT s u m String,
 162
 163         -- | This lexeme parser parses a natural number (a positive whole
 164         -- number). Returns the value of the number. The number can be
 165         -- specified in 'decimal', 'hexadecimal' or
 166         -- 'octal'. The number is parsed according to the grammar
 167         -- rules in the Haskell report.
 168
 169         natural          :: ParsecT s u m Integer,
 170
 171         -- | This lexeme parser parses an integer (a whole number). This parser
 172         -- is like 'natural' except that it can be prefixed with
 173         -- sign (i.e. \'-\' or \'+\'). Returns the value of the number. The
 174         -- number can be specified in 'decimal', 'hexadecimal'
 175         -- or 'octal'. The number is parsed according
 176         -- to the grammar rules in the Haskell report.
 177
 178         integer          :: ParsecT s u m Integer,
 179
 180         -- | This lexeme parser parses a floating point value. Returns the value
 181         -- of the number. The number is parsed according to the grammar rules
 182         -- defined in the Haskell report.
 183
 184         float            :: ParsecT s u m Double,
 185
 186         -- | This lexeme parser parses either 'natural' or a 'float'.
 187         -- Returns the value of the number. This parsers deals with
 188         -- any overlap in the grammar rules for naturals and floats. The number
 189         -- is parsed according to the grammar rules defined in the Haskell report.
 190
 191         naturalOrFloat   :: ParsecT s u m (Either Integer Double),
 192
 193         -- | Parses a positive whole number in the decimal system. Returns the
 194         -- value of the number.
 195
 196         decimal          :: ParsecT s u m Integer,
 197
 198         -- | Parses a positive whole number in the hexadecimal system. The number
 199         -- should be prefixed with \"0x\" or \"0X\". Returns the value of the
 200         -- number.
 201
 202         hexadecimal      :: ParsecT s u m Integer,
 203
 204         -- | Parses a positive whole number in the octal system. The number
 205         -- should be prefixed with \"0o\" or \"0O\". Returns the value of the
 206         -- number.
 207
 208         octal            :: ParsecT s u m Integer,
 209
 210         -- | Lexeme parser @symbol s@ parses 'string' @s@ and skips
 211         -- trailing white space.
 212
 213         symbol           :: String -> ParsecT s u m String,
 214
 215         -- | @lexeme p@ first applies parser @p@ and than the 'whiteSpace'
 216         -- parser, returning the value of @p@. Every lexical
 217         -- token (lexeme) is defined using @lexeme@, this way every parse
 218         -- starts at a point without white space. Parsers that use @lexeme@ are
 219         -- called /lexeme/ parsers in this document.
 220         --
 221         -- The only point where the 'whiteSpace' parser should be
 222         -- called explicitly is the start of the main parser in order to skip
 223         -- any leading white space.
 224         --
 225         -- >    mainParser  = do{ whiteSpace
 226         -- >                     ; ds <- many (lexeme digit)
 227         -- >                     ; eof
 228         -- >                     ; return (sum ds)
 229         -- >                     }
 230
 231         lexeme           :: forall a. ParsecT s u m a -> ParsecT s u m a,
 232
 233         -- | Parses any white space. White space consists of /zero/ or more
 234         -- occurrences of a 'space', a line comment or a block (multi
 235         -- line) comment. Block comments may be nested. How comments are
 236         -- started and ended is defined in the 'LanguageDef'
 237         -- that is passed to 'makeTokenParser'.
 238
 239         whiteSpace       :: ParsecT s u m (),
 240
 241         -- | Lexeme parser @parens p@ parses @p@ enclosed in parenthesis,
 242         -- returning the value of @p@.
 243
 244         parens           :: forall a. ParsecT s u m a -> ParsecT s u m a,
 245
 246         -- | Lexeme parser @braces p@ parses @p@ enclosed in braces (\'{\' and
 247         -- \'}\'), returning the value of @p@.
 248
 249         braces           :: forall a. ParsecT s u m a -> ParsecT s u m a,
 250
 251         -- | Lexeme parser @angles p@ parses @p@ enclosed in angle brackets (\'\<\'
 252         -- and \'>\'), returning the value of @p@.
 253
 254         angles           :: forall a. ParsecT s u m a -> ParsecT s u m a,
 255
 256         -- | Lexeme parser @brackets p@ parses @p@ enclosed in brackets (\'[\'
 257         -- and \']\'), returning the value of @p@.
 258
 259         brackets         :: forall a. ParsecT s u m a -> ParsecT s u m a,
 260
 261         -- | DEPRECATED: Use 'brackets'.
 262
 263         squares          :: forall a. ParsecT s u m a -> ParsecT s u m a,
 264
 265         -- | Lexeme parser |semi| parses the character \';\' and skips any
 266         -- trailing white space. Returns the string \";\".
 267
 268         semi             :: ParsecT s u m String,
 269
 270         -- | Lexeme parser @comma@ parses the character \',\' and skips any
 271         -- trailing white space. Returns the string \",\".
 272
 273         comma            :: ParsecT s u m String,
 274
 275         -- | Lexeme parser @colon@ parses the character \':\' and skips any
 276         -- trailing white space. Returns the string \":\".
 277
 278         colon            :: ParsecT s u m String,
 279
 280         -- | Lexeme parser @dot@ parses the character \'.\' and skips any
 281         -- trailing white space. Returns the string \".\".
 282
 283         dot              :: ParsecT s u m String,
 284
 285         -- | Lexeme parser @semiSep p@ parses /zero/ or more occurrences of @p@
 286         -- separated by 'semi'. Returns a list of values returned by
 287         -- @p@.
 288
 289         semiSep          :: forall a . ParsecT s u m a -> ParsecT s u m [a],
 290
 291         -- | Lexeme parser @semiSep1 p@ parses /one/ or more occurrences of @p@
 292         -- separated by 'semi'. Returns a list of values returned by @p@.
 293
 294         semiSep1         :: forall a . ParsecT s u m a -> ParsecT s u m [a],
 295
 296         -- | Lexeme parser @commaSep p@ parses /zero/ or more occurrences of
 297         -- @p@ separated by 'comma'. Returns a list of values returned
 298         -- by @p@.
 299
 300         commaSep         :: forall a . ParsecT s u m a -> ParsecT s u m [a],
 301
 302         -- | Lexeme parser @commaSep1 p@ parses /one/ or more occurrences of
 303         -- @p@ separated by 'comma'. Returns a list of values returned
 304         -- by @p@.
 305
 306         commaSep1        :: forall a . ParsecT s u m a -> ParsecT s u m [a]
 307     }
 308
 309 -----------------------------------------------------------
 310 -- Given a LanguageDef, create a token parser.
 311 -----------------------------------------------------------
 312
 313 -- | The expression @makeTokenParser language@ creates a 'GenTokenParser'
 314 -- record that contains lexical parsers that are
 315 -- defined using the definitions in the @language@ record.
 316 --
 317 -- The use of this function is quite stylized - one imports the
 318 -- appropiate language definition and selects the lexical parsers that
 319 -- are needed from the resulting 'GenTokenParser'.
 320 --
 321 -- >  module Main where
 322 -- >
 323 -- >  import Text.Parsec
 324 -- >  import qualified Text.Parsec.Token as P
 325 -- >  import Text.Parsec.Language (haskellDef)
 326 -- >
 327 -- >  -- The parser
 328 -- >  ...
 329 -- >
 330 -- >  expr  =   parens expr
 331 -- >        <|> identifier
 332 -- >        <|> ...
 333 -- >
 334 -- >
 335 -- >  -- The lexer
 336 -- >  lexer       = P.makeTokenParser haskellDef
 337 -- >
 338 -- >  parens      = P.parens lexer
 339 -- >  braces      = P.braces lexer
 340 -- >  identifier  = P.identifier lexer
 341 -- >  reserved    = P.reserved lexer
 342 -- >  ...
 343
 344 makeTokenParser :: (Stream s m Char)
 345                 => GenLanguageDef s u m -> GenTokenParser s u m
 346 makeTokenParser languageDef
 347     = TokenParser{ identifier = identifier
 348                  , reserved = reserved
 349                  , operator = operator
 350                  , reservedOp = reservedOp
 351
 352                  , charLiteral = charLiteral
 353                  , stringLiteral = stringLiteral
 354                  , natural = natural
 355                  , integer = integer
 356                  , float = float
 357                  , naturalOrFloat = naturalOrFloat
 358                  , decimal = decimal
 359                  , hexadecimal = hexadecimal
 360                  , octal = octal
 361
 362                  , symbol = symbol
 363                  , lexeme = lexeme
 364                  , whiteSpace = whiteSpace
 365
 366                  , parens = parens
 367                  , braces = braces
 368                  , angles = angles
 369                  , brackets = brackets
 370                  , squares = brackets
 371                  , semi = semi
 372                  , comma = comma
 373                  , colon = colon
 374                  , dot = dot
 375                  , semiSep = semiSep
 376                  , semiSep1 = semiSep1
 377                  , commaSep = commaSep
 378                  , commaSep1 = commaSep1
 379                  }
 380     where
 381
 382     -----------------------------------------------------------
 383     -- Bracketing
 384     -----------------------------------------------------------
 385     parens p        = between (symbol "(") (symbol ")") p
 386     braces p        = between (symbol "{") (symbol "}") p
 387     angles p        = between (symbol "<") (symbol ">") p
 388     brackets p      = between (symbol "[") (symbol "]") p
 389
 390     semi            = symbol ";"
 391     comma           = symbol ","
 392     dot             = symbol "."
 393     colon           = symbol ":"
 394
 395     commaSep p      = sepBy p comma
 396     semiSep p       = sepBy p semi
 397
 398     commaSep1 p     = sepBy1 p comma
 399     semiSep1 p      = sepBy1 p semi
 400
 401
 402     -----------------------------------------------------------
 403     -- Chars & Strings
 404     -----------------------------------------------------------
 405     charLiteral     = lexeme (between (char '\'')
 406                                       (char '\'' <?> "end of character")
 407                                       characterChar )
 408                     <?> "character"
 409
 410     characterChar   = charLetter <|> charEscape
 411                     <?> "literal character"
 412
 413     charEscape      = do{ char '\\'; escapeCode }
 414     charLetter      = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026'))
 415
 416
 417
 418     stringLiteral   = lexeme (
 419                       do{ str <- between (char '"')
 420                                          (char '"' <?> "end of string")
 421                                          (many stringChar)
 422                         ; return (foldr (maybe id (:)) "" str)
 423                         }
 424                       <?> "literal string")
 425
 426     stringChar      =   do{ c <- stringLetter; return (Just c) }
 427                     <|> stringEscape
 428                     <?> "string character"
 429
 430     stringLetter    = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026'))
 431
 432     stringEscape    = do{ char '\\'
 433                         ;     do{ escapeGap  ; return Nothing }
 434                           <|> do{ escapeEmpty; return Nothing }
 435                           <|> do{ esc <- escapeCode; return (Just esc) }
 436                         }
 437
 438     escapeEmpty     = char '&'
 439     escapeGap       = do{ many1 space
 440                         ; char '\\' <?> "end of string gap"
 441                         }
 442
 443
 444
 445     -- escape codes
 446     escapeCode      = charEsc <|> charNum <|> charAscii <|> charControl
 447                     <?> "escape code"
 448
 449     charControl     = do{ char '^'
 450                         ; code <- upper
 451                         ; return (toEnum (fromEnum code - fromEnum 'A'))
 452                         }
 453
 454     charNum         = do{ code <- decimal
 455                                   <|> do{ char 'o'; number 8 octDigit }
 456                                   <|> do{ char 'x'; number 16 hexDigit }
 457                         ; return (toEnum (fromInteger code))
 458                         }
 459
 460     charEsc         = choice (map parseEsc escMap)
 461                     where
 462                       parseEsc (c,code)     = do{ char c; return code }
 463
 464     charAscii       = choice (map parseAscii asciiMap)
 465                     where
 466                       parseAscii (asc,code) = try (do{ string asc; return code })
 467
 468
 469     -- escape code tables
 470     escMap          = zip ("abfnrtv\\\"\'") ("\a\b\f\n\r\t\v\\\"\'")
 471     asciiMap        = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)
 472
 473     ascii2codes     = ["BS","HT","LF","VT","FF","CR","SO","SI","EM",
 474                        "FS","GS","RS","US","SP"]
 475     ascii3codes     = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL",
 476                        "DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB",
 477                        "CAN","SUB","ESC","DEL"]
 478
 479     ascii2          = ['\BS','\HT','\LF','\VT','\FF','\CR','\SO','\SI',
 480                        '\EM','\FS','\GS','\RS','\US','\SP']
 481     ascii3          = ['\NUL','\SOH','\STX','\ETX','\EOT','\ENQ','\ACK',
 482                        '\BEL','\DLE','\DC1','\DC2','\DC3','\DC4','\NAK',
 483                        '\SYN','\ETB','\CAN','\SUB','\ESC','\DEL']
 484
 485
 486     -----------------------------------------------------------
 487     -- Numbers
 488     -----------------------------------------------------------
 489     naturalOrFloat  = lexeme (natFloat) <?> "number"
 490
 491     float           = lexeme floating   <?> "float"
 492     integer         = lexeme int        <?> "integer"
 493     natural         = lexeme nat        <?> "natural"
 494
 495
 496     -- floats
 497     floating        = do{ n <- decimal
 498                         ; fractExponent n
 499                         }
 500
 501
 502     natFloat        = do{ char '0'
 503                         ; zeroNumFloat
 504                         }
 505                       <|> decimalFloat
 506
 507     zeroNumFloat    =  do{ n <- hexadecimal <|> octal
 508                          ; return (Left n)
 509                          }
 510                     <|> decimalFloat
 511                     <|> fractFloat 0
 512                     <|> return (Left 0)
 513
 514     decimalFloat    = do{ n <- decimal
 515                         ; option (Left n)
 516                                  (fractFloat n)
 517                         }
 518
 519     fractFloat n    = do{ f <- fractExponent n
 520                         ; return (Right f)
 521                         }
 522
 523     fractExponent n = do{ fract <- fraction
 524                         ; expo  <- option 1.0 exponent'
 525                         ; return ((fromInteger n + fract)*expo)
 526                         }
 527                     <|>
 528                       do{ expo <- exponent'
 529                         ; return ((fromInteger n)*expo)
 530                         }
 531
 532     fraction        = do{ char '.'
 533                         ; digits <- many1 digit <?> "fraction"
 534                         ; return (foldr op 0.0 digits)
 535                         }
 536                       <?> "fraction"
 537                     where
 538                       op d f    = (f + fromIntegral (digitToInt d))/10.0
 539
 540     exponent'       = do{ oneOf "eE"
 541                         ; f <- sign
 542                         ; e <- decimal <?> "exponent"
 543                         ; return (power (f e))
 544                         }
 545                       <?> "exponent"
 546                     where
 547                        power e  | e < 0      = 1.0/power(-e)
 548                                 | otherwise  = fromInteger (10^e)
 549
 550
 551     -- integers and naturals
 552     int             = do{ f <- lexeme sign
 553                         ; n <- nat
 554                         ; return (f n)
 555                         }
 556
 557     sign            =   (char '-' >> return negate)
 558                     <|> (char '+' >> return id)
 559                     <|> return id
 560
 561     nat             = zeroNumber <|> decimal
 562
 563     zeroNumber      = do{ char '0'
 564                         ; hexadecimal <|> octal <|> decimal <|> return 0
 565                         }
 566                       <?> ""
 567
 568     decimal         = number 10 digit
 569     hexadecimal     = do{ oneOf "xX"; number 16 hexDigit }
 570     octal           = do{ oneOf "oO"; number 8 octDigit  }
 571
 572     number base baseDigit
 573         = do{ digits <- many1 baseDigit
 574             ; let n = foldl (\x d -> base*x + toInteger (digitToInt d)) 0 digits
 575             ; seq n (return n)
 576             }
 577
 578     -----------------------------------------------------------
 579     -- Operators & reserved ops
 580     -----------------------------------------------------------
 581     reservedOp name =
 582         lexeme $ try $
 583         do{ string name
 584           ; notFollowedBy (opLetter languageDef) <?> ("end of " ++ show name)
 585           }
 586
 587     operator =
 588         lexeme $ try $
 589         do{ name <- oper
 590           ; if (isReservedOp name)
 591              then unexpected ("reserved operator " ++ show name)
 592              else return name
 593           }
 594
 595     oper =
 596         do{ c <- (opStart languageDef)
 597           ; cs <- many (opLetter languageDef)
 598           ; return (c:cs)
 599           }
 600         <?> "operator"
 601
 602     isReservedOp name =
 603         isReserved (sort (reservedOpNames languageDef)) name
 604
 605
 606     -----------------------------------------------------------
 607     -- Identifiers & Reserved words
 608     -----------------------------------------------------------
 609     reserved name =
 610         lexeme $ try $
 611         do{ caseString name
 612           ; notFollowedBy (identLetter languageDef) <?> ("end of " ++ show name)
 613           }
 614
 615     caseString name
 616         | caseSensitive languageDef  = string name
 617         | otherwise               = do{ walk name; return name }
 618         where
 619           walk []     = return ()
 620           walk (c:cs) = do{ caseChar c <?> msg; walk cs }
 621
 622           caseChar c  | isAlpha c  = char (toLower c) <|> char (toUpper c)
 623                       | otherwise  = char c
 624
 625           msg         = show name
 626
 627
 628     identifier =
 629         lexeme $ try $
 630         do{ name <- ident
 631           ; if (isReservedName name)
 632              then unexpected ("reserved word " ++ show name)
 633              else return name
 634           }
 635
 636
 637     ident
 638         = do{ c <- identStart languageDef
 639             ; cs <- many (identLetter languageDef)
 640             ; return (c:cs)
 641             }
 642         <?> "identifier"
 643
 644     isReservedName name
 645         = isReserved theReservedNames caseName
 646         where
 647           caseName      | caseSensitive languageDef  = name
 648                         | otherwise               = map toLower name
 649
 650
 651     isReserved names name
 652         = scan names
 653         where
 654           scan []       = False
 655           scan (r:rs)   = case (compare r name) of
 656                             LT  -> scan rs
 657                             EQ  -> True
 658                             GT  -> False
 659
 660     theReservedNames
 661         | caseSensitive languageDef  = sortedNames
 662         | otherwise               = map (map toLower) sortedNames
 663         where
 664           sortedNames   = sort (reservedNames languageDef)
 665
 666
 667
 668     -----------------------------------------------------------
 669     -- White space & symbols
 670     -----------------------------------------------------------
 671     symbol name
 672         = lexeme (string name)
 673
 674     lexeme p
 675         = do{ x <- p; whiteSpace; return x  }
 676
 677
 678     --whiteSpace
 679     whiteSpace
 680         | noLine && noMulti  = skipMany (simpleSpace <?> "")
 681         | noLine             = skipMany (simpleSpace <|> multiLineComment <?> "")
 682         | noMulti            = skipMany (simpleSpace <|> oneLineComment <?> "")
 683         | otherwise          = skipMany (simpleSpace <|> oneLineComment <|> multiLineComment <?> "")
 684         where
 685           noLine  = null (commentLine languageDef)
 686           noMulti = null (commentStart languageDef)
 687
 688
 689     simpleSpace =
 690         skipMany1 (satisfy isSpace)
 691
 692     oneLineComment =
 693         do{ try (string (commentLine languageDef))
 694           ; skipMany (satisfy (/= '\n'))
 695           ; return ()
 696           }
 697
 698     multiLineComment =
 699         do { try (string (commentStart languageDef))
 700            ; inComment
 701            }
 702
 703     inComment
 704         | nestedComments languageDef  = inCommentMulti
 705         | otherwise                = inCommentSingle
 706
 707     inCommentMulti
 708         =   do{ try (string (commentEnd languageDef)) ; return () }
 709         <|> do{ multiLineComment                     ; inCommentMulti }
 710         <|> do{ skipMany1 (noneOf startEnd)          ; inCommentMulti }
 711         <|> do{ oneOf startEnd                       ; inCommentMulti }
 712         <?> "end of comment"
 713         where
 714           startEnd   = nub (commentEnd languageDef ++ commentStart languageDef)
 715
 716     inCommentSingle
 717         =   do{ try (string (commentEnd languageDef)); return () }
 718         <|> do{ skipMany1 (noneOf startEnd)         ; inCommentSingle }
 719         <|> do{ oneOf startEnd                      ; inCommentSingle }
 720         <?> "end of comment"
 721         where
 722           startEnd   = nub (commentEnd languageDef ++ commentStart languageDef)