Merge pull request #1245 from StephenMcConnel/bug-22483
[mono.git] / mcs / mcs / cs-tokenizer.cs
index ab0c799f53916e10a64e16d45532d14b6d2330a8..2147c145e6c74151380ea0962a61fc614b5f359f 100644 (file)
@@ -188,6 +188,8 @@ namespace Mono.CSharp
                readonly SeekableStreamReader reader;
                readonly CompilationSourceFile source_file;
                readonly CompilerContext context;
+               readonly Report Report;
+
 
                SourceFile current_source;
                Location hidden_block_start;
@@ -200,7 +202,6 @@ namespace Mono.CSharp
                bool handle_get_set = false;
                bool handle_remove_add = false;
                bool handle_where;
-               bool handle_typeof = false;
                bool lambda_arguments_parsing;
                List<Location> escaped_identifiers;
                int parsing_generic_less_than;
@@ -250,6 +251,9 @@ namespace Mono.CSharp
                public const int EvalCompilationUnitParserCharacter = 0x100001;
                public const int EvalUsingDeclarationsParserCharacter = 0x100002;
                public const int DocumentationXref = 0x100003;
+
+               const int UnicodeLS = 0x2028;
+               const int UnicodePS = 0x2029;
                
                //
                // XML documentation buffer. The save point is used to divide
@@ -315,11 +319,6 @@ namespace Mono.CSharp
                        get { return handle_where; }
                        set { handle_where = value; }
                }
-
-               public bool TypeOfParsing {
-                       get { return handle_typeof; }
-                       set { handle_typeof = value; }
-               }
        
                public XmlCommentState doc_state {
                        get { return xml_doc_state; }
@@ -427,7 +426,7 @@ namespace Mono.CSharp
                        }
                }
 
-               public Tokenizer (SeekableStreamReader input, CompilationSourceFile file, ParserSession session)
+               public Tokenizer (SeekableStreamReader input, CompilationSourceFile file, ParserSession session, Report report)
                {
                        this.source_file = file;
                        this.context = file.Compiler;
@@ -436,6 +435,7 @@ namespace Mono.CSharp
                        this.id_builder = session.IDBuilder;
                        this.number_builder = session.NumberBuilder;
                        this.ltb = new LocatedTokenBuffer (session.LocatedTokens);
+                       this.Report = report;
 
                        reader = input;
 
@@ -723,6 +723,7 @@ namespace Mono.CSharp
                                        case Token.BYTE:
                                        case Token.CHAR:
                                        case Token.DECIMAL:
+                                       case Token.DOUBLE:
                                        case Token.FLOAT:
                                        case Token.LONG:
                                        case Token.OBJECT:
@@ -928,7 +929,27 @@ namespace Mono.CSharp
                        if (c < 0x80)
                                return false;
 
-                       return Char.IsLetter (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation;
+                       return is_identifier_part_character_slow_part (c);
+               }
+
+               static bool is_identifier_part_character_slow_part (char c)
+               {
+                       // Slow path: true when c is a letter or falls in one of the Unicode classes listed below
+                       if (Char.IsLetter (c))
+                               return true;
+
+                       switch (Char.GetUnicodeCategory (c)) {
+                               // connecting-character: A Unicode character of the class Pc
+                               case UnicodeCategory.ConnectorPunctuation:
+                               // combining-character: A Unicode character of classes Mn or Mc
+                               case UnicodeCategory.NonSpacingMark:
+                               case UnicodeCategory.SpacingCombiningMark:
+                               // decimal-digit-character: A Unicode character of the class Nd
+                               case UnicodeCategory.DecimalDigitNumber:
+                               return true;
+                       }
+
+                       return false;
                }
 
                public static bool IsKeyword (string s)
@@ -1118,7 +1139,7 @@ namespace Mono.CSharp
                        return true;
                }
 
-               bool parse_less_than ()
+               bool parse_less_than (ref int genericDimension)
                {
                start:
                        int the_token = token ();
@@ -1155,10 +1176,23 @@ namespace Mono.CSharp
                        case Token.VOID:
                                break;
                        case Token.OP_GENERICS_GT:
+                               genericDimension = 1;
+                               return true;
                        case Token.IN:
                        case Token.OUT:
                                return true;
+                       case Token.COMMA:
+                               do {
+                                       ++genericDimension;
+                                       the_token = token ();
+                               } while (the_token == Token.COMMA);
+
+                               if (the_token == Token.OP_GENERICS_GT) {
+                                       ++genericDimension;
+                                       return true;
+                               }
 
+                               return false;
                        default:
                                return false;
                        }
@@ -1172,7 +1206,7 @@ namespace Mono.CSharp
                        else if (the_token == Token.INTERR_NULLABLE || the_token == Token.STAR)
                                goto again;
                        else if (the_token == Token.OP_GENERICS_LT) {
-                               if (!parse_less_than ())
+                               if (!parse_less_than (ref genericDimension))
                                        return false;
                                goto again;
                        } else if (the_token == Token.OPEN_BRACKET) {
@@ -1188,22 +1222,6 @@ namespace Mono.CSharp
                        return false;
                }
 
-               bool parse_generic_dimension (out int dimension)
-               {
-                       dimension = 1;
-
-               again:
-                       int the_token = token ();
-                       if (the_token == Token.OP_GENERICS_GT)
-                               return true;
-                       else if (the_token == Token.COMMA) {
-                               dimension++;
-                               goto again;
-                       }
-
-                       return false;
-               }
-               
                public int peek_token ()
                {
                        int the_token;
@@ -1219,7 +1237,7 @@ namespace Mono.CSharp
                // Tonizes `?' using custom disambiguous rules to return one
                // of following tokens: INTERR_NULLABLE, OP_COALESCING, INTERR
                //
-               // Tricky expression look like:
+               // Tricky expression looks like:
                //
                // Foo ? a = x ? b : c;
                //
@@ -1234,13 +1252,8 @@ namespace Mono.CSharp
                                return Token.OP_COALESCING;
                        }
 
-                       switch (current_token) {
-                       case Token.CLOSE_PARENS:
-                       case Token.TRUE:
-                       case Token.FALSE:
-                       case Token.NULL:
-                       case Token.LITERAL:
-                               return Token.INTERR;
+                       if (d == '.') {
+                               return Token.INTERR_OPERATOR;
                        }
 
                        if (d != ' ') {
@@ -1253,7 +1266,15 @@ namespace Mono.CSharp
                        PushPosition ();
                        current_token = Token.NONE;
                        int next_token;
-                       switch (xtoken ()) {
+                       int parens = 0;
+                       int generics = 0;
+
+                       var nt = xtoken ();
+                       switch (nt) {
+                       case Token.DOT:
+                       case Token.OPEN_BRACKET_EXPR:
+                               next_token = Token.INTERR_OPERATOR;
+                               break;
                        case Token.LITERAL:
                        case Token.TRUE:
                        case Token.FALSE:
@@ -1273,7 +1294,21 @@ namespace Mono.CSharp
                        case Token.COLON:
                                next_token = Token.INTERR_NULLABLE;
                                break;
-                               
+
+                       case Token.OPEN_PARENS:
+                       case Token.OPEN_PARENS_CAST:
+                       case Token.OPEN_PARENS_LAMBDA:
+                               next_token = -1;
+                               ++parens;
+                               break;
+
+                       case Token.OP_GENERICS_LT:
+                       case Token.OP_GENERICS_LT_DECL:
+                       case Token.GENERIC_DIMENSION:
+                               next_token = -1;
+                               ++generics;
+                               break;
+
                        default:
                                next_token = -1;
                                break;
@@ -1284,32 +1319,81 @@ namespace Mono.CSharp
                                case Token.COMMA:
                                case Token.SEMICOLON:
                                case Token.OPEN_BRACE:
-                               case Token.CLOSE_PARENS:
                                case Token.IN:
                                        next_token = Token.INTERR_NULLABLE;
                                        break;
                                        
                                case Token.COLON:
                                        next_token = Token.INTERR;
-                                       break;                                                  
-                                       
+                                       break;
+
+                               case Token.OPEN_PARENS:
+                               case Token.OPEN_PARENS_CAST:
+                               case Token.OPEN_PARENS_LAMBDA:
+                                       ++parens;
+                                       goto default;
+
+                               case Token.CLOSE_PARENS:
+                                       --parens;
+                                       goto default;
+
+                               case Token.OP_GENERICS_LT:
+                               case Token.OP_GENERICS_LT_DECL:
+                               case Token.GENERIC_DIMENSION:
+                                       ++generics;
+                                       goto default;
+
                                default:
                                        int ntoken;
                                        int interrs = 1;
                                        int colons = 0;
                                        int braces = 0;
+                                       int brackets = 0;
                                        //
                                        // All shorcuts failed, do it hard way
                                        //
                                        while ((ntoken = xtoken ()) != Token.EOF) {
-                                               if (ntoken == Token.OPEN_BRACE) {
+                                               switch (ntoken) {
+                                               case Token.OPEN_BRACE:
                                                        ++braces;
                                                        continue;
-                                               }
-
-                                               if (ntoken == Token.CLOSE_BRACE) {
+                                               case Token.OPEN_PARENS:
+                                               case Token.OPEN_PARENS_CAST:
+                                               case Token.OPEN_PARENS_LAMBDA:
+                                                       ++parens;
+                                                       continue;
+                                               case Token.CLOSE_BRACE:
                                                        --braces;
                                                        continue;
+                                               case Token.OP_GENERICS_LT:
+                                               case Token.OP_GENERICS_LT_DECL:
+                                               case Token.GENERIC_DIMENSION:
+                                                       ++generics;
+                                                       continue;
+                                               case Token.OPEN_BRACKET:
+                                               case Token.OPEN_BRACKET_EXPR:
+                                                       ++brackets;
+                                                       continue;
+                                               case Token.CLOSE_BRACKET:
+                                                       --brackets;
+                                                       continue;
+                                               case Token.CLOSE_PARENS:
+                                                       if (parens > 0) {
+                                                               --parens;
+                                                               continue;
+                                                       }
+
+                                                       PopPosition ();
+                                                       return Token.INTERR_NULLABLE;
+
+                                               case Token.OP_GENERICS_GT:
+                                                       if (generics > 0) {
+                                                               --generics;
+                                                               continue;
+                                                       }
+
+                                                       PopPosition ();
+                                                       return Token.INTERR_NULLABLE;
                                                }
 
                                                if (braces != 0)
@@ -1317,6 +1401,17 @@ namespace Mono.CSharp
 
                                                if (ntoken == Token.SEMICOLON)
                                                        break;
+
+                                               if (parens != 0)
+                                                       continue;
+
+                                               if (ntoken == Token.COMMA) {
+                                                       if (generics != 0 || brackets != 0)
+                                                               continue;
+
+                                                       PopPosition ();
+                                                       return Token.INTERR_NULLABLE;
+                                               }
                                                
                                                if (ntoken == Token.COLON) {
                                                        if (++colons == interrs)
@@ -1806,18 +1901,25 @@ namespace Mono.CSharp
                                x = reader.Read ();
                        }
                        
-                       if (x == '\r') {
-                               if (peek_char () == '\n') {
-                                       putback_char = -1;
-                               }
+                       if (x <= 13) {
+                               if (x == '\r') {
+                                       if (peek_char () == '\n') {
+                                               putback_char = -1;
+                                       }
 
-                               x = '\n';
-                               advance_line ();
-                       } else if (x == '\n') {
+                                       x = '\n';
+                                       advance_line ();
+                               } else if (x == '\n') {
+                                       advance_line ();
+                               } else {
+                                       col++;
+                               }
+                       } else if (x >= UnicodeLS && x <= UnicodePS) {
                                advance_line ();
                        } else {
                                col++;
                        }
+
                        return x;
                }
 
@@ -1849,7 +1951,7 @@ namespace Mono.CSharp
                                throw new InternalErrorException (string.Format ("Secondary putback [{0}] putting back [{1}] is not allowed", (char)putback_char, (char) c), Location);
                        }
 
-                       if (c == '\n' || col == 0) {
+                       if (c == '\n' || col == 0 || (c >= UnicodeLS && c <= UnicodePS)) {
                                // It won't happen though.
                                line--;
                                ref_line--;
@@ -1931,7 +2033,7 @@ namespace Mono.CSharp
                        int has_identifier_argument = (int)(cmd & PreprocessorDirective.RequiresArgument);
                        int pos = 0;
 
-                       while (c != -1 && c != '\n') {
+                       while (c != -1 && c != '\n' && c != UnicodeLS && c != UnicodePS) {
                                if (c == '\\' && has_identifier_argument >= 0) {
                                        if (has_identifier_argument != 0) {
                                                has_identifier_argument = 1;
@@ -1958,10 +2060,7 @@ namespace Mono.CSharp
                                        // Eat single-line comments
                                        //
                                        get_char ();
-                                       do {
-                                               c = get_char ();
-                                       } while (c != -1 && c != '\n');
-
+                                       ReadToEndOfLine ();
                                        break;
                                }
 
@@ -2023,10 +2122,7 @@ namespace Mono.CSharp
                                //
                                // Eat any remaining characters to continue parsing on next line
                                //
-                               while (c != -1 && c != '\n') {
-                                       c = get_char ();
-                               }
-
+                               ReadToEndOfLine ();
                                return false;
                        }
 
@@ -2035,10 +2131,7 @@ namespace Mono.CSharp
                                //
                                // Eat any remaining characters to continue parsing on next line
                                //
-                               while (c != -1 && c != '\n') {
-                                       c = get_char ();
-                               }
-
+                               ReadToEndOfLine ();
                                return new_line != 0;
                        }
 
@@ -2052,13 +2145,11 @@ namespace Mono.CSharp
                                c = 0;
                        }
 
-                       if (c != '\n' && c != '/' && c != '"') {
+                       if (c != '\n' && c != '/' && c != '"' && c != UnicodeLS && c != UnicodePS) {
                                //
                                // Eat any remaining characters to continue parsing on next line
                                //
-                               while (c != -1 && c != '\n') {
-                                       c = get_char ();
-                               }
+                               ReadToEndOfLine ();
 
                                Report.Error (1578, loc, "Filename, single-line comment or end-of-line expected");
                                return true;
@@ -2074,16 +2165,15 @@ namespace Mono.CSharp
                                }
                        }
 
-                       if (c == '\n') {
+                       if (c == '\n' || c == UnicodeLS || c == UnicodePS) {
+
                        } else if (c == '/') {
                                ReadSingleLineComment ();
                        } else {
                                //
                                // Eat any remaining characters to continue parsing on next line
                                //
-                               while (c != -1 && c != '\n') {
-                                       c = get_char ();
-                               }
+                               ReadToEndOfLine ();
 
                                Error_EndLineExpected ();
                                return true;
@@ -2318,7 +2408,7 @@ namespace Mono.CSharp
                string TokenizeFileName (ref int c)
                {
                        var string_builder = new StringBuilder ();
-                       while (c != -1 && c != '\n') {
+                       while (c != -1 && c != '\n' && c != UnicodeLS && c != UnicodePS) {
                                c = get_char ();
                                if (c == '"') {
                                        c = get_char ();
@@ -2366,31 +2456,34 @@ namespace Mono.CSharp
                                        Report.Warning (1692, 1, Location, "Invalid number");
 
                                        // Read everything till the end of the line or file
-                                       do {
-                                               c = get_char ();
-                                       } while (c != -1 && c != '\n');
+                                       ReadToEndOfLine ();
                                }
                        }
 
                        return number;
                }
 
+               void ReadToEndOfLine () // Discards input up to and including the next '\n', U+2028 or U+2029, or until get_char () returns -1
+               {
+                       int c;
+                       do { // the character is tested only after it is read, so every call consumes at least one
+                               c = get_char ();
+                       } while (c != -1 && c != '\n' && c != UnicodeLS && c != UnicodePS);
+               }
+
                void ReadSingleLineComment ()
                {
                        if (peek_char () != '/')
                                Report.Warning (1696, 1, Location, "Single-line comment or end-of-line expected");
 
                        // Read everything till the end of the line or file
-                       int c;
-                       do {
-                               c = get_char ();
-                       } while (c != -1 && c != '\n');
+                       ReadToEndOfLine (); // shared helper; unlike the removed loop it also stops at U+2028 and U+2029
                }
 
                /// <summary>
                /// Handles #pragma directive
                /// </summary>
-               void ParsePragmaDirective (string arg)
+               void ParsePragmaDirective ()
                {
                        int c;
                        int length = TokenizePreprocessorIdentifier (out c);
@@ -2410,7 +2503,7 @@ namespace Mono.CSharp
 
                                                var loc = Location;
 
-                                               if (c == '\n' || c == '/') {
+                                               if (c == '\n' || c == '/' || c == UnicodeLS || c == UnicodePS) {
                                                        if (c == '/')
                                                                ReadSingleLineComment ();
 
@@ -2436,7 +2529,7 @@ namespace Mono.CSharp
                                                                                Report.RegisterWarningRegion (loc).WarningEnable (loc, code, context);
                                                                        }
                                                                }
-                                                       } while (code >= 0 && c != '\n' && c != -1);
+                                                       } while (code >= 0 && c != '\n' && c != -1 && c != UnicodeLS && c != UnicodePS);
                                                }
 
                                                return;
@@ -2446,8 +2539,7 @@ namespace Mono.CSharp
                                Report.Warning (1634, 1, Location, "Expected disable or restore");
 
                                // Eat any remaining characters on the line
-                               while (c != '\n' && c != -1)
-                                       c = get_char ();
+                               ReadToEndOfLine ();
 
                                return;
                        }
@@ -2861,7 +2953,7 @@ namespace Mono.CSharp
                                        Report.FeatureIsNotAvailable (context, Location, "#pragma");
                                }
 
-                               ParsePragmaDirective (arg);
+                               ParsePragmaDirective ();
                                return true;
 
                        case PreprocessorDirective.Line:
@@ -2927,7 +3019,7 @@ namespace Mono.CSharp
                                        return Token.LITERAL;
                                }
 
-                               if (c == '\n') {
+                               if (c == '\n' || c == UnicodeLS || c == UnicodePS) {
                                        if (!quoted) {
                                                Report.Error (1010, Location, "Newline in constant");
 
@@ -3024,7 +3116,7 @@ namespace Mono.CSharp
 
                                                        continue;
                                                }
-                                       } else if (Char.IsLetter ((char) c) || Char.GetUnicodeCategory ((char) c) == UnicodeCategory.ConnectorPunctuation) {
+                                       } else if (is_identifier_part_character_slow_part ((char) c)) {
                                                id_builder [pos++] = (char) c;
                                                continue;
                                        }
@@ -3150,6 +3242,8 @@ namespace Mono.CSharp
                                        case '\v':
                                        case '\r':
                                        case '\n':
+                                       case UnicodeLS:
+                                       case UnicodePS:
                                        case '/':
                                                next = peek_token ();
                                                if (next == Token.COMMA || next == Token.CLOSE_BRACKET)
@@ -3368,7 +3462,7 @@ namespace Mono.CSharp
                                                        }
                                                }
 
-                                               while ((d = get_char ()) != -1 && d != '\n');
+                                               ReadToEndOfLine ();
 
                                                any_token_seen |= tokens_seen;
                                                tokens_seen = false;
@@ -3406,7 +3500,7 @@ namespace Mono.CSharp
                                                        if (docAppend)
                                                                xml_comment_buffer.Append ((char) d);
                                                        
-                                                       if (d == '\n'){
+                                                       if (d == '\n' || d == UnicodeLS || d == UnicodePS){
                                                                any_token_seen |= tokens_seen;
                                                                tokens_seen = false;
                                                                // 
@@ -3456,6 +3550,8 @@ namespace Mono.CSharp
                                        return is_number (c, false);
 
                                case '\n': // white space
+                               case UnicodeLS:
+                               case UnicodePS:
                                        any_token_seen |= tokens_seen;
                                        tokens_seen = false;
                                        comments_seen = false;
@@ -3492,7 +3588,7 @@ namespace Mono.CSharp
                                                        continue;
                                                }
 
-                                               if (c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\v' )
+                                               if (c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\v' || c == UnicodeLS || c == UnicodePS)
                                                        continue;
 
                                                if (c == '#') {
@@ -3576,7 +3672,7 @@ namespace Mono.CSharp
                                return Token.LITERAL;
                        }
 
-                       if (c == '\n') {
+                       if (c == '\n' || c == UnicodeLS || c == UnicodePS) {
                                Report.Error (1010, start_location, "Newline in constant");
                                return Token.ERROR;
                        }
@@ -3597,7 +3693,7 @@ namespace Mono.CSharp
 
                                // Try to recover, read until newline or next "'"
                                while ((c = get_char ()) != -1) {
-                                       if (c == '\n' || c == '\'')
+                                       if (c == '\n' || c == '\'' || c == UnicodeLS || c == UnicodePS)
                                                break;
                                }
                        }
@@ -3612,22 +3708,20 @@ namespace Mono.CSharp
                int TokenizeLessThan ()
                {
                        int d;
-                       if (handle_typeof) {
-                               PushPosition ();
-                               if (parse_generic_dimension (out d)) {
-                                       val = d;
-                                       DiscardPosition ();
-                                       return Token.GENERIC_DIMENSION;
-                               }
-                               PopPosition ();
-                       }
 
                        // Save current position and parse next token.
                        PushPosition ();
-                       if (parse_less_than ()) {
+                       int generic_dimension = 0;
+                       if (parse_less_than (ref generic_dimension)) {
                                if (parsing_generic_declaration && (parsing_generic_declaration_doc || token () != Token.DOT)) {
                                        d = Token.OP_GENERICS_LT_DECL;
                                } else {
+                                       if (generic_dimension > 0) {
+                                               val = generic_dimension;
+                                               DiscardPosition ();
+                                               return Token.GENERIC_DIMENSION;
+                                       }
+
                                        d = Token.OP_GENERICS_LT;
                                }
                                PopPosition ();
@@ -3726,10 +3820,6 @@ namespace Mono.CSharp
                        return null;
                }
 
-               Report Report {
-                       get { return context.Report; }
-               }
-
                void reset_doc_comment ()
                {
                        xml_comment_buffer.Length = 0;