2007-02-13 Martin Baulig <martin@ximian.com>
[mono.git] / mcs / mcs / cs-tokenizer.cs
index 0b14023bd362e2781b1d359985b9d4b9ece1053e..e288f8d9dfc330d54839c4a957b3b9e22ce765ec 100644 (file)
 // (C) 2001, 2002 Ximian, Inc (http://www.ximian.com)
 // (C) 2004 Novell, Inc
 //
-
-/*
- * TODO:
- *   Make sure we accept the proper Unicode ranges, per the spec.
- *   Report error 1032
-*/
+//
 
 using System;
 using System.Text;
@@ -45,6 +40,7 @@ namespace Mono.CSharp
                bool handle_assembly = false;
                bool handle_constraints = false;
                bool handle_typeof = false;
+               bool linq;
                Location current_location;
                Location current_comment_location = Location.Null;
                ArrayList escapedIdentifiers = new ArrayList ();
@@ -73,6 +69,7 @@ namespace Mono.CSharp
                bool any_token_seen = false;
 
                static Hashtable tokenValues;
+               static readonly char[] simple_whitespaces = new char[] { ' ', '\t' };
 
                private static Hashtable TokenValueName
                {
@@ -268,9 +265,10 @@ namespace Mono.CSharp
                        public int col;
                        public int putback_char;
                        public int previous_col;
-#if GMCS_SOURCES
+                       public Stack ifstack;
                        public int parsing_generic_less_than;
-#endif                 
+                       public int current_token;
+
                        public Position (Tokenizer t)
                        {
                                position = t.reader.Position;
@@ -278,9 +276,10 @@ namespace Mono.CSharp
                                col = t.col;
                                putback_char = t.putback_char;
                                previous_col = t.previous_col;
-#if GMCS_SOURCES
+                               if (t.ifstack != null && t.ifstack.Count != 0)
+                                       ifstack = (Stack)t.ifstack.Clone ();
                                parsing_generic_less_than = t.parsing_generic_less_than;
-#endif
+                               current_token = t.current_token;
                        }
                }
                
@@ -298,7 +297,9 @@ namespace Mono.CSharp
                        col = p.col;
                        putback_char = p.putback_char;
                        previous_col = p.previous_col;
-
+                       ifstack = p.ifstack;
+                       parsing_generic_less_than = p.parsing_generic_less_than;
+                       current_token = p.current_token;
                }
 
                // Do not reset the position, ignore it.
@@ -468,6 +469,7 @@ namespace Mono.CSharp
                {
                        this.ref_name = file;
                        this.file_name = file;
+                       linq = RootContext.Version == LanguageVersion.LINQ;
                        reader = input;
                        
                        putback_char = -1;
@@ -494,7 +496,8 @@ namespace Mono.CSharp
 
                static bool is_identifier_part_character (char c)
                {
-                       return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9') || Char.IsLetter (c);
+                       return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9') ||
+                               Char.IsLetter (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation;
                }
 
                public static bool IsKeyword (string s)
@@ -517,7 +520,6 @@ namespace Mono.CSharp
                        return true;
                }
 
-#if GMCS_SOURCE
                bool parse_generic_dimension (out int dimension)
                {
                        dimension = 1;
@@ -593,8 +595,7 @@ namespace Mono.CSharp
                        return false;
                }
 
-               int parsing_generic_less_than = 0;
-
+#if GMCS_SOURCE
                public void PutbackNullable ()
                {
                        if (nullable_pos < 0)
@@ -623,6 +624,155 @@ namespace Mono.CSharp
                                nullable_pos = -1;
                }
 #endif
+               
+               //
+               // Reads one token with token () and returns it, bracketing the
+               // read with PushPosition () / PopPosition () so that the caller
+               // can look at the upcoming token without consuming it
+               // (presumably those two save and restore the tokenizer state).
+               //
+               public int peek_token ()
+               {
+                       PushPosition ();
+                       int upcoming = token ();
+                       PopPosition ();
+
+                       return upcoming;
+               }
+               
+               //
+               // Consumes a (possibly qualified, possibly generic) name made of
+               // IDENTIFIER tokens separated by DOT or DOUBLE_COLON.
+               //
+               // `next' is the token the caller has already peeked, or -1 to
+               // have it peeked here.
+               //
+               // Returns true once an identifier has been consumed and what
+               // follows is neither a separator nor OP_GENERICS_LT.  Returns
+               // false when an identifier was expected but not found, or when
+               // parse_less_than () fails after OP_GENERICS_LT.
+               //
+               bool parse_namespace_or_typename (int next)
+               {
+                       if (next == -1)
+                               next = peek_token ();
+
+                       while (next == Token.IDENTIFIER){
+                               // consume the identifier itself
+                               token ();
+
+                               for (;;){
+                                       next = peek_token ();
+
+                                       if (next == Token.OP_GENERICS_LT){
+                                               token ();
+                                               if (!parse_less_than ())
+                                                       return false;
+
+                                               // look again at whatever follows
+                                               continue;
+                                       }
+
+                                       // no separator: the name ends here
+                                       if (next != Token.DOT && next != Token.DOUBLE_COLON)
+                                               return true;
+
+                                       // consume the separator, the outer loop then
+                                       // expects the next identifier
+                                       token ();
+                                       next = peek_token ();
+                                       break;
+                               }
+                       }
+
+                       return false;
+               }
+
+               //
+               // Returns true when `token' is one of the thirteen keyword
+               // tokens tested below (bool, char, the integral types, float,
+               // double and decimal).
+               //
+               bool is_simple_type (int token)
+               {
+                       // bool and char
+                       if (token == Token.BOOL || token == Token.CHAR)
+                               return true;
+
+                       // signed integral types
+                       if (token == Token.SBYTE || token == Token.SHORT ||
+                           token == Token.INT || token == Token.LONG)
+                               return true;
+
+                       // unsigned integral types
+                       if (token == Token.BYTE || token == Token.USHORT ||
+                           token == Token.UINT || token == Token.ULONG)
+                               return true;
+
+                       // floating point and decimal
+                       return token == Token.FLOAT || token == Token.DOUBLE || token == Token.DECIMAL;
+               }
+
+               //
+               // Returns true when `token' is Token.OBJECT or Token.STRING.
+               //
+               bool is_builtin_reference_type (int token)
+               {
+                       if (token == Token.OBJECT)
+                               return true;
+
+                       return token == Token.STRING;
+               }
+
+               //
+               // Consumes zero or more rank specifiers: an OPEN_BRACKET, any
+               // number of COMMA tokens and a CLOSE_BRACKET.
+               //
+               // `next' is the token the caller has already peeked.
+               //
+               // Returns true as soon as the upcoming token is not an
+               // OPEN_BRACKET, and false if anything other than COMMA shows up
+               // before the CLOSE_BRACKET.
+               //
+               bool parse_opt_rank (int next)
+               {
+                       while (next == Token.OPEN_BRACKET){
+                               // consume the OPEN_BRACKET that was only peeked so far
+                               token ();
+
+                               // skip the commas that separate the dimensions
+                               do {
+                                       next = token ();
+                               } while (next == Token.COMMA);
+
+                               if (next != Token.CLOSE_BRACKET)
+                                       return false;
+
+                               next = peek_token ();
+                       }
+
+                       return true;
+               }
+                       
+               //
+               // Consumes a type: a simple type keyword, a namespace or type
+               // name, or a builtin reference type keyword, followed by
+               // optional rank specifiers.  For the first two forms a single
+               // INTERR token directly after the type is consumed as well.
+               //
+               // Returns false when the upcoming tokens do not start a type or
+               // when parse_opt_rank () rejects what follows.
+               //
+               bool parse_type ()
+               {
+                       int next = peek_token ();
+                       bool accepts_interr;
+
+                       if (is_simple_type (next)){
+                               token ();
+                               accepts_interr = true;
+                       } else if (parse_namespace_or_typename (next)){
+                               accepts_interr = true;
+                       } else if (is_builtin_reference_type (next)){
+                               token ();
+                               accepts_interr = false;
+                       } else
+                               return false;
+
+                       if (accepts_interr && peek_token () == Token.INTERR)
+                               token ();
+
+                       return parse_opt_rank (peek_token ());
+               }
+               
+               //
+               // Called once a '(' has been seen, to find out whether what
+               // follows has the shape
+               //
+               //      type identifier [, type identifier]* )
+               //
+               // where each type may be preceded by `ref' or `out'.
+               //
+               // When it does, the caller hands the parser a special token
+               // instead of OPEN_PARENS so that the lambda rules are selected.
+               // The lookahead is done here because placing
+               // explicitly_typed_lambda_parameter_list after a '(' in the
+               // grammar would introduce reduce/reduce conflicts.
+               //
+               // The rule applied is: a type followed by an identifier can
+               // only be a lambda parameter.
+               //
+               // Tokens are consumed while scanning and the position is not
+               // restored here.
+               //
+               public bool parse_lambda_parameters ()
+               {
+                       for (;;){
+                               int next = peek_token ();
+
+                               // optional parameter modifier
+                               if (next == Token.REF || next == Token.OUT)
+                                       token ();
+
+                               if (!parse_type ())
+                                       return false;
+
+                               // the parameter name
+                               if (peek_token () != Token.IDENTIFIER)
+                                       return false;
+                               token ();
+
+                               // a comma introduces another parameter; anything
+                               // else must be the closing parenthesis
+                               next = peek_token ();
+                               if (next != Token.COMMA)
+                                       return next == Token.CLOSE_PARENS;
+
+                               token ();
+                       }
+               }
+
+               int parsing_generic_less_than = 0;
+               
                int is_punct (char c, ref bool doread)
                {
                        int d;
@@ -645,7 +795,17 @@ namespace Mono.CSharp
                        case ']':
                                return Token.CLOSE_BRACKET;
                        case '(':
-                               return Token.OPEN_PARENS;
+                               if (linq){
+                                       PushPosition ();
+                                       bool have_lambda_parameter = parse_lambda_parameters ();
+                                       PopPosition ();
+                                       
+                                       if (have_lambda_parameter)
+                                               return Token.OPEN_PARENS_LAMBDA;
+                                       else
+                                               return Token.OPEN_PARENS;
+                               } else
+                                       return Token.OPEN_PARENS;
                        case ')': {
                                if (deambiguate_close_parens == 0)
                                        return Token.CLOSE_PARENS;
@@ -654,10 +814,7 @@ namespace Mono.CSharp
 
                                PushPosition ();
 
-                               // disable preprocessing directives when peeking
-                               process_directives = false;
                                int new_token = xtoken ();
-                               process_directives = true;
 
                                PopPosition ();
 
@@ -704,7 +861,6 @@ namespace Mono.CSharp
                                PopPosition ();
 
                                if (is_generic_lt) {
-                                       parsing_generic_less_than++;
                                        return Token.OP_GENERICS_LT;
                                } else
                                        parsing_generic_less_than = 0;
@@ -794,6 +950,11 @@ namespace Mono.CSharp
                                        doread = true;
                                        return Token.OP_EQ;
                                }
+                               if (d == '>'){
+                                       doread = true;
+                                       val = Location;
+                                       return Token.ARROW;
+                               }
                                return Token.ASSIGN;
                        }
 
@@ -1502,7 +1663,7 @@ namespace Mono.CSharp
 
                        cmd = static_cmd_arg.ToString ();
 
-                       if (c == '\n'){
+                       if (c == '\n' || c == '\r'){
                                return;
                        }
 
@@ -1526,7 +1687,7 @@ namespace Mono.CSharp
                                static_cmd_arg.Append ((char) c);
                        }
 
-                       arg = static_cmd_arg.ToString ().Trim ();
+                       arg = static_cmd_arg.ToString ();
                }
 
                //
@@ -1576,16 +1737,15 @@ namespace Mono.CSharp
                //
                // Handles #define and #undef
                //
-               void PreProcessDefinition (bool is_define, string arg)
+               void PreProcessDefinition (bool is_define, string arg, bool caller_is_taking)
                {
                        if (arg.Length == 0 || arg == "true" || arg == "false"){
                                Report.Error (1001, Location, "Missing identifer to pre-processor directive");
                                return;
                        }
 
-                       char[] whitespace = { ' ', '\t' };
-                       if (arg.IndexOfAny (whitespace) != -1){
-                               Report.Error (1025, Location, "Single-line comment or end-of-line expected");
+                       if (arg.IndexOfAny (simple_whitespaces) != -1){
+                               Error_EndLineExpected ();
                                return;
                        }
 
@@ -1599,6 +1759,9 @@ namespace Mono.CSharp
                                }
                        }
 
+                       if (!caller_is_taking)
+                               return;
+
                        if (is_define){
                                if (defines == null)
                                        defines = new Hashtable ();
@@ -1699,7 +1862,7 @@ namespace Mono.CSharp
                                
                                if (c == '('){
                                        s = s.Substring (1);
-                                       bool val = pp_expr (ref s);
+                                       bool val = pp_expr (ref s, false);
                                        if (s.Length > 0 && s [0] == ')'){
                                                s = s.Substring (1);
                                                return val;
@@ -1802,7 +1965,7 @@ namespace Mono.CSharp
                //
                // Evaluates an expression for `#if' or `#elif'
                //
-               bool pp_expr (ref string s)
+               bool pp_expr (ref string s, bool isTerm)
                {
                        bool va = pp_and (ref s);
                        s = s.Trim ();
@@ -1813,12 +1976,16 @@ namespace Mono.CSharp
                                if (c == '|'){
                                        if (len > 2 && s [1] == '|'){
                                                s = s.Substring (2);
-                                               return va | pp_expr (ref s);
+                                               return va | pp_expr (ref s, isTerm);
                                        } else {
                                                Error_InvalidDirective ();
                                                return false;
                                        }
-                               } 
+                               }
+                               if (isTerm) {
+                                       Error_EndLineExpected ();
+                                       return false;
+                               }
                        }
                        
                        return va;
@@ -1826,7 +1993,7 @@ namespace Mono.CSharp
 
                bool eval (string s)
                {
-                       bool v = pp_expr (ref s);
+                       bool v = pp_expr (ref s, true);
                        s = s.Trim ();
                        if (s.Length != 0){
                                return false;
@@ -1863,12 +2030,12 @@ namespace Mono.CSharp
                        Report.Error (1040, Location,
                                "Preprocessor directives must appear as the first non-whitespace character on a line");
                }
-               
-               //
-               // Set to false to stop handling preprocesser directives
-               // 
-               bool process_directives = true;
 
+               //
+               // Reports error 1025 at the current location; used when extra
+               // text follows a preprocessor directive where only a single-line
+               // comment or the end of the line is allowed.
+               //
+               void Error_EndLineExpected ()
+               {
+                       const string message = "Single-line comment or end-of-line expected";
+
+                       Report.Error (1025, Location, message);
+               }
+               
                //
                // if true, then the code continues processing the code
                // if false, the code stays in a loop until another directive is
@@ -1885,7 +2052,7 @@ namespace Mono.CSharp
                        // Eat any trailing whitespaces and single-line comments
                        if (arg.IndexOf ("//") != -1)
                                arg = arg.Substring (0, arg.IndexOf ("//"));
-                       arg = arg.TrimEnd (' ', '\t');
+                       arg = arg.Trim (simple_whitespaces);
 
                        //
                        // The first group of pre-processing instructions is always processed
@@ -1897,18 +2064,22 @@ namespace Mono.CSharp
                                goto case "if";
 
                        case "endregion":
-                               region_directive = true;
-                               goto case "endif";
-                               
-                       case "if":
-                               if (arg.Length == 0){
-                                       Error_InvalidDirective ();
+                               if (ifstack == null || ifstack.Count == 0){
+                                       Error_UnexpectedDirective ("no #region for this #endregion");
                                        return true;
                                }
-
-                               int flags = region_directive ? REGION : 0;
+                               int pop = (int) ifstack.Pop ();
+                                       
+                               if ((pop & REGION) == 0)
+                                       Report.Error (1027, Location, "Expected `#endif' directive");
+                                       
+                               return caller_is_taking;
+                               
+                       case "if":
                                if (ifstack == null)
                                        ifstack = new Stack (2);
+
+                               int flags = region_directive ? REGION : 0;
                                if (ifstack.Count == 0){
                                        flags |= PARENT_TAKING;
                                } else {
@@ -1930,27 +2101,20 @@ namespace Mono.CSharp
                                        Error_UnexpectedDirective ("no #if for this #endif");
                                        return true;
                                } else {
-                                       int pop = (int) ifstack.Pop ();
+                                       pop = (int) ifstack.Pop ();
                                        
-                                       if (region_directive && ((pop & REGION) == 0))
-                                               Report.Error (1027, Location, "Expected `#endif' directive");
-                                       else if (!region_directive && ((pop & REGION) != 0))
+                                       if ((pop & REGION) != 0)
                                                Report.Error (1038, Location, "#endregion directive expected");
                                        
-                                       if (!region_directive && arg.Length != 0) {
-                                               Report.Error (1025, Location, "Single-line comment or end-of-line expected");
+                                       if (arg.Length != 0) {
+                                               Error_EndLineExpected ();
                                        }
                                        
                                        if (ifstack.Count == 0)
                                                return true;
-                                       else {
-                                               int state = (int) ifstack.Peek ();
 
-                                               if ((state & TAKING) != 0)
-                                                       return true;
-                                               else
-                                                       return false;
-                                       }
+                                       int state = (int) ifstack.Peek ();
+                                       return (state & TAKING) != 0;
                                }
 
                        case "elif":
@@ -1958,7 +2122,7 @@ namespace Mono.CSharp
                                        Error_UnexpectedDirective ("no #if for this #elif");
                                        return true;
                                } else {
-                                       int state = (int) ifstack.Peek ();
+                                       int state = (int) ifstack.Pop ();
 
                                        if ((state & REGION) != 0) {
                                                Report.Error (1038, Location, "#endregion directive expected");
@@ -1970,15 +2134,18 @@ namespace Mono.CSharp
                                                return true;
                                        }
 
-                                       if ((state & TAKING) != 0)
+                                       if ((state & TAKING) != 0) {
+                                               ifstack.Push (0);
                                                return false;
+                                       }
 
                                        if (eval (arg) && ((state & PARENT_TAKING) != 0)){
-                                               state = (int) ifstack.Pop ();
                                                ifstack.Push (state | TAKING);
                                                return true;
-                                       } else 
-                                               return false;
+                                       }
+
+                                       ifstack.Push (state);
+                                       return false;
                                }
 
                        case "else":
@@ -2000,6 +2167,11 @@ namespace Mono.CSharp
 
                                        ifstack.Pop ();
 
+                                       if (arg.Length != 0) {
+                                               Error_EndLineExpected ();
+                                               return true;
+                                       }
+
                                        bool ret = false;
                                        if ((state & PARENT_TAKING) != 0) {
                                                ret = (state & TAKING) == 0;
@@ -2019,7 +2191,7 @@ namespace Mono.CSharp
                                                Error_TokensSeen ();
                                                return caller_is_taking;
                                        }
-                                       PreProcessDefinition (true, arg);
+                                       PreProcessDefinition (true, arg, caller_is_taking);
                                        return caller_is_taking;
 
                                case "undef":
@@ -2027,7 +2199,7 @@ namespace Mono.CSharp
                                                Error_TokensSeen ();
                                                return caller_is_taking;
                                        }
-                                       PreProcessDefinition (false, arg);
+                                       PreProcessDefinition (false, arg, caller_is_taking);
                                        return caller_is_taking;
                        }
 
@@ -2048,7 +2220,7 @@ namespace Mono.CSharp
 
                        case "pragma":
                                if (RootContext.Version == LanguageVersion.ISO_1) {
-                                       Report.FeatureIsNotStandardized (Location, "#pragma");
+                                       Report.FeatureIsNotISO1 (Location, "#pragma");
                                        return true;
                                }
 
@@ -2129,7 +2301,9 @@ namespace Mono.CSharp
                                if (ok)
                                        return res;
 
-//                             Report.Error (267, Location, "The `partial' modifier can be used only immediately before keyword `class', `struct', or `interface'");
+                               if (next_token < Token.LAST_KEYWORD)
+                                       Report.Error (267, Location, "The `partial' modifier can be used only immediately before keyword `class', `struct', or `interface'");
+
                                val = new LocatedToken (Location, "partial");
                                return Token.IDENTIFIER;
                        }
@@ -2225,9 +2399,13 @@ namespace Mono.CSharp
 
                        // Whether we have seen comments on the current line
                        bool comments_seen = false;
-                       
                        val = null;
                        for (;(c = getChar ()) != -1;) {
+                               if (c == '\t'){
+                                       col = ((col + 8) / 8) * 8;
+                                       continue;
+                               }
+                               
                                if (c == ' ' || c == '\t' || c == '\f' || c == '\v' || c == 0xa0)
                                        continue;
 
@@ -2259,8 +2437,8 @@ namespace Mono.CSharp
                                                        }
                                                }
                                                while ((d = getChar ()) != -1 && (d != '\n') && d != '\r')
-                                               if (d == '\n'){
-                                               }
+                                                       if (d == '\n'){
+                                                       }
                                                any_token_seen |= tokens_seen;
                                                tokens_seen = false;
                                                comments_seen = false;
@@ -2357,10 +2535,6 @@ namespace Mono.CSharp
                                }
                                
                                if (c == '#') {
-                                       // return NONE if we're not processing directives (during token peeks)
-                                       if (!process_directives)
-                                               return Token.NONE;
-
                                        if (tokens_seen || comments_seen) {
                                                Eror_WrongPreprocessorLocation ();
                                                return Token.ERROR;