2007-03-21 Mike Kestner <mkestner@novell.com>
[mono.git] / mcs / mcs / cs-tokenizer.cs
index 6f5862beb23123da2c1e17ee4a4f88c7b646c663..ed10088f42f6983fef2bc7a6ca663b43d6e59d84 100644 (file)
 // (C) 2001, 2002 Ximian, Inc (http://www.ximian.com)
 // (C) 2004 Novell, Inc
 //
-
-/*
- * TODO:
- *   Make sure we accept the proper Unicode ranges, per the spec.
- *   Report error 1032
-*/
+//
 
 using System;
 using System.Text;
@@ -45,10 +40,13 @@ namespace Mono.CSharp
                bool handle_assembly = false;
                bool handle_constraints = false;
                bool handle_typeof = false;
+               bool query_parsing;
                Location current_location;
                Location current_comment_location = Location.Null;
                ArrayList escapedIdentifiers = new ArrayList ();
 
+               static bool linq;
+
                //
                // XML documentation buffer. The save point is used to divide
                // comments on types and comments on members.
@@ -73,6 +71,7 @@ namespace Mono.CSharp
                bool any_token_seen = false;
 
                static Hashtable tokenValues;
+               static readonly char[] simple_whitespaces = new char[] { ' ', '\t' };
 
                private static Hashtable TokenValueName
                {
@@ -163,6 +162,12 @@ namespace Mono.CSharp
                        }
                }
 
+               public static bool LinqEnabled {  // write-only switch for the static `linq' flag, shared by every Tokenizer
+                       set {
+                               linq = value;  // InitTokens reads the flag when it registers the query keywords, so call Reset () after changing it
+                       }
+               }
+
                public bool TypeOfParsing {
                        get {
                                return handle_typeof;
@@ -173,6 +178,12 @@ namespace Mono.CSharp
                        }
                }
 
+               public bool QueryParsing {  // write-only: lets a caller set this tokenizer's query_parsing flag
+                       set {
+                               query_parsing = value;  // while false (and linq is on) keyword lookup returns -1 for tokens strictly between QUERY_FIRST_TOKEN and QUERY_LAST_TOKEN
+                       }
+               }
+
                public XmlCommentState doc_state {
                        get { return xmlDocState; }
                        set {
@@ -196,7 +207,7 @@ namespace Mono.CSharp
                // Class variables
                // 
                static CharArrayHashtable[] keywords;
-               static Hashtable keywordStrings = new Hashtable ();
+               static Hashtable keywordStrings;
                static NumberStyles styles;
                static NumberFormatInfo csharp_format_info;
                
@@ -268,9 +279,10 @@ namespace Mono.CSharp
                        public int col;
                        public int putback_char;
                        public int previous_col;
-#if GMCS_SOURCES
+                       public Stack ifstack;
                        public int parsing_generic_less_than;
-#endif                 
+                       public int current_token;
+
                        public Position (Tokenizer t)
                        {
                                position = t.reader.Position;
@@ -278,9 +290,10 @@ namespace Mono.CSharp
                                col = t.col;
                                putback_char = t.putback_char;
                                previous_col = t.previous_col;
-#if GMCS_SOURCES
+                               if (t.ifstack != null && t.ifstack.Count != 0)
+                                       ifstack = (Stack)t.ifstack.Clone ();
                                parsing_generic_less_than = t.parsing_generic_less_than;
-#endif
+                               current_token = t.current_token;
                        }
                }
                
@@ -298,7 +311,9 @@ namespace Mono.CSharp
                        col = p.col;
                        putback_char = p.putback_char;
                        previous_col = p.previous_col;
-
+                       ifstack = p.ifstack;
+                       parsing_generic_less_than = p.parsing_generic_less_than;
+                       current_token = p.current_token;
                }
 
                // Do not reset the position, ignore it.
@@ -307,7 +322,8 @@ namespace Mono.CSharp
                        position_stack.Pop ();
                }
                
-               static void AddKeyword (string kw, int token) {
+               static void AddKeyword (string kw, int token)
+               {
                        keywordStrings.Add (kw, kw);
                        if (keywords [kw.Length] == null) {
                                keywords [kw.Length] = new CharArrayHashtable (kw.Length);
@@ -317,6 +333,7 @@ namespace Mono.CSharp
 
                static void InitTokens ()
                {
+                       keywordStrings = new Hashtable ();
                        keywords = new CharArrayHashtable [64];
 
                        AddKeyword ("__arglist", Token.ARGLIST);
@@ -405,6 +422,21 @@ namespace Mono.CSharp
                        AddKeyword ("partial", Token.PARTIAL);
 #if GMCS_SOURCE
                        AddKeyword ("where", Token.WHERE);
+
+                       if (linq) {
+                               AddKeyword ("from", Token.FROM);
+                               AddKeyword ("join", Token.JOIN);
+                               AddKeyword ("on", Token.ON);
+                               AddKeyword ("equals", Token.EQUALS);
+                               AddKeyword ("select", Token.SELECT);
+                               AddKeyword ("group", Token.GROUP);
+                               AddKeyword ("by", Token.BY);
+                               AddKeyword ("let", Token.LET);
+                               AddKeyword ("orderby", Token.ORDERBY);
+                               AddKeyword ("ascending", Token.ASCENDING);
+                               AddKeyword ("descending", Token.DESCENDING);
+                               AddKeyword ("into", Token.INTO);
+                       }
 #endif
                }
 
@@ -412,11 +444,16 @@ namespace Mono.CSharp
                // Class initializer
                // 
                static Tokenizer ()
+               {
+                       Reset ();  // class initialization now lives in Reset () so it can be run again on demand
+               }
+
+               public static void Reset ()  // rebuilds the static state: keyword tables (InitTokens), number format and styles, string_builder
                {
                        InitTokens ();
                        csharp_format_info = NumberFormatInfo.InvariantInfo;
                        styles = NumberStyles.Float;
-                       
+
                        string_builder = new System.Text.StringBuilder ();
                }
 
@@ -443,7 +480,21 @@ namespace Mono.CSharp
                        if (handle_assembly == false && res == Token.ASSEMBLY)
                                return -1;
 #if GMCS_SOURCE
-                       if (handle_constraints == false && res == Token.WHERE)
+                       if (linq) {
+                               if (res == Token.FROM &&
+                                       (current_token == Token.ASSIGN || current_token == Token.OPEN_BRACKET ||
+                                        current_token == Token.RETURN || current_token == Token.IN)) {
+                                       query_parsing = true;
+                                       return res;
+                               }
+
+                               if (!query_parsing && res > Token.QUERY_FIRST_TOKEN && res < Token.QUERY_LAST_TOKEN)
+                                       return -1;
+
+                               return res;
+                       }
+
+                       if (!handle_constraints && res == Token.WHERE)
                                return -1;
 #endif
                        return res;
@@ -494,7 +545,8 @@ namespace Mono.CSharp
 
                static bool is_identifier_part_character (char c)
                {
-                       return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9') || Char.IsLetter (c);
+                       return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9') ||  // explicit ASCII letter, underscore and digit tests come first
+                               Char.IsLetter (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation;  // new in this change: any connector-punctuation character is accepted, not only '_'
                }
 
                public static bool IsKeyword (string s)
@@ -517,7 +569,6 @@ namespace Mono.CSharp
                        return true;
                }
 
-#if GMCS_SOURCE
                bool parse_generic_dimension (out int dimension)
                {
                        dimension = 1;
@@ -593,8 +644,7 @@ namespace Mono.CSharp
                        return false;
                }
 
-               int parsing_generic_less_than = 0;
-
+#if GMCS_SOURCE
                public void PutbackNullable ()
                {
                        if (nullable_pos < 0)
@@ -623,6 +673,155 @@ namespace Mono.CSharp
                                nullable_pos = -1;
                }
 #endif
+               
+               public int peek_token ()  // returns the value token () produces next, with the scan wrapped in PushPosition ()/PopPosition ()
+               {
+                       int the_token;
+
+                       PushPosition ();  // defined outside this view; presumably records the scanner state captured by Position
+                       the_token = token ();
+                       PopPosition ();  // presumably puts that recorded state back, so the token is not consumed
+                       
+                       return the_token;
+               }
+               
+               bool parse_namespace_or_typename (int next)  // consumes IDENTIFIER, then any run of ('.' | '::') IDENTIFIER or generic argument lists; true if a name was matched
+               {
+                       if (next == -1)  // -1 means the caller has not peeked the upcoming token yet
+                               next = peek_token ();
+                       while (next == Token.IDENTIFIER){
+                               token ();  // consume the identifier
+                         again:  // re-entered after a generic argument list has been skipped
+                               next = peek_token ();
+                               if (next == Token.DOT || next == Token.DOUBLE_COLON){
+                                       token ();  // consume the separator; the loop condition then demands another identifier
+                                       next = peek_token ();
+                                       continue;
+                               }
+                               if (next == Token.OP_GENERICS_LT){
+                                       token ();
+                                       if (!parse_less_than ())  // helper defined outside this view; presumably skips the type-argument list
+                                               return false;
+                                       goto again;
+                               }
+                               return true;  // the name ended on an identifier or a generic argument list
+                       }
+
+                       return false;  // no identifier at the start, or none after a '.' / '::' separator
+               }
+
+               bool is_simple_type (int token)  // true when `token' is one of the thirteen type keyword tokens listed below
+               {
+                       return  (token == Token.BOOL ||
+                                token == Token.DECIMAL ||
+                                token == Token.SBYTE ||
+                                token == Token.BYTE ||
+                                token == Token.SHORT ||
+                                token == Token.USHORT ||
+                                token == Token.INT ||
+                                token == Token.UINT ||
+                                token == Token.LONG ||
+                                token == Token.ULONG ||
+                                token == Token.CHAR ||
+                                token == Token.FLOAT ||
+                                token == Token.DOUBLE);
+               }
+
+               bool is_builtin_reference_type (int token)  // true only for the OBJECT and STRING keyword tokens
+               {
+                       return (token == Token.OBJECT || token == Token.STRING);
+               }
+
+               bool parse_opt_rank (int next)  // consumes zero or more "[" {","} "]" groups; `next' is the already-peeked upcoming token
+               {
+                       while (true){
+                               if (next != Token.OPEN_BRACKET)
+                                       return true;  // no (further) bracket group: success, nothing else is consumed
+
+                               token ();  // consume the '['
+                               while (true){
+                                       next = token ();
+                                       if (next == Token.CLOSE_BRACKET){
+                                               next = peek_token ();  // look past the ']' so the outer loop can spot another '['
+                                               break;
+                                       }
+                                       if (next == Token.COMMA)
+                                               continue;  // commas between the brackets are skipped
+                                       
+                                       return false;  // any other token inside the brackets: no match
+                               }
+                       }
+               }
+                       
+               bool parse_type ()  // tries to consume one type: a simple type, a (qualified/generic) name, or object/string, plus optional bracket groups
+               {
+                       int next = peek_token ();
+                       
+                       if (is_simple_type (next)){
+                               token ();  // consume the type keyword
+                               next = peek_token ();
+                               if (next == Token.INTERR)  // presumably the '?' suffix token; confirm against Token
+                                       token ();
+                               return parse_opt_rank (peek_token ());
+                       }
+                       if (parse_namespace_or_typename (next)){  // passes the peeked token along so the helper does not peek again
+                               next = peek_token ();
+                               if (next == Token.INTERR)
+                                       token ();
+                               return parse_opt_rank (peek_token ());
+                       } else if (is_builtin_reference_type (next)){  // object/string: no INTERR check in this branch
+                               token ();
+                               return parse_opt_rank (peek_token ());
+                       }
+                       
+                       return false;  // the upcoming token does not start any of the recognized type forms
+               }
+               
+               //
+               // Invoked after '(' has been seen and tries to parse:
+               // type identifier [, type identifier]* followed by ')'
+               //
+               // Returns true when that shape is matched.  In that case
+               // is_punct returns OPEN_PARENS_LAMBDA instead of OPEN_PARENS,
+               // a special token that triggers lambda parsing.
+               //
+               // This is needed because we can not introduce the
+               // explicitly_typed_lambda_parameter_list after a '(' in the
+               // grammar without introducing reduce/reduce conflicts.
+               //
+               // Each type expression can be prefixed with `ref' or `out'.
+               // The closing ')' is only peeked, never consumed; all other
+               // tokens scanned here are consumed through token ().
+               // is_punct wraps the call in PushPosition ()/PopPosition ().
+               // An empty list `()' does not match (no type is found).
+               //
+               public bool parse_lambda_parameters ()
+               {
+                       while (true){
+                               int next = peek_token ();
+
+                               if (next == Token.REF || next == Token.OUT)
+                                       token ();  // consume the modifier; a type must still follow
+                                                
+                               if (parse_type ()){
+                                       next = peek_token ();
+                                       if (next == Token.IDENTIFIER){
+                                               token ();  // consume the parameter name
+                                               next = peek_token ();
+                                               if (next == Token.COMMA){
+                                                       token ();
+                                                       continue;  // another `type identifier' pair is expected
+                                               }
+                                               if (next == Token.CLOSE_PARENS)
+                                                       return true;  // complete list seen; the ')' itself is left unread
+                                       }
+                               }
+                               return false;  // no type, no parameter name, or an unexpected token after the name
+                       }
+               }
+
+               int parsing_generic_less_than = 0;
+               
                int is_punct (char c, ref bool doread)
                {
                        int d;
@@ -645,7 +844,17 @@ namespace Mono.CSharp
                        case ']':
                                return Token.CLOSE_BRACKET;
                        case '(':
-                               return Token.OPEN_PARENS;
+                               if (linq){
+                                       PushPosition ();
+                                       bool have_lambda_parameter = parse_lambda_parameters ();
+                                       PopPosition ();
+                                       
+                                       if (have_lambda_parameter)
+                                               return Token.OPEN_PARENS_LAMBDA;
+                                       else
+                                               return Token.OPEN_PARENS;
+                               } else
+                                       return Token.OPEN_PARENS;
                        case ')': {
                                if (deambiguate_close_parens == 0)
                                        return Token.CLOSE_PARENS;
@@ -654,16 +863,8 @@ namespace Mono.CSharp
 
                                PushPosition ();
 
-                               // Backup preprocessor flow data because we'll restore cursor possition
-                               Stack ifstack_backup = null;
-                               if (ifstack != null && ifstack.Count != 0)
-                                       ifstack_backup = (Stack)ifstack.Clone ();
-
                                int new_token = xtoken ();
 
-                               if (ifstack_backup != null)
-                                       ifstack = ifstack_backup;
-
                                PopPosition ();
 
                                if (new_token == Token.OPEN_PARENS)
@@ -709,7 +910,6 @@ namespace Mono.CSharp
                                PopPosition ();
 
                                if (is_generic_lt) {
-                                       parsing_generic_less_than++;
                                        return Token.OP_GENERICS_LT;
                                } else
                                        parsing_generic_less_than = 0;
@@ -799,6 +999,11 @@ namespace Mono.CSharp
                                        doread = true;
                                        return Token.OP_EQ;
                                }
+                               if (d == '>'){
+                                       doread = true;
+                                       val = Location;
+                                       return Token.ARROW;
+                               }
                                return Token.ASSIGN;
                        }
 
@@ -1581,15 +1786,14 @@ namespace Mono.CSharp
                //
                // Handles #define and #undef
                //
-               void PreProcessDefinition (bool is_define, string arg)
+               void PreProcessDefinition (bool is_define, string arg, bool caller_is_taking)
                {
                        if (arg.Length == 0 || arg == "true" || arg == "false"){
                                Report.Error (1001, Location, "Missing identifer to pre-processor directive");
                                return;
                        }
 
-                       char[] whitespace = { ' ', '\t' };
-                       if (arg.IndexOfAny (whitespace) != -1){
+                       if (arg.IndexOfAny (simple_whitespaces) != -1){
                                Error_EndLineExpected ();
                                return;
                        }
@@ -1604,6 +1808,9 @@ namespace Mono.CSharp
                                }
                        }
 
+                       if (!caller_is_taking)
+                               return;
+
                        if (is_define){
                                if (defines == null)
                                        defines = new Hashtable ();
@@ -1894,7 +2101,7 @@ namespace Mono.CSharp
                        // Eat any trailing whitespaces and single-line comments
                        if (arg.IndexOf ("//") != -1)
                                arg = arg.Substring (0, arg.IndexOf ("//"));
-                       arg = arg.Trim (' ', '\t');
+                       arg = arg.Trim (simple_whitespaces);
 
                        //
                        // The first group of pre-processing instructions is always processed
@@ -1964,7 +2171,7 @@ namespace Mono.CSharp
                                        Error_UnexpectedDirective ("no #if for this #elif");
                                        return true;
                                } else {
-                                       int state = (int) ifstack.Peek ();
+                                       int state = (int) ifstack.Pop ();
 
                                        if ((state & REGION) != 0) {
                                                Report.Error (1038, Location, "#endregion directive expected");
@@ -1976,15 +2183,18 @@ namespace Mono.CSharp
                                                return true;
                                        }
 
-                                       if ((state & TAKING) != 0)
+                                       if ((state & TAKING) != 0) {
+                                               ifstack.Push (0);
                                                return false;
+                                       }
 
                                        if (eval (arg) && ((state & PARENT_TAKING) != 0)){
-                                               state = (int) ifstack.Pop ();
                                                ifstack.Push (state | TAKING);
                                                return true;
-                                       } else 
-                                               return false;
+                                       }
+
+                                       ifstack.Push (state);
+                                       return false;
                                }
 
                        case "else":
@@ -2030,7 +2240,7 @@ namespace Mono.CSharp
                                                Error_TokensSeen ();
                                                return caller_is_taking;
                                        }
-                                       PreProcessDefinition (true, arg);
+                                       PreProcessDefinition (true, arg, caller_is_taking);
                                        return caller_is_taking;
 
                                case "undef":
@@ -2038,7 +2248,7 @@ namespace Mono.CSharp
                                                Error_TokensSeen ();
                                                return caller_is_taking;
                                        }
-                                       PreProcessDefinition (false, arg);
+                                       PreProcessDefinition (false, arg, caller_is_taking);
                                        return caller_is_taking;
                        }
 
@@ -2059,7 +2269,7 @@ namespace Mono.CSharp
 
                        case "pragma":
                                if (RootContext.Version == LanguageVersion.ISO_1) {
-                                       Report.FeatureIsNotStandardized (Location, "#pragma");
+                                       Report.FeatureIsNotISO1 (Location, "#pragma");
                                        return true;
                                }
 
@@ -2140,7 +2350,9 @@ namespace Mono.CSharp
                                if (ok)
                                        return res;
 
-//                             Report.Error (267, Location, "The `partial' modifier can be used only immediately before keyword `class', `struct', or `interface'");
+                               if (next_token < Token.LAST_KEYWORD)
+                                       Report.Error (267, Location, "The `partial' modifier can be used only immediately before keyword `class', `struct', or `interface'");
+
                                val = new LocatedToken (Location, "partial");
                                return Token.IDENTIFIER;
                        }
@@ -2236,10 +2448,14 @@ namespace Mono.CSharp
 
                        // Whether we have seen comments on the current line
                        bool comments_seen = false;
-                       
                        val = null;
                        for (;(c = getChar ()) != -1;) {
-                               if (c == ' ' || c == '\t' || c == '\f' || c == '\v' || c == 0xa0)
+                               if (c == '\t'){
+                                       col = ((col + 8) / 8) * 8;
+                                       continue;
+                               }
+                               
+                               if (c == ' ' || c == '\f' || c == '\v' || c == 0xa0 || c == 0)
                                        continue;
 
                                if (c == '\r') {
@@ -2270,8 +2486,8 @@ namespace Mono.CSharp
                                                        }
                                                }
                                                while ((d = getChar ()) != -1 && (d != '\n') && d != '\r')
-                                               if (d == '\n'){
-                                               }
+                                                       if (d == '\n'){
+                                                       }
                                                any_token_seen |= tokens_seen;
                                                tokens_seen = false;
                                                comments_seen = false;