2007-03-21 Mike Kestner <mkestner@novell.com>
[mono.git] / mcs / mcs / cs-tokenizer.cs
index 5751834068bee5af85065143c61bd0380befec68..ed10088f42f6983fef2bc7a6ca663b43d6e59d84 100644 (file)
 // (C) 2001, 2002 Ximian, Inc (http://www.ximian.com)
 // (C) 2004 Novell, Inc
 //
-
-/*
- * TODO:
- *   Make sure we accept the proper Unicode ranges, per the spec.
- *   Report error 1032
-*/
+//
 
 using System;
 using System.Text;
@@ -45,10 +40,13 @@ namespace Mono.CSharp
                bool handle_assembly = false;
                bool handle_constraints = false;
                bool handle_typeof = false;
+               bool query_parsing;
                Location current_location;
                Location current_comment_location = Location.Null;
                ArrayList escapedIdentifiers = new ArrayList ();
 
+               static bool linq;
+
                //
                // XML documentation buffer. The save point is used to divide
                // comments on types and comments on members.
@@ -164,6 +162,12 @@ namespace Mono.CSharp
                        }
                }
 
+               public static bool LinqEnabled { // write-only switch for the static `linq` flag shared by all tokenizers
+                       set {
+                               linq = value; // InitTokens () reads this flag when it builds the keyword table
+                       }
+               }
+
                public bool TypeOfParsing {
                        get {
                                return handle_typeof;
@@ -174,6 +178,12 @@ namespace Mono.CSharp
                        }
                }
 
+               public bool QueryParsing { // write-only, per-instance: sets the query_parsing flag
+                       set {
+                               query_parsing = value; // the keyword lookup hides query-only tokens while this is false
+                       }
+               }
+
                public XmlCommentState doc_state {
                        get { return xmlDocState; }
                        set {
@@ -197,7 +207,7 @@ namespace Mono.CSharp
                // Class variables
                // 
                static CharArrayHashtable[] keywords;
-               static Hashtable keywordStrings = new Hashtable ();
+               static Hashtable keywordStrings;
                static NumberStyles styles;
                static NumberFormatInfo csharp_format_info;
                
@@ -270,9 +280,9 @@ namespace Mono.CSharp
                        public int putback_char;
                        public int previous_col;
                        public Stack ifstack;
-#if GMCS_SOURCES
                        public int parsing_generic_less_than;
-#endif                 
+                       public int current_token;
+
                        public Position (Tokenizer t)
                        {
                                position = t.reader.Position;
@@ -282,9 +292,8 @@ namespace Mono.CSharp
                                previous_col = t.previous_col;
                                if (t.ifstack != null && t.ifstack.Count != 0)
                                        ifstack = (Stack)t.ifstack.Clone ();
-#if GMCS_SOURCES
                                parsing_generic_less_than = t.parsing_generic_less_than;
-#endif
+                               current_token = t.current_token;
                        }
                }
                
@@ -303,6 +312,8 @@ namespace Mono.CSharp
                        putback_char = p.putback_char;
                        previous_col = p.previous_col;
                        ifstack = p.ifstack;
+                       parsing_generic_less_than = p.parsing_generic_less_than;
+                       current_token = p.current_token;
                }
 
                // Do not reset the position, ignore it.
@@ -311,7 +322,8 @@ namespace Mono.CSharp
                        position_stack.Pop ();
                }
                
-               static void AddKeyword (string kw, int token) {
+               static void AddKeyword (string kw, int token)
+               {
                        keywordStrings.Add (kw, kw);
                        if (keywords [kw.Length] == null) {
                                keywords [kw.Length] = new CharArrayHashtable (kw.Length);
@@ -321,6 +333,7 @@ namespace Mono.CSharp
 
                static void InitTokens ()
                {
+                       keywordStrings = new Hashtable ();
                        keywords = new CharArrayHashtable [64];
 
                        AddKeyword ("__arglist", Token.ARGLIST);
@@ -409,6 +422,21 @@ namespace Mono.CSharp
                        AddKeyword ("partial", Token.PARTIAL);
 #if GMCS_SOURCE
                        AddKeyword ("where", Token.WHERE);
+
+                       if (linq) {
+                               AddKeyword ("from", Token.FROM);
+                               AddKeyword ("join", Token.JOIN);
+                               AddKeyword ("on", Token.ON);
+                               AddKeyword ("equals", Token.EQUALS);
+                               AddKeyword ("select", Token.SELECT);
+                               AddKeyword ("group", Token.GROUP);
+                               AddKeyword ("by", Token.BY);
+                               AddKeyword ("let", Token.LET);
+                               AddKeyword ("orderby", Token.ORDERBY);
+                               AddKeyword ("ascending", Token.ASCENDING);
+                               AddKeyword ("descending", Token.DESCENDING);
+                               AddKeyword ("into", Token.INTO);
+                       }
 #endif
                }
 
@@ -416,11 +444,16 @@ namespace Mono.CSharp
                // Class initializer
                // 
                static Tokenizer () // class initializer: delegates all static setup to Reset ()
+               {
+                       Reset ();
+               }
+
+               public static void Reset () // rebuilds the static tokenizer tables; public so it can be re-run on demand
                {
                        InitTokens (); // recreates keywordStrings/keywords, consulting the current `linq` flag
                        csharp_format_info = NumberFormatInfo.InvariantInfo;
                        styles = NumberStyles.Float;
-                       
+
                        string_builder = new System.Text.StringBuilder (); // fresh shared builder
                }
 
@@ -447,7 +480,21 @@ namespace Mono.CSharp
                        if (handle_assembly == false && res == Token.ASSEMBLY)
                                return -1;
 #if GMCS_SOURCE
-                       if (handle_constraints == false && res == Token.WHERE)
+                       if (linq) {
+                               if (res == Token.FROM &&
+                                       (current_token == Token.ASSIGN || current_token == Token.OPEN_BRACKET ||
+                                        current_token == Token.RETURN || current_token == Token.IN)) {
+                                       query_parsing = true;
+                                       return res;
+                               }
+
+                               if (!query_parsing && res > Token.QUERY_FIRST_TOKEN && res < Token.QUERY_LAST_TOKEN)
+                                       return -1;
+
+                               return res;
+                       }
+
+                       if (!handle_constraints && res == Token.WHERE)
                                return -1;
 #endif
                        return res;
@@ -498,7 +545,8 @@ namespace Mono.CSharp
 
                static bool is_identifier_part_character (char c) // true when `c` may appear inside an identifier
                {
-                       return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9') || Char.IsLetter (c);
+                       return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9') || // ASCII fast path first
+                               Char.IsLetter (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation; // then any letter or connector punctuation; NOTE(review): non-ASCII digits and combining marks are not tested here
                }
 
                public static bool IsKeyword (string s)
@@ -521,7 +569,6 @@ namespace Mono.CSharp
                        return true;
                }
 
-#if GMCS_SOURCE
                bool parse_generic_dimension (out int dimension)
                {
                        dimension = 1;
@@ -597,8 +644,7 @@ namespace Mono.CSharp
                        return false;
                }
 
-               int parsing_generic_less_than = 0;
-
+#if GMCS_SOURCE
                public void PutbackNullable ()
                {
                        if (nullable_pos < 0)
@@ -627,6 +673,155 @@ namespace Mono.CSharp
                                nullable_pos = -1;
                }
 #endif
+               
+               public int peek_token () // reads one token between PushPosition/PopPosition and returns it
+               {
+                       int the_token;
+
+                       PushPosition (); // presumably snapshots the tokenizer state -- confirm against PushPosition
+                       the_token = token ();
+                       PopPosition (); // presumably rewinds to the snapshot, so the token is not consumed -- confirm
+                       
+                       return the_token;
+               }
+               
+               bool parse_namespace_or_typename (int next) // consumes IDENTIFIER (('.' | '::') IDENTIFIER)* with optional generic argument lists; `next` is the already-peeked token or -1
+               {
+                       if (next == -1) // -1 means the caller has not peeked yet
+                               next = peek_token ();
+                       while (next == Token.IDENTIFIER){
+                               token (); // consume the identifier that was only peeked so far
+                         again:
+                               next = peek_token ();
+                               if (next == Token.DOT || next == Token.DOUBLE_COLON){
+                                       token (); // consume the separator; the loop condition then requires another identifier
+                                       next = peek_token ();
+                                       continue;
+                               }
+                               if (next == Token.OP_GENERICS_LT){
+                                       token (); // consume the generic '<' before handing over to parse_less_than ()
+                                       if (!parse_less_than ())
+                                               return false;
+                                       goto again; // re-peek after the generic arguments: a '.' or '::' may follow
+                               }
+                               return true; // name ended on an identifier or a closed generic argument list
+                       }
+
+                       return false; // first token, or the token after a separator, was not an identifier
+               }
+
+               bool is_simple_type (int token) // true when `token` is one of the thirteen type-keyword tokens listed below
+               {
+                       return  (token == Token.BOOL ||
+                                token == Token.DECIMAL ||
+                                token == Token.SBYTE ||
+                                token == Token.BYTE ||
+                                token == Token.SHORT ||
+                                token == Token.USHORT ||
+                                token == Token.INT ||
+                                token == Token.UINT ||
+                                token == Token.LONG ||
+                                token == Token.ULONG ||
+                                token == Token.CHAR ||
+                                token == Token.FLOAT ||
+                                token == Token.DOUBLE);
+               }
+
+               bool is_builtin_reference_type (int token) // true only for the OBJECT and STRING tokens
+               {
+                       return (token == Token.OBJECT || token == Token.STRING);
+               }
+
+               bool parse_opt_rank (int next) // consumes zero or more rank specifiers such as [] or [,]; `next` is the token the caller peeked
+               {
+                       while (true){
+                               if (next != Token.OPEN_BRACKET)
+                                       return true; // no (further) rank specifier: nothing more is consumed
+
+                               token (); // consume the '[' that `next` was peeked as
+                               while (true){
+                                       next = token ();
+                                       if (next == Token.CLOSE_BRACKET){
+                                               next = peek_token (); // look for another '[' right after this ']'
+                                               break;
+                                       }
+                                       if (next == Token.COMMA) // commas separate dimensions inside one specifier
+                                               continue;
+                                       
+                                       return false; // anything else inside the brackets is not a rank specifier
+                               }
+                       }
+               }
+                       
+               bool parse_type () // consumes a type: simple type, namespace/type name, or object/string, followed by optional rank specifiers
+               {
+                       int next = peek_token ();
+                       
+                       if (is_simple_type (next)){
+                               token (); // consume the simple-type keyword
+                               next = peek_token ();
+                               if (next == Token.INTERR) // optional trailing INTERR token is consumed as part of the type
+                                       token ();
+                               return parse_opt_rank (peek_token ());
+                       }
+                       if (parse_namespace_or_typename (next)){
+                               next = peek_token ();
+                               if (next == Token.INTERR) // same optional INTERR handling as the simple-type branch
+                                       token ();
+                               return parse_opt_rank (peek_token ());
+                       } else if (is_builtin_reference_type (next)){ // `next` still holds the first token peeked above
+                               token ();
+                               return parse_opt_rank (peek_token ()); // this branch does not look for INTERR
+                       }
+                       
+                       return false; // the peeked token does not start any of the recognized type forms
+               }
+               
+               //
+               // Invoked after '(' has been seen and tries to parse:
+               // type identifier [, type identifier]*
+               //
+               // If this is the case, instead of returning an
+               // OPEN_PARENS token we return a special token that
+               // triggers lambda parsing.
+               //
+               // This is needed because we cannot introduce the
+               // explicitly_typed_lambda_parameter_list after a '(' in the
+               // grammar without introducing reduce/reduce conflicts.
+               //
+               // We need to parse a type and if it is followed by an
+               // identifier, we know it has to be parsed as a lambda
+               // expression.
+               //
+               // The type expression can be prefixed with `ref' or `out'.
+               //
+               public bool parse_lambda_parameters () // returns true only for a full "type identifier" list followed by CLOSE_PARENS
+               {
+                       while (true){
+                               int next = peek_token ();
+
+                               if (next == Token.REF || next == Token.OUT) // optional parameter modifier
+                                       token ();
+                                                
+                               if (parse_type ()){
+                                       next = peek_token ();
+                                       if (next == Token.IDENTIFIER){
+                                               token (); // consume the parameter name
+                                               next = peek_token ();
+                                               if (next == Token.COMMA){
+                                                       token ();
+                                                       continue; // another "type identifier" pair must follow
+                                               }
+                                               if (next == Token.CLOSE_PARENS) // the ')' is only peeked here, never consumed
+                                                       return true;
+                                       }
+                               }
+                               return false; // tokens consumed so far are not rewound here; the '(' case in is_punct wraps this call in PushPosition/PopPosition
+                       }
+               }
+
+               int parsing_generic_less_than = 0;
+               
                int is_punct (char c, ref bool doread)
                {
                        int d;
@@ -649,7 +844,17 @@ namespace Mono.CSharp
                        case ']':
                                return Token.CLOSE_BRACKET;
                        case '(':
-                               return Token.OPEN_PARENS;
+                               if (linq){
+                                       PushPosition ();
+                                       bool have_lambda_parameter = parse_lambda_parameters ();
+                                       PopPosition ();
+                                       
+                                       if (have_lambda_parameter)
+                                               return Token.OPEN_PARENS_LAMBDA;
+                                       else
+                                               return Token.OPEN_PARENS;
+                               } else
+                                       return Token.OPEN_PARENS;
                        case ')': {
                                if (deambiguate_close_parens == 0)
                                        return Token.CLOSE_PARENS;
@@ -705,7 +910,6 @@ namespace Mono.CSharp
                                PopPosition ();
 
                                if (is_generic_lt) {
-                                       parsing_generic_less_than++;
                                        return Token.OP_GENERICS_LT;
                                } else
                                        parsing_generic_less_than = 0;
@@ -795,6 +999,11 @@ namespace Mono.CSharp
                                        doread = true;
                                        return Token.OP_EQ;
                                }
+                               if (d == '>'){
+                                       doread = true;
+                                       val = Location;
+                                       return Token.ARROW;
+                               }
                                return Token.ASSIGN;
                        }
 
@@ -2060,7 +2269,7 @@ namespace Mono.CSharp
 
                        case "pragma":
                                if (RootContext.Version == LanguageVersion.ISO_1) {
-                                       Report.FeatureIsNotStandardized (Location, "#pragma");
+                                       Report.FeatureIsNotISO1 (Location, "#pragma");
                                        return true;
                                }
 
@@ -2239,10 +2448,14 @@ namespace Mono.CSharp
 
                        // Whether we have seen comments on the current line
                        bool comments_seen = false;
-                       
                        val = null;
                        for (;(c = getChar ()) != -1;) {
-                               if (c == ' ' || c == '\t' || c == '\f' || c == '\v' || c == 0xa0)
+                               if (c == '\t'){
+                                       col = ((col + 8) / 8) * 8;
+                                       continue;
+                               }
+                               
+                               if (c == ' ' || c == '\f' || c == '\v' || c == 0xa0 || c == 0)
                                        continue;
 
                                if (c == '\r') {
@@ -2273,8 +2486,8 @@ namespace Mono.CSharp
                                                        }
                                                }
                                                while ((d = getChar ()) != -1 && (d != '\n') && d != '\r')
-                                               if (d == '\n'){
-                                               }
+                                                       if (d == '\n'){
+                                                       }
                                                any_token_seen |= tokens_seen;
                                                tokens_seen = false;
                                                comments_seen = false;