This commit was manufactured by cvs2svn to create branch 'mono-1-0'.
[mono.git] / mcs / gmcs / cs-tokenizer.cs
index 548fa287d26279535c2381605b2a78531fa741d0..0fdc84b6ce5fe49dc9c9f9832b9d4ac230b0fd3b 100755 (executable)
@@ -1,3 +1,4 @@
+// -*- coding: dos -*-\r
 //\r
 // cs-tokenizer.cs: The Tokenizer for the C# compiler\r
 //                  This also implements the preprocessor\r
@@ -218,6 +219,7 @@ namespace Mono.CSharp
                {\r
                        keywords = new CharArrayHashtable [64];\r
 \r
+                       AddKeyword ("__arglist", Token.ARGLIST);\r
                        AddKeyword ("abstract", Token.ABSTRACT);\r
                        AddKeyword ("as", Token.AS);\r
                        AddKeyword ("add", Token.ADD);\r
@@ -384,14 +386,29 @@ namespace Mono.CSharp
                        identifiers = null;\r
                }\r
 \r
-               bool is_identifier_start_character (char c)\r
+               static bool is_identifier_start_character (char c)\r
                {\r
                        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_' || Char.IsLetter (c);\r
                }\r
 \r
-               bool is_identifier_part_character (char c)\r
+               static bool is_identifier_part_character (char c)\r
                {\r
                        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9') || Char.IsLetter (c);\r
+               }
+               
+               public static bool IsValidIdentifier (string s) // Returns true when s is non-empty and every character passes the tokenizer's identifier character tests. No keyword lookup is done here.
+               {
+                       if (s == null || s.Length == 0) // null and the empty string are rejected up front
+                               return false;
+                       
+                       if (!is_identifier_start_character (s [0])) // the first character uses the start-character test, which (unlike the part test) does not accept '0'..'9'
+                               return false;
+                       
+                       for (int i = 1; i < s.Length; i ++) // remaining characters only need to pass the part-character test
+                               if (! is_identifier_part_character (s [i]))
+                                       return false;
+                       
+                       return true; // every character was accepted
                }\r
 \r
                bool parse_less_than ()\r
@@ -426,7 +443,7 @@ namespace Mono.CSharp
 \r
                        if (the_token == Token.OP_GENERICS_GT)\r
                                return true;\r
-                       else if (the_token == Token.COMMA)\r
+                       else if ((the_token == Token.COMMA) || (the_token == Token.DOT))\r
                                goto start;\r
                        else if (the_token == Token.OP_GENERICS_LT) {\r
                                if (!parse_less_than ())\r
@@ -1174,7 +1191,19 @@ namespace Mono.CSharp
                public int token ()\r
                 {\r
                        current_token = xtoken ();\r
-                        return current_token;\r
+\r
+                       if (current_token != Token.DEFAULT)\r
+                               return current_token;\r
+\r
+                       int c = consume_whitespace ();\r
+                       if (c == -1)\r
+                               current_token = Token.ERROR;\r
+                       else if (c == '(')\r
+                               current_token = Token.DEFAULT_OPEN_PARENS;\r
+                       else\r
+                               putback (c);\r
+\r
+                       return current_token;\r
                 }\r
 \r
                static StringBuilder static_cmd_arg = new System.Text.StringBuilder ();\r
@@ -1192,8 +1221,13 @@ namespace Mono.CSharp
                                ;\r
                                \r
                        while ((c != -1) && (c != '\n') && (c != ' ') && (c != '\t') && (c != '\r')){\r
-                               static_cmd_arg.Append ((char) c);\r
-                                c = getChar ();\r
+                               if (is_identifier_part_character ((char) c)){\r
+                                       static_cmd_arg.Append ((char) c);\r
+                                       c = getChar ();\r
+                               } else {\r
+                                       putback (c);\r
+                                       break;\r
+                               }\r
                        }\r
 \r
                        cmd = static_cmd_arg.ToString ();\r
@@ -1264,6 +1298,8 @@ namespace Mono.CSharp
                                        \r
                                        string name = arg.Substring (pos). Trim (quotes);\r
                                        ref_name = Location.LookupFile (name);\r
+                                       file_name.HasLineDirective = true;\r
+                                       ref_name.HasLineDirective = true;\r
                                        Location.Push (ref_name);\r
                                } else {\r
                                        ref_line = System.Int32.Parse (arg);\r
@@ -1759,7 +1795,7 @@ namespace Mono.CSharp
                        // Optimization: avoids doing the keyword lookup\r
                        // on uppercase letters and _\r
                        //\r
-                       if (s >= 'a'){\r
+                       if (s >= 'a' || s == '_'){\r
                                int keyword = GetKeyword (id_builder, pos);\r
                                if (keyword != -1 && !quoted)\r
                                return keyword;\r
@@ -1788,8 +1824,8 @@ namespace Mono.CSharp
 \r
                        return Token.IDENTIFIER;\r
                }\r
-               \r
-               public int xtoken ()\r
+\r
+               int consume_whitespace ()\r
                {\r
                        int t;\r
                        bool doread = false;\r
@@ -1859,22 +1895,7 @@ namespace Mono.CSharp
                                        goto is_punct_label;\r
                                }\r
 \r
-                               \r
-                               if (is_identifier_start_character ((char)c)){\r
-                                       tokens_seen = true;\r
-                                       return consume_identifier (c, false);\r
-                               }\r
-\r
                        is_punct_label:\r
-                               if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){\r
-                                       tokens_seen = true;\r
-                                       if (doread){\r
-                                               getChar ();\r
-                                               col++;\r
-                                       }\r
-                                       return t;\r
-                               }\r
-\r
                                // white space\r
                                if (c == '\n'){\r
                                        line++;\r
@@ -1885,19 +1906,6 @@ namespace Mono.CSharp
                                        continue;\r
                                }\r
 \r
-                               if (c >= '0' && c <= '9'){\r
-                                       tokens_seen = true;\r
-                                       return is_number (c);\r
-                               }\r
-\r
-                               if (c == '.'){\r
-                                       tokens_seen = true;\r
-                                       int peek = peekChar ();\r
-                                       if (peek >= '0' && peek <= '9')\r
-                                               return is_number (c);\r
-                                       return Token.DOT;\r
-                               }\r
-                               \r
                                /* For now, ignore pre-processor commands */\r
                                // FIXME: In C# the '#' is not limited to appear\r
                                // on the first column.\r
@@ -1934,72 +1942,115 @@ namespace Mono.CSharp
                                                Report.Error (1027, Location, "#endif/#endregion expected");\r
                                        continue;\r
                                }\r
-                               \r
-                               if (c == '"') \r
-                                       return consume_string (false);\r
 \r
-                               if (c == '\''){\r
-                                       c = getChar ();\r
-                                       tokens_seen = true;\r
-                                       if (c == '\''){\r
-                                               error_details = "Empty character literal";\r
-                                               Report.Error (1011, Location, error_details);\r
-                                               return Token.ERROR;\r
-                                       }\r
-                                       c = escape (c);\r
-                                       if (c == -1)\r
-                                               return Token.ERROR;\r
-                                       val = new System.Char ();\r
-                                       val = (char) c;\r
-                                       c = getChar ();\r
+                               return c;\r
+                       }\r
 \r
-                                       if (c != '\''){\r
-                                               error_details = "Too many characters in character literal";\r
-                                               Report.Error (1012, Location, error_details);\r
+                       return -1;\r
+               }\r
+               \r
+               public int xtoken ()\r
+               {\r
+                       int t;\r
+                       bool doread = false;\r
+                       int c;\r
 \r
-                                               // Try to recover, read until newline or next "'"\r
-                                               while ((c = getChar ()) != -1){\r
-                                                       if (c == '\n' || c == '\''){\r
-                                                               line++;\r
-                                                               ref_line++;\r
-                                                               col = 0;\r
-                                                               break;\r
-                                                       } else\r
-                                                               col++;\r
-                                                       \r
-                                               }\r
-                                               return Token.ERROR;\r
-                                       }\r
-                                       return Token.LITERAL_CHARACTER;\r
-                               }\r
-                               \r
-                               if (c == '@') {\r
-                                       c = getChar ();\r
-                                       if (c == '"') {\r
-                                               tokens_seen = true;\r
-                                               return consume_string (true);\r
-                                       } else if (is_identifier_start_character ((char) c)){\r
-                                               return consume_identifier (c, true);\r
-                                       } else {\r
-                                               Report.Error (1033, Location, "'@' must be followed by string constant or identifier");\r
-                                       }\r
+                       val = null;\r
+                       // optimization: eliminate col and implement #directive semantic correctly.\r
+\r
+                       c = consume_whitespace ();\r
+                       if (c == -1)\r
+                               return Token.EOF;\r
+\r
+                       if (is_identifier_start_character ((char)c)){\r
+                               tokens_seen = true;\r
+                               return consume_identifier (c, false);\r
+                       }\r
+\r
+               is_punct_label:\r
+                       if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){\r
+                               tokens_seen = true;\r
+                               if (doread){\r
+                                       getChar ();\r
+                                       col++;\r
                                }\r
+                               return t;\r
+                       }\r
 \r
-                               if (c == '#') {\r
-                                       error_details = "Preprocessor directives must appear as the first non-whitespace " +\r
-                                               "character on a line.";\r
+                       if (c >= '0' && c <= '9'){\r
+                               tokens_seen = true;\r
+                               return is_number (c);\r
+                       }\r
+\r
+                       if (c == '.'){\r
+                               tokens_seen = true;\r
+                               int peek = peekChar ();\r
+                               if (peek >= '0' && peek <= '9')\r
+                                       return is_number (c);\r
+                               return Token.DOT;\r
+                       }\r
 \r
-                                       Report.Error (1040, Location, error_details);\r
+                       if (c == '"') \r
+                               return consume_string (false);\r
 \r
+                       if (c == '\''){\r
+                               c = getChar ();\r
+                               tokens_seen = true;\r
+                               if (c == '\''){\r
+                                       error_details = "Empty character literal";\r
+                                       Report.Error (1011, Location, error_details);\r
                                        return Token.ERROR;\r
                                }\r
+                               c = escape (c);\r
+                               if (c == -1)\r
+                                       return Token.ERROR;\r
+                               val = new System.Char ();\r
+                               val = (char) c;\r
+                               c = getChar ();\r
+\r
+                               if (c != '\''){\r
+                                       error_details = "Too many characters in character literal";\r
+                                       Report.Error (1012, Location, error_details);\r
 \r
-                               error_details = ((char)c).ToString ();\r
+                                       // Try to recover, read until newline or next "'"\r
+                                       while ((c = getChar ()) != -1){\r
+                                               if (c == '\n' || c == '\''){\r
+                                                       line++;\r
+                                                       ref_line++;\r
+                                                       col = 0;\r
+                                                       break;\r
+                                               } else\r
+                                                       col++;\r
+                                       }\r
+                                       return Token.ERROR;\r
+                               }\r
+                               return Token.LITERAL_CHARACTER;\r
+                       }\r
                                \r
+                       if (c == '@') {\r
+                               c = getChar ();\r
+                               if (c == '"') {\r
+                                       tokens_seen = true;\r
+                                       return consume_string (true);\r
+                               } else if (is_identifier_start_character ((char) c)){\r
+                                       return consume_identifier (c, true);\r
+                               } else {\r
+                                       Report.Error (1033, Location, "'@' must be followed by string constant or identifier");\r
+                               }\r
+                       }\r
+\r
+                       if (c == '#') {\r
+                               error_details = "Preprocessor directives must appear as the first non-whitespace " +\r
+                                       "character on a line.";\r
+\r
+                               Report.Error (1040, Location, error_details);\r
+\r
                                return Token.ERROR;\r
                        }\r
 \r
-                       return Token.EOF;\r
+                       error_details = ((char)c).ToString ();\r
+\r
+                       return Token.ERROR;\r
                }\r
 \r
                public void cleanup ()\r