+// -*- coding: dos -*-\r
//\r
// cs-tokenizer.cs: The Tokenizer for the C# compiler\r
// This also implements the preprocessor\r
{\r
keywords = new CharArrayHashtable [64];\r
\r
+ AddKeyword ("__arglist", Token.ARGLIST);\r
AddKeyword ("abstract", Token.ABSTRACT);\r
AddKeyword ("as", Token.AS);\r
AddKeyword ("add", Token.ADD);\r
identifiers = null;\r
}\r
\r
- bool is_identifier_start_character (char c)\r
+ //\r
+ // Returns true if `c' may begin an identifier: an underscore or a\r
+ // letter-character (Unicode classes Lu, Ll, Lt, Lm, Lo or Nl).\r
+ //\r
+ static bool is_identifier_start_character (char c)\r
{\r
- return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_' || Char.IsLetter (c);\r
+ // Fast path: ASCII letters and underscore.\r
+ if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_')\r
+ return true;\r
+\r
+ // No other ASCII character can start an identifier.\r
+ if (c < 0x80)\r
+ return false;\r
+\r
+ // Char.IsLetter only covers Lu, Ll, Lt, Lm and Lo; letter numbers (Nl),\r
+ // such as U+2160 ROMAN NUMERAL ONE, have to be tested separately.\r
+ return Char.IsLetter (c) ||\r
+ Char.GetUnicodeCategory (c) == System.Globalization.UnicodeCategory.LetterNumber;\r
}\r
\r
- bool is_identifier_part_character (char c)\r
+ //\r
+ // Returns true if `c' may appear after the first character of an\r
+ // identifier: a letter-character (Lu, Ll, Lt, Lm, Lo, Nl), a decimal\r
+ // digit (Nd), a connecting character (Pc) or a combining character\r
+ // (Mn, Mc).  Formatting characters (Cf) are not accepted here.\r
+ //\r
+ static bool is_identifier_part_character (char c)\r
{\r
- return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9') || Char.IsLetter (c);\r
+ // Fast path: ASCII letters, digits and underscore.\r
+ if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9'))\r
+ return true;\r
+\r
+ // No other ASCII character can be part of an identifier.\r
+ if (c < 0x80)\r
+ return false;\r
+\r
+ if (Char.IsLetter (c))\r
+ return true;\r
+\r
+ // Char.IsLetter misses the remaining classes the grammar allows.\r
+ switch (Char.GetUnicodeCategory (c)) {\r
+ case System.Globalization.UnicodeCategory.LetterNumber:\r
+ case System.Globalization.UnicodeCategory.DecimalDigitNumber:\r
+ case System.Globalization.UnicodeCategory.ConnectorPunctuation:\r
+ case System.Globalization.UnicodeCategory.NonSpacingMark:\r
+ case System.Globalization.UnicodeCategory.SpacingCombiningMark:\r
+ return true;\r
+ default:\r
+ return false;\r
+ }\r
+ }
+
+ //
+ // Returns true when `s' is spelled like an identifier: it is neither
+ // null nor empty, its first character satisfies
+ // is_identifier_start_character and every following character
+ // satisfies is_identifier_part_character.  Only the characters are
+ // examined; no keyword lookup is done.
+ //
+ public static bool IsValidIdentifier (string s)
+ {
+ if (s == null)
+ return false;
+
+ int length = s.Length;
+ if (length == 0 || !is_identifier_start_character (s [0]))
+ return false;
+
+ // The first character was checked above; validate the rest.
+ int pos = 1;
+ while (pos < length) {
+ if (!is_identifier_part_character (s [pos]))
+ return false;
+ pos++;
+ }
+
+ return true;
}\r
\r
bool parse_less_than ()\r
\r
if (the_token == Token.OP_GENERICS_GT)\r
return true;\r
- else if (the_token == Token.COMMA)\r
+ else if ((the_token == Token.COMMA) || (the_token == Token.DOT))\r
goto start;\r
else if (the_token == Token.OP_GENERICS_LT) {\r
if (!parse_less_than ())\r
public int token ()\r
{\r
current_token = xtoken ();\r
- return current_token;\r
+\r
+ //\r
+ // Only Token.DEFAULT needs lookahead: skip whitespace and inspect\r
+ // the next character.  End of input turns the token into\r
+ // Token.ERROR, an opening parenthesis is consumed and turns it into\r
+ // Token.DEFAULT_OPEN_PARENS, and any other character is pushed back\r
+ // so that the next call scans it normally.\r
+ //\r
+ if (current_token == Token.DEFAULT) {\r
+ int next = consume_whitespace ();\r
+\r
+ switch (next) {\r
+ case -1:\r
+ current_token = Token.ERROR;\r
+ break;\r
+ case '(':\r
+ current_token = Token.DEFAULT_OPEN_PARENS;\r
+ break;\r
+ default:\r
+ putback (next);\r
+ break;\r
+ }\r
+ }\r
+\r
+ return current_token;\r
}\r
\r
static StringBuilder static_cmd_arg = new System.Text.StringBuilder ();\r
;\r
\r
while ((c != -1) && (c != '\n') && (c != ' ') && (c != '\t') && (c != '\r')){\r
- static_cmd_arg.Append ((char) c);\r
- c = getChar ();\r
+ if (is_identifier_part_character ((char) c)){\r
+ static_cmd_arg.Append ((char) c);\r
+ c = getChar ();\r
+ } else {\r
+ putback (c);\r
+ break;\r
+ }\r
}\r
\r
cmd = static_cmd_arg.ToString ();\r
\r
string name = arg.Substring (pos). Trim (quotes);\r
ref_name = Location.LookupFile (name);\r
+ file_name.HasLineDirective = true;\r
+ ref_name.HasLineDirective = true;\r
Location.Push (ref_name);\r
} else {\r
ref_line = System.Int32.Parse (arg);\r
// Optimization: avoids doing the keyword lookup\r
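- // on uppercase letters and _\r
+ // on uppercase letters ('_' is still looked up, because a keyword\r
+ // such as "__arglist" starts with it)\r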
//\r
- if (s >= 'a'){\r
+ if (s >= 'a' || s == '_'){\r
int keyword = GetKeyword (id_builder, pos);\r
if (keyword != -1 && !quoted)\r
return keyword;\r
\r
return Token.IDENTIFIER;\r
}\r
- \r
- public int xtoken ()\r
+\r
+ int consume_whitespace ()\r
{\r
int t;\r
bool doread = false;\r
goto is_punct_label;\r
}\r
\r
- \r
- if (is_identifier_start_character ((char)c)){\r
- tokens_seen = true;\r
- return consume_identifier (c, false);\r
- }\r
-\r
is_punct_label:\r
- if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){\r
- tokens_seen = true;\r
- if (doread){\r
- getChar ();\r
- col++;\r
- }\r
- return t;\r
- }\r
-\r
// white space\r
if (c == '\n'){\r
line++;\r
continue;\r
}\r
\r
- if (c >= '0' && c <= '9'){\r
- tokens_seen = true;\r
- return is_number (c);\r
- }\r
-\r
- if (c == '.'){\r
- tokens_seen = true;\r
- int peek = peekChar ();\r
- if (peek >= '0' && peek <= '9')\r
- return is_number (c);\r
- return Token.DOT;\r
- }\r
- \r
/* For now, ignore pre-processor commands */\r
// FIXME: In C# the '#' is not limited to appear\r
// on the first column.\r
Report.Error (1027, Location, "#endif/#endregion expected");\r
continue;\r
}\r
- \r
- if (c == '"') \r
- return consume_string (false);\r
\r
- if (c == '\''){\r
- c = getChar ();\r
- tokens_seen = true;\r
- if (c == '\''){\r
- error_details = "Empty character literal";\r
- Report.Error (1011, Location, error_details);\r
- return Token.ERROR;\r
- }\r
- c = escape (c);\r
- if (c == -1)\r
- return Token.ERROR;\r
- val = new System.Char ();\r
- val = (char) c;\r
- c = getChar ();\r
+ return c;\r
+ }\r
\r
- if (c != '\''){\r
- error_details = "Too many characters in character literal";\r
- Report.Error (1012, Location, error_details);\r
+ return -1;\r
+ }\r
+ \r
+ //\r
+ // Scans and returns the next token.\r
+ //\r
+ // Leading whitespace is skipped through consume_whitespace ().\r
+ // Token.EOF is returned when that reports -1, and Token.ERROR when the\r
+ // character read cannot start any token (error_details then holds the\r
+ // message or the offending character).  For a character literal the\r
+ // decoded character is stored in `val'.\r
+ //\r
+ public int xtoken ()\r
+ {\r
+ int t;\r
+ bool doread = false;\r
+ int c;\r
\r
- // Try to recover, read until newline or next "'"\r
- while ((c = getChar ()) != -1){\r
- if (c == '\n' || c == '\''){\r
- line++;\r
- ref_line++;\r
- col = 0;\r
- break;\r
- } else\r
- col++;\r
- \r
- }\r
- return Token.ERROR;\r
- }\r
- return Token.LITERAL_CHARACTER;\r
- }\r
- \r
- if (c == '@') {\r
- c = getChar ();\r
- if (c == '"') {\r
- tokens_seen = true;\r
- return consume_string (true);\r
- } else if (is_identifier_start_character ((char) c)){\r
- return consume_identifier (c, true);\r
- } else {\r
- Report.Error (1033, Location, "'@' must be followed by string constant or identifier");\r
- }\r
+ val = null;\r
+ // optimization: eliminate col and implement #directive semantic correctly.\r
+\r
+ c = consume_whitespace ();\r
+ if (c == -1)\r
+ return Token.EOF;\r
+\r
+ if (is_identifier_start_character ((char)c)){\r
+ tokens_seen = true;\r
+ return consume_identifier (c, false);\r
+ }\r
+\r
+ // Punctuation; `doread' is set by is_punct when one more character\r
+ // belongs to the token and still has to be consumed.\r
+ if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){\r
+ tokens_seen = true;\r
+ if (doread){\r
+ getChar ();\r
+ col++;\r
}\r
+ return t;\r
+ }\r
\r
- if (c == '#') {\r
- error_details = "Preprocessor directives must appear as the first non-whitespace " +\r
- "character on a line.";\r
+ if (c >= '0' && c <= '9'){\r
+ tokens_seen = true;\r
+ return is_number (c);\r
+ }\r
+\r
+ // A dot followed by a digit starts a number, otherwise it is Token.DOT.\r
+ if (c == '.'){\r
+ tokens_seen = true;\r
+ int peek = peekChar ();\r
+ if (peek >= '0' && peek <= '9')\r
+ return is_number (c);\r
+ return Token.DOT;\r
+ }\r
\r
- Report.Error (1040, Location, error_details);\r
+ if (c == '"') \r
+ return consume_string (false);\r
\r
+ // Character literal.\r
+ if (c == '\''){\r
+ c = getChar ();\r
+ tokens_seen = true;\r
+ if (c == '\''){\r
+ error_details = "Empty character literal";\r
+ Report.Error (1011, Location, error_details);\r
return Token.ERROR;\r
}\r
+ c = escape (c);\r
+ if (c == -1)\r
+ return Token.ERROR;\r
+ val = (char) c;\r
+ c = getChar ();\r
+\r
+ if (c != '\''){\r
+ error_details = "Too many characters in character literal";\r
+ Report.Error (1012, Location, error_details);\r
\r
- error_details = ((char)c).ToString ();\r
+ // Try to recover, read until newline or next "'".\r
+ // Only a newline starts a new line; a closing quote\r
+ // is just one more column on the current line.\r
+ while ((c = getChar ()) != -1){\r
+ if (c == '\n'){\r
+ line++;\r
+ ref_line++;\r
+ col = 0;\r
+ break;\r
+ }\r
+ col++;\r
+ if (c == '\'')\r
+ break;\r
+ }\r
+ return Token.ERROR;\r
+ }\r
+ return Token.LITERAL_CHARACTER;\r
+ }\r
\r
+ // '@' introduces either a string (consume_string (true)) or a\r
+ // quoted identifier (consume_identifier (c, true)).\r
+ if (c == '@') {\r
+ c = getChar ();\r
+ if (c == '"') {\r
+ tokens_seen = true;\r
+ return consume_string (true);\r
+ } else if (is_identifier_start_character ((char) c)){\r
+ return consume_identifier (c, true);\r
+ } else {\r
+ Report.Error (1033, Location, "'@' must be followed by string constant or identifier");\r
+ }\r
+ }\r
+\r
+ if (c == '#') {\r
+ error_details = "Preprocessor directives must appear as the first non-whitespace " +\r
+ "character on a line.";\r
+\r
+ Report.Error (1040, Location, error_details);\r
+\r
return Token.ERROR;\r
}\r
\r
- return Token.EOF;\r
+ // Nothing matched: hand the offending character to the caller.\r
+ error_details = ((char)c).ToString ();\r
+\r
+ return Token.ERROR;\r
}\r
\r
public void cleanup ()\r