+// -*- coding: dos -*-\r
//\r
// cs-tokenizer.cs: The Tokenizer for the C# compiler\r
// This also implements the preprocessor\r
\r
public class Tokenizer : yyParser.yyInput\r
{\r
- StreamReader reader;\r
+ SeekableStreamReader reader;\r
public SourceFile ref_name;\r
public SourceFile file_name;\r
public int ref_line = 1;\r
//\r
// Class variables\r
// \r
- static Hashtable keywords;\r
+ static CharArrayHashtable[] keywords;\r
static NumberStyles styles;\r
static NumberFormatInfo csharp_format_info;\r
\r
const int max_id_size = 512;\r
static char [] id_builder = new char [max_id_size];\r
\r
+ static CharArrayHashtable [] identifiers = new CharArrayHashtable [max_id_size + 1];\r
+\r
const int max_number_size = 128;\r
static char [] number_builder = new char [max_number_size];\r
static int number_pos;\r
}\r
}\r
\r
+		//\r
+		// Registers `kw' as a keyword that maps to `token'.  Keywords are\r
+		// bucketed by length: the table for kw.Length is created the\r
+		// first time a keyword of that length is added.\r
+		//\r
+		static void AddKeyword (string kw, int token)\r
+		{\r
+			int length = kw.Length;\r
+			CharArrayHashtable bucket = keywords [length];\r
+\r
+			if (bucket == null) {\r
+				bucket = new CharArrayHashtable (length);\r
+				keywords [length] = bucket;\r
+			}\r
+\r
+			bucket [kw.ToCharArray ()] = token;\r
+		}\r
+\r
static void InitTokens ()\r
{\r
- keywords = new Hashtable ();\r
-\r
- keywords.Add ("abstract", Token.ABSTRACT);\r
- keywords.Add ("as", Token.AS);\r
- keywords.Add ("add", Token.ADD);\r
- keywords.Add ("assembly", Token.ASSEMBLY);\r
- keywords.Add ("base", Token.BASE);\r
- keywords.Add ("bool", Token.BOOL);\r
- keywords.Add ("break", Token.BREAK);\r
- keywords.Add ("byte", Token.BYTE);\r
- keywords.Add ("case", Token.CASE);\r
- keywords.Add ("catch", Token.CATCH);\r
- keywords.Add ("char", Token.CHAR);\r
- keywords.Add ("checked", Token.CHECKED);\r
- keywords.Add ("class", Token.CLASS);\r
- keywords.Add ("const", Token.CONST);\r
- keywords.Add ("continue", Token.CONTINUE);\r
- keywords.Add ("decimal", Token.DECIMAL);\r
- keywords.Add ("default", Token.DEFAULT);\r
- keywords.Add ("delegate", Token.DELEGATE);\r
- keywords.Add ("do", Token.DO);\r
- keywords.Add ("double", Token.DOUBLE);\r
- keywords.Add ("else", Token.ELSE);\r
- keywords.Add ("enum", Token.ENUM);\r
- keywords.Add ("event", Token.EVENT);\r
- keywords.Add ("explicit", Token.EXPLICIT);\r
- keywords.Add ("extern", Token.EXTERN);\r
- keywords.Add ("false", Token.FALSE);\r
- keywords.Add ("finally", Token.FINALLY);\r
- keywords.Add ("fixed", Token.FIXED);\r
- keywords.Add ("float", Token.FLOAT);\r
- keywords.Add ("for", Token.FOR);\r
- keywords.Add ("foreach", Token.FOREACH);\r
- keywords.Add ("goto", Token.GOTO);\r
- keywords.Add ("get", Token.GET);\r
- keywords.Add ("if", Token.IF);\r
- keywords.Add ("implicit", Token.IMPLICIT);\r
- keywords.Add ("in", Token.IN);\r
- keywords.Add ("int", Token.INT);\r
- keywords.Add ("interface", Token.INTERFACE);\r
- keywords.Add ("internal", Token.INTERNAL);\r
- keywords.Add ("is", Token.IS);\r
- keywords.Add ("lock", Token.LOCK);\r
- keywords.Add ("long", Token.LONG);\r
- keywords.Add ("namespace", Token.NAMESPACE);\r
- keywords.Add ("new", Token.NEW);\r
- keywords.Add ("null", Token.NULL);\r
- keywords.Add ("object", Token.OBJECT);\r
- keywords.Add ("operator", Token.OPERATOR);\r
- keywords.Add ("out", Token.OUT);\r
- keywords.Add ("override", Token.OVERRIDE);\r
- keywords.Add ("params", Token.PARAMS);\r
- keywords.Add ("private", Token.PRIVATE);\r
- keywords.Add ("protected", Token.PROTECTED);\r
- keywords.Add ("public", Token.PUBLIC);\r
- keywords.Add ("readonly", Token.READONLY);\r
- keywords.Add ("ref", Token.REF);\r
- keywords.Add ("remove", Token.REMOVE);\r
- keywords.Add ("return", Token.RETURN);\r
- keywords.Add ("sbyte", Token.SBYTE);\r
- keywords.Add ("sealed", Token.SEALED);\r
- keywords.Add ("set", Token.SET);\r
- keywords.Add ("short", Token.SHORT);\r
- keywords.Add ("sizeof", Token.SIZEOF);\r
- keywords.Add ("stackalloc", Token.STACKALLOC);\r
- keywords.Add ("static", Token.STATIC);\r
- keywords.Add ("string", Token.STRING);\r
- keywords.Add ("struct", Token.STRUCT);\r
- keywords.Add ("switch", Token.SWITCH);\r
- keywords.Add ("this", Token.THIS);\r
- keywords.Add ("throw", Token.THROW);\r
- keywords.Add ("true", Token.TRUE);\r
- keywords.Add ("try", Token.TRY);\r
- keywords.Add ("typeof", Token.TYPEOF);\r
- keywords.Add ("uint", Token.UINT);\r
- keywords.Add ("ulong", Token.ULONG);\r
- keywords.Add ("unchecked", Token.UNCHECKED);\r
- keywords.Add ("unsafe", Token.UNSAFE);\r
- keywords.Add ("ushort", Token.USHORT);\r
- keywords.Add ("using", Token.USING);\r
- keywords.Add ("virtual", Token.VIRTUAL);\r
- keywords.Add ("void", Token.VOID);\r
- keywords.Add ("volatile", Token.VOLATILE);\r
- keywords.Add ("while", Token.WHILE);\r
-\r
- if (RootContext.V2){\r
- keywords.Add ("__yield", Token.YIELD);\r
- keywords.Add ("yield", Token.YIELD);\r
- }\r
+ keywords = new CharArrayHashtable [64];\r
+\r
+ AddKeyword ("__arglist", Token.ARGLIST);\r
+ AddKeyword ("abstract", Token.ABSTRACT);\r
+ AddKeyword ("as", Token.AS);\r
+ AddKeyword ("add", Token.ADD);\r
+ AddKeyword ("assembly", Token.ASSEMBLY);\r
+ AddKeyword ("base", Token.BASE);\r
+ AddKeyword ("bool", Token.BOOL);\r
+ AddKeyword ("break", Token.BREAK);\r
+ AddKeyword ("byte", Token.BYTE);\r
+ AddKeyword ("case", Token.CASE);\r
+ AddKeyword ("catch", Token.CATCH);\r
+ AddKeyword ("char", Token.CHAR);\r
+ AddKeyword ("checked", Token.CHECKED);\r
+ AddKeyword ("class", Token.CLASS);\r
+ AddKeyword ("const", Token.CONST);\r
+ AddKeyword ("continue", Token.CONTINUE);\r
+ AddKeyword ("decimal", Token.DECIMAL);\r
+ AddKeyword ("default", Token.DEFAULT);\r
+ AddKeyword ("delegate", Token.DELEGATE);\r
+ AddKeyword ("do", Token.DO);\r
+ AddKeyword ("double", Token.DOUBLE);\r
+ AddKeyword ("else", Token.ELSE);\r
+ AddKeyword ("enum", Token.ENUM);\r
+ AddKeyword ("event", Token.EVENT);\r
+ AddKeyword ("explicit", Token.EXPLICIT);\r
+ AddKeyword ("extern", Token.EXTERN);\r
+ AddKeyword ("false", Token.FALSE);\r
+ AddKeyword ("finally", Token.FINALLY);\r
+ AddKeyword ("fixed", Token.FIXED);\r
+ AddKeyword ("float", Token.FLOAT);\r
+ AddKeyword ("for", Token.FOR);\r
+ AddKeyword ("foreach", Token.FOREACH);\r
+ AddKeyword ("goto", Token.GOTO);\r
+ AddKeyword ("get", Token.GET);\r
+ AddKeyword ("if", Token.IF);\r
+ AddKeyword ("implicit", Token.IMPLICIT);\r
+ AddKeyword ("in", Token.IN);\r
+ AddKeyword ("int", Token.INT);\r
+ AddKeyword ("interface", Token.INTERFACE);\r
+ AddKeyword ("internal", Token.INTERNAL);\r
+ AddKeyword ("is", Token.IS);\r
+ AddKeyword ("lock", Token.LOCK);\r
+ AddKeyword ("long", Token.LONG);\r
+ AddKeyword ("namespace", Token.NAMESPACE);\r
+ AddKeyword ("new", Token.NEW);\r
+ AddKeyword ("null", Token.NULL);\r
+ AddKeyword ("object", Token.OBJECT);\r
+ AddKeyword ("operator", Token.OPERATOR);\r
+ AddKeyword ("out", Token.OUT);\r
+ AddKeyword ("override", Token.OVERRIDE);\r
+ AddKeyword ("params", Token.PARAMS);\r
+ AddKeyword ("private", Token.PRIVATE);\r
+ AddKeyword ("protected", Token.PROTECTED);\r
+ AddKeyword ("public", Token.PUBLIC);\r
+ AddKeyword ("readonly", Token.READONLY);\r
+ AddKeyword ("ref", Token.REF);\r
+ AddKeyword ("remove", Token.REMOVE);\r
+ AddKeyword ("return", Token.RETURN);\r
+ AddKeyword ("sbyte", Token.SBYTE);\r
+ AddKeyword ("sealed", Token.SEALED);\r
+ AddKeyword ("set", Token.SET);\r
+ AddKeyword ("short", Token.SHORT);\r
+ AddKeyword ("sizeof", Token.SIZEOF);\r
+ AddKeyword ("stackalloc", Token.STACKALLOC);\r
+ AddKeyword ("static", Token.STATIC);\r
+ AddKeyword ("string", Token.STRING);\r
+ AddKeyword ("struct", Token.STRUCT);\r
+ AddKeyword ("switch", Token.SWITCH);\r
+ AddKeyword ("this", Token.THIS);\r
+ AddKeyword ("throw", Token.THROW);\r
+ AddKeyword ("true", Token.TRUE);\r
+ AddKeyword ("try", Token.TRY);\r
+ AddKeyword ("typeof", Token.TYPEOF);\r
+ AddKeyword ("uint", Token.UINT);\r
+ AddKeyword ("ulong", Token.ULONG);\r
+ AddKeyword ("unchecked", Token.UNCHECKED);\r
+ AddKeyword ("unsafe", Token.UNSAFE);\r
+ AddKeyword ("ushort", Token.USHORT);\r
+ AddKeyword ("using", Token.USING);\r
+ AddKeyword ("virtual", Token.VIRTUAL);\r
+ AddKeyword ("void", Token.VOID);\r
+ AddKeyword ("volatile", Token.VOLATILE);\r
+ AddKeyword ("while", Token.WHILE);\r
}\r
\r
//\r
string_builder = new System.Text.StringBuilder ();\r
}\r
\r
- int GetKeyword (string name)\r
+ int GetKeyword (char[] id, int id_len)\r
{\r
- object o = keywords [name];\r
+ /*\r
+ * Keywords are stored in an array of hashtables grouped by their\r
+ * length.\r
+ */\r
+\r
+ if ((id_len >= keywords.Length) || (keywords [id_len] == null))\r
+ return -1;\r
+ object o = keywords [id_len] [id];\r
\r
if (o == null)\r
return -1;\r
defines [def] = true;\r
}\r
\r
- public Tokenizer (StreamReader input, SourceFile file, ArrayList defs)\r
+ public Tokenizer (SeekableStreamReader input, SourceFile file, ArrayList defs)\r
{\r
this.ref_name = file;\r
this.file_name = file;\r
Mono.CSharp.Location.Push (file);\r
}\r
\r
- bool is_identifier_start_character (char c)\r
+		//\r
+		// Drops the static identifier tables by clearing the reference;\r
+		// `identifiers' is null after this call.\r
+		//\r
+		public static void Cleanup ()\r
+		{\r
+			identifiers = null;\r
+		}\r
+\r
+ static bool is_identifier_start_character (char c)\r
{\r
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_' || Char.IsLetter (c);\r
}\r
\r
- bool is_identifier_part_character (char c)\r
+ static bool is_identifier_part_character (char c)\r
{\r
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9') || Char.IsLetter (c);\r
}\r
+ \r
+		//\r
+		// Returns true when `s' is a non-empty string whose first character\r
+		// passes is_identifier_start_character and whose remaining\r
+		// characters all pass is_identifier_part_character.  Returns false\r
+		// otherwise, including for null and for the empty string.\r
+		//\r
+		public static bool IsValidIdentifier (string s)\r
+		{\r
+			int length = (s == null) ? 0 : s.Length;\r
+\r
+			if (length == 0 || !is_identifier_start_character (s [0]))\r
+				return false;\r
+\r
+			int index = 1;\r
+			while (index < length) {\r
+				if (!is_identifier_part_character (s [index]))\r
+					return false;\r
+				index++;\r
+			}\r
+\r
+			return true;\r
+		}\r
\r
int is_punct (char c, ref bool doread)\r
{\r
--deambiguate_close_parens;\r
\r
// Save current position and parse next token.\r
- reader.DiscardBufferedData ();\r
- long old = reader.BaseStream.Position;\r
+ int old = reader.Position;\r
int new_token = token ();\r
- reader.DiscardBufferedData ();\r
- reader.BaseStream.Position = old;\r
+ reader.Position = old;\r
putback_char = -1;\r
\r
if (new_token == Token.OPEN_PARENS)\r
getChar ();\r
while ((d = peekChar ()) != -1){\r
if (is_hex (d)){\r
- if (number_pos == 16){\r
- Report.Error (1021, Location, "Integral constant too large");\r
- return Token.ERROR;\r
- }\r
number_builder [number_pos++] = (char) d;\r
getChar ();\r
} else\r
}\r
\r
string s = new String (number_builder, 0, number_pos);\r
- if (number_pos <= 8)\r
- ul = System.UInt32.Parse (s, NumberStyles.HexNumber);\r
- else\r
- ul = System.UInt64.Parse (s, NumberStyles.HexNumber);\r
+ try {\r
+ if (number_pos <= 8)\r
+ ul = System.UInt32.Parse (s, NumberStyles.HexNumber);\r
+ else\r
+ ul = System.UInt64.Parse (s, NumberStyles.HexNumber);\r
+ } catch (OverflowException){\r
+ error_details = "Integral constant is too large";\r
+ Report.Error (1021, Location, error_details);\r
+ val = 0ul;\r
+ return Token.LITERAL_INTEGER;\r
+ }\r
+ \r
return integer_type_suffix (ul, peekChar ());\r
}\r
\r
//\r
int getHex (int count, out bool error)\r
{\r
- int [] buffer = new int [8];\r
int i;\r
int total = 0;\r
int c;\r
case Token.TYPEOF:\r
case Token.UNCHECKED:\r
case Token.UNSAFE:\r
+\r
+ //\r
+ // These can be part of a member access\r
+ //\r
+ case Token.INT:\r
+ case Token.UINT:\r
+ case Token.SHORT:\r
+ case Token.USHORT:\r
+ case Token.LONG:\r
+ case Token.ULONG:\r
+ case Token.DOUBLE:\r
+ case Token.FLOAT:\r
+ case Token.CHAR:\r
return true;\r
\r
default:\r
tokens_seen = false;\r
arg = "";\r
static_cmd_arg.Length = 0;\r
- \r
- while ((c = getChar ()) != -1 && (c != '\n') && (c != ' ') && (c != '\t') && (c != '\r')){\r
- static_cmd_arg.Append ((char) c);\r
+\r
+ // skip over white space\r
+ while ((c = getChar ()) != -1 && (c != '\n') && ((c == '\r') || (c == ' ') || (c == '\t')))\r
+ ;\r
+\r
+\r
+ while ((c != -1) && (c != '\n') && (c != ' ') && (c != '\t') && (c != '\r')){\r
+ if (is_identifier_part_character ((char) c)){\r
+ static_cmd_arg.Append ((char) c);\r
+ c = getChar ();\r
+ } else {\r
+ putback (c);\r
+ break;\r
+ }\r
}\r
\r
cmd = static_cmd_arg.ToString ();\r
ref_name = file_name;\r
Location.Push (ref_name);\r
return true;\r
+ } else if (arg == "hidden"){\r
+ //\r
+ // We ignore #line hidden\r
+ //\r
+ return true;\r
}\r
- \r
+\r
try {\r
int pos;\r
\r
\r
string name = arg.Substring (pos). Trim (quotes);\r
ref_name = Location.LookupFile (name);\r
+ file_name.HasLineDirective = true;\r
+ ref_name.HasLineDirective = true;\r
Location.Push (ref_name);\r
} else {\r
ref_line = System.Int32.Parse (arg);\r
return;\r
}\r
\r
- foreach (char c in arg){\r
- if (!Char.IsLetter (c) && (c != '_')){\r
- Report.Error (1001, Location, "Identifier expected");\r
+ if (!is_identifier_start_character (arg [0]))\r
+ Report.Error (1001, Location, "Identifier expected: " + arg);\r
+ \r
+ foreach (char c in arg.Substring (1)){\r
+ if (!is_identifier_part_character (c)){\r
+ Report.Error (1001, Location, "Identifier expected: " + arg);\r
return;\r
}\r
}\r
//\r
bool handle_preprocessing_directive (bool caller_is_taking)\r
{\r
- char [] blank = { ' ', '\t' };\r
string cmd, arg;\r
bool region_directive = false;\r
\r
// The first group of pre-processing instructions is always processed\r
//\r
switch (cmd){\r
+ case "pragma":\r
+ if (RootContext.V2)\r
+ return true;\r
+ break;\r
+ \r
case "line":\r
if (!PreProcessLine (arg))\r
Report.Error (\r
}\r
}\r
\r
- string ids = new String (id_builder, 0, pos);\r
-\r
//\r
// Optimization: avoids doing the keyword lookup\r
// on uppercase letters; identifiers starting with _ are\r
// still looked up because __arglist is a keyword\r
//\r
- if (s >= 'a'){\r
- int keyword = GetKeyword (ids);\r
- if (keyword == -1 || quoted){\r
- val = ids;\r
+ if (s >= 'a' || s == '_'){\r
+ int keyword = GetKeyword (id_builder, pos);\r
+ if (keyword != -1 && !quoted)\r
+ return keyword;\r
+ }\r
+\r
+ //\r
+ // Keep identifiers in an array of hashtables to avoid needless\r
+ // allocations\r
+ //\r
+\r
+ if (identifiers [pos] != null) {\r
+ val = identifiers [pos][id_builder];\r
+ if (val != null) {\r
return Token.IDENTIFIER;\r
}\r
- return keyword;\r
}\r
- val = ids;\r
+ else\r
+ identifiers [pos] = new CharArrayHashtable (pos);\r
+\r
+ val = new String (id_builder, 0, pos);\r
+\r
+ char [] chars = new char [pos];\r
+ Array.Copy (id_builder, chars, pos);\r
+\r
+ identifiers [pos] [chars] = val;\r
+\r
return Token.IDENTIFIER;\r
}\r
\r
val = null;\r
// optimization: eliminate col and implement #directive semantic correctly.\r
for (;(c = getChar ()) != -1; col++) {\r
- if (c == ' ' || c == '\t' || c == '\f' || c == '\v' || c == '\r' || c == 0xa0){\r
- \r
- if (c == '\t')\r
- col = (((col + 8) / 8) * 8) - 1;\r
+ if (c == ' ')\r
+ continue;\r
+ \r
+ if (c == '\t') {\r
+ col = (((col + 8) / 8) * 8) - 1;\r
+ continue;\r
+ }\r
+ \r
+ if (c == ' ' || c == '\f' || c == '\v' || c == 0xa0)\r
+ continue;\r
+\r
+ if (c == '\r') {\r
+ if (peekChar () == '\n')\r
+ getChar ();\r
+\r
+ line++;\r
+ ref_line++;\r
+ col = 0;\r
+ any_token_seen |= tokens_seen;\r
+ tokens_seen = false;\r
continue;\r
}\r
\r
}\r
\r
}\r
-\r
}\r
}\r
\r