2002-11-30 Miguel de Icaza <miguel@ximian.com>
- * cs-tokenizer.cs: Handle 0xa0 as whitespace (#34752)
+ * cs-tokenizer.cs (consume_identifier): use reader.Read directly
+ instead of calling getChar/putback; this relies on internal knowledge of them.
+ (xtoken): Reorder tokenizer so most common patterns are checked
+ first. This reduces the compilation time by another 5% (from 8.11s
+ average to 7.73s for bootstrapping mcs on my Mobile p4/1.8ghz).
+
+ The parsing time is 22% of the compilation time in mcs, and of that,
+ 64% is spent on the tokenization process.
+
+ I tried using a binary search for keywords, but this is slower
+ than the hashtable. Another option would be to do a couple of
+ things:
+
+ * Not use a StringBuilder, instead use an array of chars,
+ with a set value. Notice that this way we could catch
+ the 645 error without having to do it *afterwards*.
+
+ * We could write a hand-parser to avoid the hashtable
+ compares altogether.
+
+ The identifier consumption process takes 37% of the tokenization
+ time. Another 15% is spent on is_number. 56% of the time spent
+ on is_number is spent on Int64.Parse:
+
+ * We could probably choose based on the string length to
+ use Int32.Parse or Int64.Parse and avoid all the 64-bit
+ computations.
+
+ Another 3% is spent on wrapping `xtoken' in the `token' function.
+
+ Handle 0xa0 as whitespace (#34752)
+
2002-11-26 Miguel de Icaza <miguel@ximian.com>
* typemanager.cs (IsCLRType): New routine to tell whether a type
//\r
// Licensed under the terms of the GNU GPL\r
//\r
-// (C) 2001 Ximian, Inc (http://www.ximian.com)\r
+// (C) 2001, 2002 Ximian, Inc (http://www.ximian.com)\r
//\r
\r
/*\r
return col;\r
}\r
}\r
- \r
+\r
static void InitTokens ()\r
{\r
keywords = new Hashtable ();\r
return -1;\r
\r
int res = (int) o;\r
- \r
+\r
if (handle_get_set == false && (res == Token.GET || res == Token.SET))\r
return -1;\r
if (handle_remove_add == false && (res == Token.REMOVE || res == Token.ADD))\r
{\r
if (putback_char != -1)\r
return putback_char;\r
- return reader.Peek ();\r
+ putback_char = reader.Read ();\r
+ return putback_char;\r
}\r
\r
void putback (int c)\r
{\r
- if (putback_char != -1)\r
+ if (putback_char != -1){\r
+ Console.WriteLine ("Col: " + col);\r
+ Console.WriteLine ("Row: " + line);\r
+ Console.WriteLine ("Name: " + ref_name);\r
+ Console.WriteLine ("Current [{0}] putting back [{1}] ", putback_char, c);\r
throw new Exception ("This should not happen putback on putback");\r
+ }\r
putback_char = c;\r
}\r
\r
\r
id_builder.Append ((char) c);\r
\r
- while ((c = peekChar ()) != -1) {\r
+ while ((c = reader.Read ()) != -1) {\r
if (is_identifier_part_character ((char) c)){\r
- id_builder.Append ((char)getChar ());\r
+ id_builder.Append ((char)c);\r
+ putback_char = -1;\r
col++;\r
- } else \r
+ } else {\r
+ putback_char = c;\r
break;\r
+ }\r
}\r
\r
string ids = id_builder.ToString ();\r
val = null;\r
// optimization: eliminate col and implement #directive semantic correctly.\r
for (;(c = getChar ()) != -1; col++) {\r
- if (is_identifier_start_character ((char)c)){\r
- tokens_seen = true;\r
- return consume_identifier (c, false);\r
- }\r
-\r
- if (c == '.'){\r
- tokens_seen = true;\r
- int peek = peekChar ();\r
- if (peek >= '0' && peek <= '9')\r
- return is_number (c);\r
- return Token.DOT;\r
- }\r
- \r
- if (c >= '0' && c <= '9'){\r
- tokens_seen = true;\r
- return is_number (c);\r
+ if (c == ' ' || c == '\t' || c == '\f' || c == '\v' || c == '\r' || c == 0xa0){\r
+ \r
+ if (c == '\t')\r
+ col = (((col + 8) / 8) * 8) - 1;\r
+ continue;\r
}\r
\r
// Handle double-slash comments.\r
}\r
continue;\r
}\r
+ goto is_punct_label;\r
+ }\r
+\r
+ \r
+ if (is_identifier_start_character ((char)c)){\r
+ tokens_seen = true;\r
+ return consume_identifier (c, false);\r
+ }\r
+\r
+ is_punct_label:\r
+ if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){\r
+ tokens_seen = true;\r
+ if (doread){\r
+ getChar ();\r
+ col++;\r
+ }\r
+ return t;\r
}\r
\r
+ // white space\r
+ if (c == '\n'){\r
+ line++;\r
+ ref_line++;\r
+ col = 0;\r
+ any_token_seen |= tokens_seen;\r
+ tokens_seen = false;\r
+ continue;\r
+ }\r
+\r
+ if (c >= '0' && c <= '9'){\r
+ tokens_seen = true;\r
+ return is_number (c);\r
+ }\r
+\r
+ if (c == '.'){\r
+ tokens_seen = true;\r
+ int peek = peekChar ();\r
+ if (peek >= '0' && peek <= '9')\r
+ return is_number (c);\r
+ return Token.DOT;\r
+ }\r
+ \r
/* For now, ignore pre-processor commands */\r
// FIXME: In C# the '#' is not limited to appear\r
// on the first column.\r
continue;\r
}\r
\r
- if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){\r
- tokens_seen = true;\r
- if (doread){\r
- getChar ();\r
- col++;\r
- }\r
- return t;\r
- }\r
- \r
- if (c == '"') {\r
+ if (c == '"') \r
return consume_string (false);\r
- }\r
\r
if (c == '\''){\r
c = getChar ();\r
return Token.LITERAL_CHARACTER;\r
}\r
\r
- // white space\r
- if (c == '\n'){\r
- line++;\r
- ref_line++;\r
- col = 0;\r
- any_token_seen |= tokens_seen;\r
- tokens_seen = false;\r
- continue;\r
- }\r
-\r
- if (c == ' ' || c == '\t' || c == '\f' || c == '\v' || c == '\r' || c == 0xa0){\r
- if (c == '\t')\r
- col = (((col + 8) / 8) * 8) - 1;\r
- continue;\r
- }\r
-\r
if (c == '@') {\r
c = getChar ();\r
if (c == '"') {\r