More low hanging fruit.
author Miguel de Icaza <miguel@gnome.org>
Sat, 30 Nov 2002 22:34:28 +0000 (22:34 -0000)
committer Miguel de Icaza <miguel@gnome.org>
Sat, 30 Nov 2002 22:34:28 +0000 (22:34 -0000)
2002-11-30  Miguel de Icaza  <miguel@ximian.com>

* cs-tokenizer.cs (consume_identifier): Call reader.Read directly
instead of going through getChar/putback; this relies on internal
knowledge of how the putback character is managed.

(xtoken): Reorder tokenizer so most common patterns are checked
first.  This reduces the compilation time by another 5% (from 8.11s
average to 7.73s for bootstrapping mcs on my Mobile p4/1.8ghz).

Parsing takes 22% of the compilation time in mcs, and 64% of that
parsing time is spent on the tokenization process.

I tried using a binary search for keywords, but this is slower
than the hashtable.  Another option would be to do a couple of
things:

* Not use a StringBuilder, instead use an array of chars,
  with a set value.  Notice that this way we could catch
  the 645 error without having to do it *afterwards*.

* We could write a hand-parser to avoid the hashtable
  compares altogether.

The identifier consumption process takes 37% of the tokenization
time.  Another 15% is spent on is_number.  56% of the time spent
on is_number is spent on Int64.Parse:

* We could probably choose based on the string length to
  use Int32.Parse or Int64.Parse and avoid all the 64-bit
  computations.

Another 3% is spent on wrapping `xtoken' in the `token' function.

svn path=/trunk/mcs/; revision=9295

mcs/mcs/ChangeLog
mcs/mcs/cs-tokenizer.cs

index 85b98a6bc57a164860cd2d89cd95e2aec14392cf..86bcf66a0f9e480f5307343796f13ab3667ae36b 100755 (executable)
@@ -1,7 +1,38 @@
 2002-11-30  Miguel de Icaza  <miguel@ximian.com>
 
-       * cs-tokenizer.cs: Handle 0xa0 as whitespace (#34752)
+       * cs-tokenizer.cs (consume_identifier): use read directly, instead
+       of calling getChar/putback, uses internal knowledge of it.    
 
+       (xtoken): Reorder tokenizer so most common patterns are checked
+       first.  This reduces the compilation time in another 5% (from 8.11s
+       average to 7.73s for bootstrapping mcs on my Mobile p4/1.8ghz).
+
+       The parsing time is 22% of the compilation in mcs, and from that
+       64% is spent on the tokenization process.  
+
+       I tried using a binary search for keywords, but this is slower
+       than the hashtable.  Another option would be to do a couple of
+       things:
+
+               * Not use a StringBuilder, instead use an array of chars,
+                 with a set value.  Notice that this way we could catch
+                 the 645 error without having to do it *afterwards*.
+
+               * We could write a hand-parser to avoid the hashtable
+                 compares altogether.
+
+       The identifier consumption process takes 37% of the tokenization
+       time.  Another 15% is spent on is_number.  56% of the time spent
+       on is_number is spent on Int64.Parse:
+
+               * We could probably choose based on the string length to
+                 use Int32.Parse or Int64.Parse and avoid all the 64-bit
+                 computations. 
+
+       Another 3% is spend on wrapping `xtoken' in the `token' function.
+
+       Handle 0xa0 as whitespace (#34752)
+       
 2002-11-26  Miguel de Icaza  <miguel@ximian.com>
 
        * typemanager.cs (IsCLRType): New routine to tell whether a type
index 1d37aaa9e74d4bb311807f39bfccf6a0b088842f..af5b6a48fe6fecbda8dc727ebddcf7852a55ab09 100755 (executable)
@@ -6,7 +6,7 @@
 //\r
 // Licensed under the terms of the GNU GPL\r
 //\r
-// (C) 2001 Ximian, Inc (http://www.ximian.com)\r
+// (C) 2001, 2002 Ximian, Inc (http://www.ximian.com)\r
 //\r
 \r
 /*\r
@@ -186,7 +186,7 @@ namespace Mono.CSharp
                                return col;\r
                        }\r
                }\r
-               \r
+\r
                static void InitTokens ()\r
                {\r
                        keywords = new Hashtable ();\r
@@ -297,7 +297,7 @@ namespace Mono.CSharp
                                return -1;\r
                        \r
                        int res = (int) o;\r
-                       \r
+\r
                        if (handle_get_set == false && (res == Token.GET || res == Token.SET))\r
                                return -1;\r
                        if (handle_remove_add == false && (res == Token.REMOVE || res == Token.ADD))\r
@@ -899,13 +899,19 @@ namespace Mono.CSharp
                {\r
                        if (putback_char != -1)\r
                                return putback_char;\r
-                       return reader.Peek ();\r
+                       putback_char = reader.Read ();\r
+                       return putback_char;\r
                }\r
 \r
                void putback (int c)\r
                {\r
-                       if (putback_char != -1)\r
+                       if (putback_char != -1){\r
+                               Console.WriteLine ("Col: " + col);\r
+                               Console.WriteLine ("Row: " + line);\r
+                               Console.WriteLine ("Name: " + ref_name);\r
+                               Console.WriteLine ("Current [{0}] putting back [{1}]  ", putback_char, c);\r
                                throw new Exception ("This should not happen putback on putback");\r
+                       }\r
                        putback_char = c;\r
                }\r
 \r
@@ -1440,12 +1446,15 @@ namespace Mono.CSharp
 \r
                        id_builder.Append ((char) c);\r
                                        \r
-                       while ((c = peekChar ()) != -1) {\r
+                       while ((c = reader.Read ()) != -1) {\r
                                if (is_identifier_part_character ((char) c)){\r
-                                       id_builder.Append ((char)getChar ());\r
+                                       id_builder.Append ((char)c);\r
+                                       putback_char = -1;\r
                                        col++;\r
-                               } else \r
+                               } else {\r
+                                       putback_char = c;\r
                                        break;\r
+                               }\r
                        }\r
                                        \r
                        string ids = id_builder.ToString ();\r
@@ -1473,22 +1482,11 @@ namespace Mono.CSharp
                        val = null;\r
                        // optimization: eliminate col and implement #directive semantic correctly.\r
                        for (;(c = getChar ()) != -1; col++) {\r
-                               if (is_identifier_start_character ((char)c)){\r
-                                       tokens_seen = true;\r
-                                       return consume_identifier (c, false);\r
-                               }\r
-\r
-                               if (c == '.'){\r
-                                       tokens_seen = true;\r
-                                       int peek = peekChar ();\r
-                                       if (peek >= '0' && peek <= '9')\r
-                                               return is_number (c);\r
-                                       return Token.DOT;\r
-                               }\r
-                               \r
-                               if (c >= '0' && c <= '9'){\r
-                                       tokens_seen = true;\r
-                                       return is_number (c);\r
+                               if (c == ' ' || c == '\t' || c == '\f' || c == '\v' || c == '\r' || c == 0xa0){\r
+                                       \r
+                                       if (c == '\t')\r
+                                               col = (((col + 8) / 8) * 8) - 1;\r
+                                       continue;\r
                                }\r
 \r
                                // Handle double-slash comments.\r
@@ -1526,8 +1524,48 @@ namespace Mono.CSharp
                                                }\r
                                                continue;\r
                                        }\r
+                                       goto is_punct_label;\r
+                               }\r
+\r
+                               \r
+                               if (is_identifier_start_character ((char)c)){\r
+                                       tokens_seen = true;\r
+                                       return consume_identifier (c, false);\r
+                               }\r
+\r
+                       is_punct_label:\r
+                               if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){\r
+                                       tokens_seen = true;\r
+                                       if (doread){\r
+                                               getChar ();\r
+                                               col++;\r
+                                       }\r
+                                       return t;\r
                                }\r
 \r
+                               // white space\r
+                               if (c == '\n'){\r
+                                       line++;\r
+                                       ref_line++;\r
+                                       col = 0;\r
+                                       any_token_seen |= tokens_seen;\r
+                                       tokens_seen = false;\r
+                                       continue;\r
+                               }\r
+\r
+                               if (c >= '0' && c <= '9'){\r
+                                       tokens_seen = true;\r
+                                       return is_number (c);\r
+                               }\r
+\r
+                               if (c == '.'){\r
+                                       tokens_seen = true;\r
+                                       int peek = peekChar ();\r
+                                       if (peek >= '0' && peek <= '9')\r
+                                               return is_number (c);\r
+                                       return Token.DOT;\r
+                               }\r
+                               \r
                                /* For now, ignore pre-processor commands */\r
                                // FIXME: In C# the '#' is not limited to appear\r
                                // on the first column.\r
@@ -1565,18 +1603,8 @@ namespace Mono.CSharp
                                        continue;\r
                                }\r
                                \r
-                               if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){\r
-                                       tokens_seen = true;\r
-                                       if (doread){\r
-                                               getChar ();\r
-                                               col++;\r
-                                       }\r
-                                       return t;\r
-                               }\r
-                               \r
-                               if (c == '"') {\r
+                               if (c == '"') \r
                                        return consume_string (false);\r
-                               }\r
 \r
                                if (c == '\''){\r
                                        c = getChar ();\r
@@ -1613,22 +1641,6 @@ namespace Mono.CSharp
                                        return Token.LITERAL_CHARACTER;\r
                                }\r
                                \r
-                               // white space\r
-                               if (c == '\n'){\r
-                                       line++;\r
-                                       ref_line++;\r
-                                       col = 0;\r
-                                       any_token_seen |= tokens_seen;\r
-                                       tokens_seen = false;\r
-                                       continue;\r
-                               }\r
-\r
-                               if (c == ' ' || c == '\t' || c == '\f' || c == '\v' || c == '\r' || c == 0xa0){\r
-                                       if (c == '\t')\r
-                                               col = (((col + 8) / 8) * 8) - 1;\r
-                                       continue;\r
-                               }\r
-\r
                                if (c == '@') {\r
                                        c = getChar ();\r
                                        if (c == '"') {\r