More low hanging fruit.

author Miguel de Icaza <miguel@gnome.org>

Sat, 30 Nov 2002 22:34:28 +0000 (22:34 -0000)

committer Miguel de Icaza <miguel@gnome.org>

Sat, 30 Nov 2002 22:34:28 +0000 (22:34 -0000)
author Miguel de Icaza <miguel@gnome.org>
Sat, 30 Nov 2002 22:34:28 +0000 (22:34 -0000)
committer Miguel de Icaza <miguel@gnome.org>
Sat, 30 Nov 2002 22:34:28 +0000 (22:34 -0000)
diff --git a/mcs/mcs/ChangeLog b/mcs/mcs/ChangeLog

index 85b98a6bc57a164860cd2d89cd95e2aec14392cf..86bcf66a0f9e480f5307343796f13ab3667ae36b 100755 (executable)
--- a/mcs/mcs/ChangeLog
+++ b/mcs/mcs/ChangeLog
@@ -1,7 +1,38 @@
  2002-11-30  Miguel de Icaza  <miguel@ximian.com>
  
-       * cs-tokenizer.cs: Handle 0xa0 as whitespace (#34752)
+       * cs-tokenizer.cs (consume_identifier): Call reader.Read directly
+       instead of getChar/putback, relying on internal knowledge of them.
  
+       (xtoken): Reorder tokenizer so most common patterns are checked
+       first.  This reduces the compilation time by another 5% (from 8.11s
+       average to 7.73s for bootstrapping mcs on my Mobile p4/1.8ghz).
+
+       The parsing time is 22% of the compilation in mcs, and from that
+       64% is spent on the tokenization process.  
+
+       I tried using a binary search for keywords, but this is slower
+       than the hashtable.  Another option would be to do a couple of
+       things:
+
+               * Not use a StringBuilder, instead use an array of chars,
+                 with a set value.  Notice that this way we could catch
+                 the 645 error without having to do it *afterwards*.
+
+               * We could write a hand-parser to avoid the hashtable
+                 compares altogether.
+
+       The identifier consumption process takes 37% of the tokenization
+       time.  Another 15% is spent on is_number.  56% of the time spent
+       on is_number is spent on Int64.Parse:
+
+               * We could probably choose based on the string length to
+                 use Int32.Parse or Int64.Parse and avoid all the 64-bit
+                 computations. 
+
+       Another 3% is spent on wrapping `xtoken' in the `token' function.
+
+       Handle 0xa0 as whitespace (#34752)
+       
  2002-11-26  Miguel de Icaza  <miguel@ximian.com>
  
         * typemanager.cs (IsCLRType): New routine to tell whether a type
diff --git a/mcs/mcs/cs-tokenizer.cs b/mcs/mcs/cs-tokenizer.cs

index 1d37aaa9e74d4bb311807f39bfccf6a0b088842f..af5b6a48fe6fecbda8dc727ebddcf7852a55ab09 100755 (executable)
--- a/mcs/mcs/cs-tokenizer.cs
+++ b/mcs/mcs/cs-tokenizer.cs
@@ -6,7 +6,7 @@
  //\r
  // Licensed under the terms of the GNU GPL\r
  //\r
-// (C) 2001 Ximian, Inc (http://www.ximian.com)\r
+// (C) 2001, 2002 Ximian, Inc (http://www.ximian.com)\r
  //\r
  \r
  /*\r
@@ -186,7 +186,7 @@ namespace Mono.CSharp
                                 return col;\r
                         }\r
                 }\r
-               \r
+\r
                 static void InitTokens ()\r
                 {\r
                         keywords = new Hashtable ();\r
@@ -297,7 +297,7 @@ namespace Mono.CSharp
                                 return -1;\r
                         \r
                         int res = (int) o;\r
-                       \r
+\r
                         if (handle_get_set == false && (res == Token.GET || res == Token.SET))\r
                                 return -1;\r
                         if (handle_remove_add == false && (res == Token.REMOVE || res == Token.ADD))\r
@@ -899,13 +899,19 @@ namespace Mono.CSharp
                 {\r
                         if (putback_char != -1)\r
                                 return putback_char;\r
-                       return reader.Peek ();\r
+                       putback_char = reader.Read ();\r
+                       return putback_char;\r
                 }\r
  \r
                 void putback (int c)\r
                 {\r
-                       if (putback_char != -1)\r
+                       if (putback_char != -1){\r
+                               Console.WriteLine ("Col: " + col);\r
+                               Console.WriteLine ("Row: " + line);\r
+                               Console.WriteLine ("Name: " + ref_name);\r
+                               Console.WriteLine ("Current [{0}] putting back [{1}]  ", putback_char, c);\r
                                 throw new Exception ("This should not happen putback on putback");\r
+                       }\r
                         putback_char = c;\r
                 }\r
  \r
@@ -1440,12 +1446,15 @@ namespace Mono.CSharp
  \r
                         id_builder.Append ((char) c);\r
                                         \r
-                       while ((c = peekChar ()) != -1) {\r
+                       while ((c = reader.Read ()) != -1) {\r
                                 if (is_identifier_part_character ((char) c)){\r
-                                       id_builder.Append ((char)getChar ());\r
+                                       id_builder.Append ((char)c);\r
+                                       putback_char = -1;\r
                                         col++;\r
-                               } else \r
+                               } else {\r
+                                       putback_char = c;\r
                                         break;\r
+                               }\r
                         }\r
                                         \r
                         string ids = id_builder.ToString ();\r
@@ -1473,22 +1482,11 @@ namespace Mono.CSharp
                         val = null;\r
                         // optimization: eliminate col and implement #directive semantic correctly.\r
                         for (;(c = getChar ()) != -1; col++) {\r
-                               if (is_identifier_start_character ((char)c)){\r
-                                       tokens_seen = true;\r
-                                       return consume_identifier (c, false);\r
-                               }\r
-\r
-                               if (c == '.'){\r
-                                       tokens_seen = true;\r
-                                       int peek = peekChar ();\r
-                                       if (peek >= '0' && peek <= '9')\r
-                                               return is_number (c);\r
-                                       return Token.DOT;\r
-                               }\r
-                               \r
-                               if (c >= '0' && c <= '9'){\r
-                                       tokens_seen = true;\r
-                                       return is_number (c);\r
+                               if (c == ' ' || c == '\t' || c == '\f' || c == '\v' || c == '\r' || c == 0xa0){\r
+                                       \r
+                                       if (c == '\t')\r
+                                               col = (((col + 8) / 8) * 8) - 1;\r
+                                       continue;\r
                                 }\r
  \r
                                 // Handle double-slash comments.\r
@@ -1526,8 +1524,48 @@ namespace Mono.CSharp
                                                 }\r
                                                 continue;\r
                                         }\r
+                                       goto is_punct_label;\r
+                               }\r
+\r
+                               \r
+                               if (is_identifier_start_character ((char)c)){\r
+                                       tokens_seen = true;\r
+                                       return consume_identifier (c, false);\r
+                               }\r
+\r
+                       is_punct_label:\r
+                               if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){\r
+                                       tokens_seen = true;\r
+                                       if (doread){\r
+                                               getChar ();\r
+                                               col++;\r
+                                       }\r
+                                       return t;\r
                                 }\r
  \r
+                               // white space\r
+                               if (c == '\n'){\r
+                                       line++;\r
+                                       ref_line++;\r
+                                       col = 0;\r
+                                       any_token_seen |= tokens_seen;\r
+                                       tokens_seen = false;\r
+                                       continue;\r
+                               }\r
+\r
+                               if (c >= '0' && c <= '9'){\r
+                                       tokens_seen = true;\r
+                                       return is_number (c);\r
+                               }\r
+\r
+                               if (c == '.'){\r
+                                       tokens_seen = true;\r
+                                       int peek = peekChar ();\r
+                                       if (peek >= '0' && peek <= '9')\r
+                                               return is_number (c);\r
+                                       return Token.DOT;\r
+                               }\r
+                               \r
                                 /* For now, ignore pre-processor commands */\r
                                 // FIXME: In C# the '#' is not limited to appear\r
                                 // on the first column.\r
@@ -1565,18 +1603,8 @@ namespace Mono.CSharp
                                         continue;\r
                                 }\r
                                 \r
-                               if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){\r
-                                       tokens_seen = true;\r
-                                       if (doread){\r
-                                               getChar ();\r
-                                               col++;\r
-                                       }\r
-                                       return t;\r
-                               }\r
-                               \r
-                               if (c == '"') {\r
+                               if (c == '"') \r
                                         return consume_string (false);\r
-                               }\r
  \r
                                 if (c == '\''){\r
                                         c = getChar ();\r
@@ -1613,22 +1641,6 @@ namespace Mono.CSharp
                                         return Token.LITERAL_CHARACTER;\r
                                 }\r
                                 \r
-                               // white space\r
-                               if (c == '\n'){\r
-                                       line++;\r
-                                       ref_line++;\r
-                                       col = 0;\r
-                                       any_token_seen |= tokens_seen;\r
-                                       tokens_seen = false;\r
-                                       continue;\r
-                               }\r
-\r
-                               if (c == ' ' || c == '\t' || c == '\f' || c == '\v' || c == '\r' || c == 0xa0){\r
-                                       if (c == '\t')\r
-                                               col = (((col + 8) / 8) * 8) - 1;\r
-                                       continue;\r
-                               }\r
-\r
                                 if (c == '@') {\r
                                         c = getChar ();\r
                                         if (c == '"') {\r
author	Miguel de Icaza <miguel@gnome.org>
	Sat, 30 Nov 2002 22:34:28 +0000 (22:34 -0000)
committer	Miguel de Icaza <miguel@gnome.org>
	Sat, 30 Nov 2002 22:34:28 +0000 (22:34 -0000)
mcs/mcs/ChangeLog		patch \| blob \| history
mcs/mcs/cs-tokenizer.cs		patch \| blob \| history