2009-01-07 Zoltan Varga <vargaz@gmail.com>
author Zoltan Varga <vargaz@gmail.com>
Wed, 7 Jan 2009 19:51:33 +0000 (19:51 -0000)
committer Zoltan Varga <vargaz@gmail.com>
Wed, 7 Jan 2009 19:51:33 +0000 (19:51 -0000)
Backport of r122700.

* RxInterpreter.cs: Add support for constant strings in anchors. Fixes
#464135.

svn path=/branches/mono-2-2/mcs/; revision=122701

mcs/class/System/System.Text.RegularExpressions/ChangeLog
mcs/class/System/System.Text.RegularExpressions/RxInterpreter.cs

index 9bd2ddf7fb1f68728c02f72d18d1c3985f4ff7d9..9cd59e0d164ab7b2c02f48e7c9a34309a8cf5663 100644 (file)
@@ -1,3 +1,10 @@
+2009-01-07  Zoltan Varga  <vargaz@gmail.com>
+
+       Backport of r122700.
+       
+       * RxInterpreter.cs: Add support for constant strings in anchors. Fixes
+       #464135.
+
 2008-11-13  Zoltan Varga  <vargaz@gmail.com>
 
        Backport r118729 from HEAD.
index 77a42c526b8076c8858b30fe35c6b675187409f8..980e17ff34c52d9da0be5df0c1f8d749bf201f20 100644 (file)
@@ -414,14 +414,34 @@ namespace System.Text.RegularExpressions {
                                        }
                                        return false;
                                case RxOp.Anchor:
-                                       length = program [pc + 3] | ((int)program [pc + 4] << 8);
-                                       pc += program [pc + 1] | ((int)program [pc + 2] << 8);
+                                       int skip = program [pc + 1] | ((int)program [pc + 2] << 8);
+                                       int anch_offset = program [pc + 3] | ((int)program [pc + 4] << 8);
+
+                                       /*
+                                        * In the general case, we have to evaluate the bytecode
+                                        * starting at pc + skip, however the optimizer emits some
+                                        * special cases, whose bytecode begins at pc + 5.
+                                        */
+                                       int anch_pc = pc + 5;
+                                       RxOp anch_op = (RxOp)(program[anch_pc] & 0x00ff);
+
+                                       bool spec_anch = false;
 
-                                       RxOp anch_op = (RxOp)(program[pc] & 0x00ff);
+                                       // FIXME: Add more special cases from interpreter.cs
+                                       if (anch_op == RxOp.String || anch_op == RxOp.StringIgnoreCase) {
+                                               if (pc + skip == anch_pc + 2 + program [anch_pc + 1] + 1) {
+                                                       // Anchor
+                                                       //      String
+                                                       //      True
+                                                       spec_anch = true;
+                                                       if (trace_rx)
+                                                               Console.WriteLine ("  string anchor at {0}, offset {1}", anch_pc, anch_offset);
+                                               }
+                                       }
 
-                                       // Optimize some common cases
+                                       pc += skip;
 
-                                       if (anch_op == RxOp.StartOfString) {
+                                       if ((RxOp)program [pc] == RxOp.StartOfString) {
                                                if (strpos == 0) {
                                                        int res = strpos;
                                                        if (groups.Length > 1) {
@@ -439,12 +459,26 @@ namespace System.Text.RegularExpressions {
                                                return false;
                                        }
 
-                                       // FIXME: Add more special cases from interpreter.cs
-
                                        // it's important to test also the end of the string
                                        // position for things like: "" =~ /$/
                                        end = string_end + 1;
                                        while (strpos < end) {
+                                               if (spec_anch) {
+                                                       if (anch_op == RxOp.String || anch_op == RxOp.StringIgnoreCase) {
+                                                               /* 
+                                                                * This means the match must contain a given
+                                                                * string at a constant position, so we can skip 
+                                                                * forward until the string matches. This is a win if
+                                                                * the rest of the regex 
+                                                                * has a complex positive lookbehind for example.
+                                                                */
+                                                               int tmp_res = strpos;
+                                                               if (!EvalByteCode (anch_pc, strpos + anch_offset, ref tmp_res)) {
+                                                                       strpos ++;
+                                                                       continue;
+                                                               }
+                                                       }
+                                               }
                                                int res = strpos;
                                                if (groups.Length > 1) {
                                                        ResetGroups ();