2006-09-11 Gonzalo Paniagua Javier <gonzalo@ximian.com>
[mono.git] / mcs / class / System / System.Text.RegularExpressions / interpreter.cs
index fb0656ef67cdda0b701355284aee153c50a0d66f..cc0de2c747dc4fc7f5ed7ab8ff85286b0cab0eb3 100644 (file)
@@ -35,6 +35,14 @@ using System.Globalization;
 namespace System.Text.RegularExpressions {
 
        class Interpreter : IMachine {
+               // Reads the 32-bit count held in two 16-bit program words: low at ptr, high at ptr + 1.
+               private int ReadProgramCount (int ptr)
+               {
+                       int high = program [ptr + 1];
+                       int low = program [ptr];
+                       return (high << 16) + low;
+               }
+
                public Interpreter (ushort[] program) {
                        this.program = program;
                        this.qs = null;
@@ -43,13 +51,13 @@ namespace System.Text.RegularExpressions {
 
                        Debug.Assert ((OpCode)program[0] == OpCode.Info, "Regex", "Cant' find info block");
 
-                       this.group_count = program[1] + 1;
-                       this.match_min = program[2];
-                       //this.match_max = program[3];
+                       this.group_count = ReadProgramCount (1) + 1;
+                       this.match_min = ReadProgramCount (3);
+                       //this.match_max = ReadProgramCount (5);
 
                        // setup
 
-                       this.program_start = 4;
+                       this.program_start = 7;
                        this.groups = new int [group_count];
                }
 
@@ -106,16 +114,16 @@ namespace System.Text.RegularExpressions {
                                                //      True
 
                                                switch ((Position)program[pc + 4]) {
-                                               case Position.StartOfString:                                                    
+                                               case Position.StartOfString:
                                                        if (anch_reverse || anch_offset == 0) {
-                                                               ptr = anch_offset;
+                                                               if (anch_reverse)
+                                                                       ptr = anch_offset;
                                                                if (TryMatch (ref ptr, pc + skip))
                                                                        goto Pass;
                                                        }
                                                        break;
                                                
                                                case Position.StartOfLine:
-                                                                                                       
                                                         if (anch_ptr == 0) {
                                                                ptr = 0;
                                                                if (TryMatch (ref ptr, pc + skip))
@@ -316,7 +324,7 @@ namespace System.Text.RegularExpressions {
                                        break;
                                }
 
-                               case OpCode.Character: case OpCode.Category:
+                               case OpCode.Character: case OpCode.Category: case OpCode.NotCategory:
                                case OpCode.Range: case OpCode.Set: {
                                        if (!EvalChar (mode, ref ptr, ref pc, false))
                                                goto Fail;
@@ -420,10 +428,10 @@ namespace System.Text.RegularExpressions {
                                case OpCode.Repeat: {
                                        this.repeat = new RepeatContext (
                                                this.repeat,                    // previous context
-                                               program[pc + 2],                // minimum
-                                               program[pc + 3],                // maximum
+                                               ReadProgramCount (pc + 2),              // minimum
+                                               ReadProgramCount (pc + 4),              // maximum
                                                (flags & OpFlags.Lazy) != 0,    // lazy
-                                               pc + 4                          // subexpression
+                                               pc + 6                          // subexpression
                                        );
 
                                        if (Eval (Mode.Match, ref ptr, pc + program[pc + 1]))
@@ -457,7 +465,7 @@ namespace System.Text.RegularExpressions {
                                                ++ current.Count;
                                                current.Start = ptr;
                                                deep = current;
-                                               if (!Eval (Mode.Match, ref ptr, repeat.Expression)) {
+                                               if (!Eval (Mode.Match, ref ptr, current.Expression)) {
                                                        current.Start = start;
                                                        current.Count = start_count;
                                                        goto Fail;
@@ -466,10 +474,10 @@ namespace System.Text.RegularExpressions {
                                                        goto Pass;
                                        }
 
-                                       DegenerateMatch:
                                        if (ptr == current.Start) {
                                                // degenerate match ... match tail or fail
                                                this.repeat = current.Previous;
+                                               deep = null;
                                                if (Eval (Mode.Match, ref ptr, pc + 1))
                                                        goto Pass;
                                        
@@ -481,6 +489,7 @@ namespace System.Text.RegularExpressions {
                                                for (;;) {
                                                        // match tail first ...
                                                        this.repeat = current.Previous;
+                                                       deep = null;
                                                        int cp = Checkpoint ();
                                                        if (Eval (Mode.Match, ref ptr, pc + 1))
                                                                goto Pass;
@@ -490,7 +499,7 @@ namespace System.Text.RegularExpressions {
                                                        // ... then match more
                                                        this.repeat = current;
                                                        if (current.IsMaximum)
-                                                               return false;
+                                                               goto Fail;
                                                        ++ current.Count;
                                                        current.Start = ptr;
                                                        deep = current;
@@ -501,9 +510,10 @@ namespace System.Text.RegularExpressions {
                                                        }
                                                        if (deep != current)    // recursive mode
                                                                goto Pass;
-                                                       // Degenerate match: no point retrying current.Expression if tail match fails
+                                                       // Degenerate match: ptr has not moved since the last (failed) tail match.
+                                                       // So, next and subsequent tail matches will fail.
                                                        if (ptr == current.Start)
-                                                               goto DegenerateMatch;
+                                                               goto Fail;
                                                }
                                        } else {
                                                int stack_size = stack.Count;
@@ -539,6 +549,7 @@ namespace System.Text.RegularExpressions {
                                                // then, match the tail, backtracking as necessary.
                                                this.repeat = current.Previous;
                                                for (;;) {
+                                                       deep = null;
                                                        if (Eval (Mode.Match, ref ptr, pc + 1)) {
                                                                stack.Count = stack_size;
                                                                goto Pass;
@@ -558,14 +569,12 @@ namespace System.Text.RegularExpressions {
                                case OpCode.FastRepeat: {
                                        this.fast = new RepeatContext (
                                                fast,
-                                               program[pc + 2],                // minimum
-                                               program[pc + 3],                // maximum
+                                               ReadProgramCount (pc + 2),              // minimum
+                                               ReadProgramCount (pc + 4),              // maximum
                                                (flags & OpFlags.Lazy) != 0,    // lazy
-                                               pc + 4                          // subexpression
+                                               pc + 6                          // subexpression
                                        );
 
-                                       deep = fast;
-
                                        fast.Start = ptr;
 
                                        int cp = Checkpoint ();
@@ -573,13 +582,17 @@ namespace System.Text.RegularExpressions {
                                        pc += program[pc + 1];          // tail expression
                                        ushort tail_word = program[pc];
 
-                                       int c1, c2;                     // first character of tail operator
-                                       int coff;                       // 0 or -1 depending on direction
+                                       int c1 = -1;            // first character of tail operator
+                                       int c2 = -1;            // ... and the same character, in upper case if ignoring case
+                                       int coff = 0;           // 0 or -1 depending on direction
 
                                        OpCode tail_op = (OpCode)(tail_word & 0xff);
                                        if (tail_op == OpCode.Character || tail_op == OpCode.String) {
                                                OpFlags tail_flags = (OpFlags)(tail_word & 0xff00);
 
+                                               if ((tail_flags & OpFlags.Negate) != 0)
+                                                       goto skip;
+
                                                if (tail_op == OpCode.String)
                                                {
                                                        int offset = 0;
@@ -604,11 +617,8 @@ namespace System.Text.RegularExpressions {
                                                else
                                                        coff = 0;
                                        }
-                                       else {
-                                               c1 = c2 = -1;
-                                               coff = 0;
-                                       }
 
+                               skip:
                                        if (fast.IsLazy) {
                                                if (!fast.IsMinimum && !Eval (Mode.Count, ref ptr, fast.Expression)) {
                                                        //Console.WriteLine ("lazy fast: failed mininum.");
@@ -618,9 +628,11 @@ namespace System.Text.RegularExpressions {
                                                
                                                while (true) {
                                                        int p = ptr + coff;
-                                                       if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&
-                                                           Eval (Mode.Match, ref ptr, pc))
-                                                               break;
+                                                       if (c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) {
+                                                               deep = null;
+                                                               if (Eval (Mode.Match, ref ptr, pc))
+                                                                       break;
+                                                       }
 
                                                        if (fast.IsMaximum) {
                                                                //Console.WriteLine ("lazy fast: failed with maximum.");
@@ -652,9 +664,11 @@ namespace System.Text.RegularExpressions {
 
                                                while (true) {
                                                        int p = ptr + coff;
-                                                       if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&
-                                                           Eval (Mode.Match, ref ptr, pc))
-                                                               break;
+                                                       if (c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) {
+                                                               deep = null;
+                                                               if (Eval (Mode.Match, ref ptr, pc))
+                                                                       break;
+                                                       }
 
                                                        -- fast.Count;
                                                        if (!fast.IsMinimum) {
@@ -770,7 +784,12 @@ namespace System.Text.RegularExpressions {
                                case OpCode.Category: {
                                        if (CategoryUtils.IsCategory ((Category)program[pc ++], c))
                                                return !negate;
+                                       break;
+                               }
 
+                               case OpCode.NotCategory: {
+                                       if (!CategoryUtils.IsCategory ((Category)program[pc ++], c))
+                                               return !negate;
                                        break;
                                }