SortedSet: Enable set comparision operations on views, and improve performance
[mono.git] / mcs / class / System / System.Text.RegularExpressions / interpreter.cs
index e107c24e3e6026d3350ebe7b272152f7a714ae32..0edc3239f6b177cb4c614ae99b2617dc553c2b04 100644 (file)
@@ -34,28 +34,34 @@ using System.Globalization;
 
 namespace System.Text.RegularExpressions {
 
-       class Interpreter : IMachine {
+       partial class Interpreter : BaseMachine {
+               private int ReadProgramCount (int ptr)
+               {
+                       int ret = program [ptr + 1];
+                       ret <<= 16;
+                       ret += program [ptr];
+                       return ret;
+               }
+
                public Interpreter (ushort[] program) {
                        this.program = program;
                        this.qs = null;
 
                        // process info block
-
                        Debug.Assert ((OpCode)program[0] == OpCode.Info, "Regex", "Cant' find info block");
-
-                       this.group_count = program[1] + 1;
-                       this.match_min = program[2];
-                       this.match_max = program[3];
+                       this.group_count = ReadProgramCount (1) + 1;
+                       this.match_min = ReadProgramCount (3);
+                       //this.match_max = ReadProgramCount (5);
 
                        // setup
 
-                       this.program_start = 4;
+                       this.program_start = 7;
                        this.groups = new int [group_count];
                }
 
                // IMachine implementation
 
-               public Match Scan (Regex regex, string text, int start, int end) {
+               public override Match Scan (Regex regex, string text, int start, int end) {
                        this.text = text;
                        this.text_end = end;
                        this.scan_ptr = start;
@@ -106,16 +112,16 @@ namespace System.Text.RegularExpressions {
                                                //      True
 
                                                switch ((Position)program[pc + 4]) {
-                                               case Position.StartOfString:                                                    
+                                               case Position.StartOfString:
                                                        if (anch_reverse || anch_offset == 0) {
-                                                               ptr = anch_offset;
+                                                               if (anch_reverse)
+                                                                       ptr = anch_offset;
                                                                if (TryMatch (ref ptr, pc + skip))
                                                                        goto Pass;
                                                        }
                                                        break;
                                                
                                                case Position.StartOfLine:
-                                                                                                       
                                                         if (anch_ptr == 0) {
                                                                ptr = 0;
                                                                if (TryMatch (ref ptr, pc + skip))
@@ -161,11 +167,10 @@ namespace System.Text.RegularExpressions {
                                                //      True
                                 
                                                bool reverse = ((OpFlags)program[pc + 3] & OpFlags.RightToLeft) != 0;
-                                               string substring = GetString (pc + 3);
 
                                                if (qs == null) {
                                                        bool ignore = ((OpFlags)program[pc + 3] & OpFlags.IgnoreCase) != 0;
-
+                                                       string substring = GetString (pc + 3);
                                                        qs = new QuickSearch (substring, ignore, reverse);
                                                }
                                                while ((anch_reverse && anch_ptr >= anch_begin) 
@@ -175,7 +180,7 @@ namespace System.Text.RegularExpressions {
                                                        {
                                                                anch_ptr = qs.Search (text, anch_ptr, anch_begin);
                                                                if (anch_ptr != -1)
-                                                                       anch_ptr += substring.Length ;
+                                                                       anch_ptr += qs.Length ;
                                                                
                                                        }
                                                        else
@@ -300,12 +305,13 @@ namespace System.Text.RegularExpressions {
                                                goto Fail;
 
                                        pc += 2;
-                                       for (int i = 0; i < len; ++ i) {
-                                               if (ignore) {
+                                       if (ignore) {
+                                               for (int i = 0; i < len; ++ i) {
                                                        if (Char.ToLower (text[ptr + i]) != Char.ToLower (text[str + i]))
                                                                goto Fail;
                                                }
-                                               else {
+                                       } else {
+                                               for (int i = 0; i < len; ++ i) {
                                                        if (text[ptr + i] != text[str + i])
                                                                goto Fail;
                                                }
@@ -316,7 +322,7 @@ namespace System.Text.RegularExpressions {
                                        break;
                                }
 
-                               case OpCode.Character: case OpCode.Category:
+                               case OpCode.Character: case OpCode.Category: case OpCode.NotCategory:
                                case OpCode.Range: case OpCode.Set: {
                                        if (!EvalChar (mode, ref ptr, ref pc, false))
                                                goto Fail;
@@ -420,10 +426,10 @@ namespace System.Text.RegularExpressions {
                                case OpCode.Repeat: {
                                        this.repeat = new RepeatContext (
                                                this.repeat,                    // previous context
-                                               program[pc + 2],                // minimum
-                                               program[pc + 3],                // maximum
+                                               ReadProgramCount (pc + 2),              // minimum
+                                               ReadProgramCount (pc + 4),              // maximum
                                                (flags & OpFlags.Lazy) != 0,    // lazy
-                                               pc + 4                          // subexpression
+                                               pc + 6                          // subexpression
                                        );
 
                                        if (Eval (Mode.Match, ref ptr, pc + program[pc + 1]))
@@ -436,23 +442,40 @@ namespace System.Text.RegularExpressions {
 
                                case OpCode.Until: {
                                        RepeatContext current = this.repeat;
+
+                                       //
+                                       // Can we avoid recursion?
+                                       //
+                                       // Backtracking can be forced in nested quantifiers from the tail of this quantifier.
+                                       // Thus, we cannot, in general, use a simple loop on repeat.Expression to handle
+                                       // quantifiers.
+                                       //
+                                       // If 'deep' was unmolested, that implies that there was no nested quantifiers.
+                                       // Thus, we can safely avoid recursion.
+                                       //
+                                       if (deep == current)
+                                               goto Pass;
+
                                        int start = current.Start;
+                                       int start_count = current.Count;
 
-                                       if (!current.IsMinimum) {
+                                       while (!current.IsMinimum) {
                                                ++ current.Count;
                                                current.Start = ptr;
-                                               if (Eval (Mode.Match, ref ptr, repeat.Expression))
+                                               deep = current;
+                                               if (!Eval (Mode.Match, ref ptr, current.Expression)) {
+                                                       current.Start = start;
+                                                       current.Count = start_count;
+                                                       goto Fail;
+                                               }
+                                               if (deep != current)    // recursive mode
                                                        goto Pass;
-
-                                               current.Start = start;
-                                               -- current.Count;
-                                               goto Fail;
                                        }
 
                                        if (ptr == current.Start) {
                                                // degenerate match ... match tail or fail
-
                                                this.repeat = current.Previous;
+                                               deep = null;
                                                if (Eval (Mode.Match, ref ptr, pc + 1))
                                                        goto Pass;
                                        
@@ -461,65 +484,95 @@ namespace System.Text.RegularExpressions {
                                        }
 
                                        if (current.IsLazy) {
-                                               // match tail first ...
-
-                                               this.repeat = current.Previous;
-                                               int cp = Checkpoint ();
-                                               if (Eval (Mode.Match, ref ptr, pc + 1))
-                                                       goto Pass;
-
-                                               Backtrack (cp);
+                                               for (;;) {
+                                                       // match tail first ...
+                                                       this.repeat = current.Previous;
+                                                       deep = null;
+                                                       int cp = Checkpoint ();
+                                                       if (Eval (Mode.Match, ref ptr, pc + 1))
+                                                               goto Pass;
 
-                                               // ... then match more
+                                                       Backtrack (cp);
 
-                                               this.repeat = current;
-                                               if (!current.IsMaximum) {
+                                                       // ... then match more
+                                                       this.repeat = current;
+                                                       if (current.IsMaximum)
+                                                               goto Fail;
                                                        ++ current.Count;
                                                        current.Start = ptr;
-                                                       if (Eval (Mode.Match, ref ptr, current.Expression))
+                                                       deep = current;
+                                                       if (!Eval (Mode.Match, ref ptr, current.Expression)) {
+                                                               current.Start = start;
+                                                               current.Count = start_count;
+                                                               goto Fail;
+                                                       }
+                                                       if (deep != current)    // recursive mode
                                                                goto Pass;
-
-                                                       current.Start = start;
-                                                       -- current.Count;
-                                                       goto Fail;
+                                                       // Degenerate match: ptr has not moved since the last (failed) tail match.
+                                                       // So, next and subsequent tail matches will fail.
+                                                       if (ptr == current.Start)
+                                                               goto Fail;
                                                }
+                                       } else {
+                                               int stack_size = stack.Count;
 
-                                               return false;
-                                       }
-                                       else {
-                                               // match more first ...
-
-                                               if (!current.IsMaximum) {
+                                               // match greedily as much as possible
+                                               while (!current.IsMaximum) {
                                                        int cp = Checkpoint ();
+                                                       int old_ptr = ptr;
+                                                       int old_start = current.Start;
+
                                                        ++ current.Count;
                                                        current.Start = ptr;
-                                                       if (Eval (Mode.Match, ref ptr, current.Expression))
+                                                       deep = current;
+                                                       if (!Eval (Mode.Match, ref ptr, current.Expression)) {
+                                                               -- current.Count;
+                                                               current.Start = old_start;
+                                                               Backtrack (cp);
+                                                               break;
+                                                       }
+                                                       if (deep != current) {
+                                                               // recursive mode: no more backtracking, truncate the stack
+                                                               stack.Count = stack_size;
                                                                goto Pass;
+                                                       }
+                                                       stack.Push (cp);
+                                                       stack.Push (old_ptr);
 
-                                                       current.Start = start;
-                                                       -- current.Count;
-                                                       Backtrack (cp);
+                                                       // Degenerate match: no point going on
+                                                       if (ptr == current.Start)
+                                                               break;
                                                }
 
-                                               // ... then match tail
-
+                                               // then, match the tail, backtracking as necessary.
                                                this.repeat = current.Previous;
-                                               if (Eval (Mode.Match, ref ptr, pc + 1))
-                                                       goto Pass;
+                                               for (;;) {
+                                                       deep = null;
+                                                       if (Eval (Mode.Match, ref ptr, pc + 1)) {
+                                                               stack.Count = stack_size;
+                                                               goto Pass;
+                                                       }
+                                                       if (stack.Count == stack_size) {
+                                                               this.repeat = current;
+                                                               goto Fail;
+                                                       }
 
-                                               this.repeat = current;
-                                               goto Fail;
+                                                       --current.Count;
+                                                       ptr = stack.Pop ();
+                                                       Backtrack (stack.Pop ());
+                                               }
                                        }
                                }
 
                                case OpCode.FastRepeat: {
                                        this.fast = new RepeatContext (
                                                fast,
-                                               program[pc + 2],                // minimum
-                                               program[pc + 3],                // maximum
+                                               ReadProgramCount (pc + 2),              // minimum
+                                               ReadProgramCount (pc + 4),              // maximum
                                                (flags & OpFlags.Lazy) != 0,    // lazy
-                                               pc + 4                          // subexpression
+                                               pc + 6                          // subexpression
                                        );
+
                                        fast.Start = ptr;
 
                                        int cp = Checkpoint ();
@@ -527,13 +580,17 @@ namespace System.Text.RegularExpressions {
                                        pc += program[pc + 1];          // tail expression
                                        ushort tail_word = program[pc];
 
-                                       int c1, c2;                     // first character of tail operator
-                                       int coff;                       // 0 or -1 depending on direction
+                                       int c1 = -1;            // first character of tail operator
+                                       int c2 = -1;            // ... and the same character, in upper case if ignoring case
+                                       int coff = 0;           // 0 or -1 depending on direction
 
                                        OpCode tail_op = (OpCode)(tail_word & 0xff);
                                        if (tail_op == OpCode.Character || tail_op == OpCode.String) {
                                                OpFlags tail_flags = (OpFlags)(tail_word & 0xff00);
 
+                                               if ((tail_flags & OpFlags.Negate) != 0)
+                                                       goto skip;
+
                                                if (tail_op == OpCode.String)
                                                {
                                                        int offset = 0;
@@ -558,11 +615,8 @@ namespace System.Text.RegularExpressions {
                                                else
                                                        coff = 0;
                                        }
-                                       else {
-                                               c1 = c2 = -1;
-                                               coff = 0;
-                                       }
 
+                               skip:
                                        if (fast.IsLazy) {
                                                if (!fast.IsMinimum && !Eval (Mode.Count, ref ptr, fast.Expression)) {
                                                        //Console.WriteLine ("lazy fast: failed mininum.");
@@ -572,9 +626,11 @@ namespace System.Text.RegularExpressions {
                                                
                                                while (true) {
                                                        int p = ptr + coff;
-                                                       if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&
-                                                           Eval (Mode.Match, ref ptr, pc))
-                                                               break;
+                                                       if (c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) {
+                                                               deep = null;
+                                                               if (Eval (Mode.Match, ref ptr, pc))
+                                                                       break;
+                                                       }
 
                                                        if (fast.IsMaximum) {
                                                                //Console.WriteLine ("lazy fast: failed with maximum.");
@@ -606,9 +662,11 @@ namespace System.Text.RegularExpressions {
 
                                                while (true) {
                                                        int p = ptr + coff;
-                                                       if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&
-                                                           Eval (Mode.Match, ref ptr, pc))
-                                                               break;
+                                                       if (c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) {
+                                                               deep = null;
+                                                               if (Eval (Mode.Match, ref ptr, pc))
+                                                                       break;
+                                                       }
 
                                                        -- fast.Count;
                                                        if (!fast.IsMinimum) {
@@ -724,7 +782,12 @@ namespace System.Text.RegularExpressions {
                                case OpCode.Category: {
                                        if (CategoryUtils.IsCategory ((Category)program[pc ++], c))
                                                return !negate;
+                                       break;
+                               }
 
+                               case OpCode.NotCategory: {
+                                       if (!CategoryUtils.IsCategory ((Category)program[pc ++], c))
+                                               return !negate;
                                        break;
                                }
                                
@@ -857,9 +920,7 @@ namespace System.Text.RegularExpressions {
                                //Group not previously matched
                                return false;
                        }
-
                        Debug.Assert (marks [b].IsDefined, "Regex", "Balancng group not closed");
-
                        if (gid > 0 && capture){ 
                                Open (gid, marks [b].Index + marks [b].Length);
                                Close (gid, ptr);
@@ -877,7 +938,6 @@ namespace System.Text.RegularExpressions {
 
                private void Backtrack (int cp) {
                        Debug.Assert (cp > mark_start, "Regex", "Attempt to backtrack forwards");
-
                        for (int i = 0; i < groups.Length; ++ i) {
                                int m = groups [i];
                                while (cp <= m)
@@ -926,24 +986,56 @@ namespace System.Text.RegularExpressions {
                        return m;
                }
 
-               private Match GenerateMatch (Regex regex) {
-                       int[][] grps = new int[groups.Length][];
-                       ArrayList caps = new ArrayList ();
-
-                       for (int gid = 0; gid < groups.Length; ++ gid) {
-                               caps.Clear ();
-                               for (int m = groups[gid]; m >= 0; m = marks[m].Previous) {
-                                       if (!marks[m].IsDefined)
-                                               continue;
-                                       
-                                       caps.Add (marks[m].Index);
-                                       caps.Add (marks[m].Length);
-                               }
+               private void GetGroupInfo (int gid, out int first_mark_index, out int n_caps)
+               {
+                       first_mark_index = -1;
+                       n_caps = 0;
+                       for (int m = groups [gid]; m >= 0; m = marks [m].Previous) {
+                               if (!marks [m].IsDefined)
+                                       continue;
+                               if (first_mark_index < 0)
+                                       first_mark_index = m;
+                               ++n_caps;
+                       }
+               }
 
-                               grps[gid] = (int[])caps.ToArray (typeof (int));
+               private void PopulateGroup (Group g, int first_mark_index, int n_caps)
+               {
+                       int i = 1;
+                       for (int m = marks [first_mark_index].Previous; m >= 0; m = marks [m].Previous) {
+                               if (!marks [m].IsDefined)
+                                       continue;
+                               Capture cap = new Capture (text, marks [m].Index, marks [m].Length);
+                               g.Captures.SetValue (cap, n_caps - 1 - i);
+                               ++i;
                        }
+               }
 
-                       return new Match (regex, this, text, text_end, grps);
+               private Match GenerateMatch (Regex regex)
+               {
+                       int n_caps, first_mark_index;
+                       Group g;
+                       GetGroupInfo (0, out first_mark_index, out n_caps);
+
+                       // Avoid fully populating the Match instance if not needed
+                       if (!needs_groups_or_captures)
+                               return new Match (regex, this, text, text_end, 0, marks [first_mark_index].Index, marks [first_mark_index].Length);
+
+                       Match retval = new Match (regex, this, text, text_end, groups.Length, 
+                                                 marks [first_mark_index].Index, marks [first_mark_index].Length, n_caps);
+                       PopulateGroup (retval, first_mark_index, n_caps);
+
+                       for (int gid = 1; gid < groups.Length; ++ gid) {
+                               GetGroupInfo (gid, out first_mark_index, out n_caps);
+                               if (first_mark_index < 0) {
+                                       g = Group.Fail;
+                               } else {
+                                       g = new Group (text, marks [first_mark_index].Index, marks [first_mark_index].Length, n_caps);
+                                       PopulateGroup (g, first_mark_index, n_caps);
+                               }
+                               retval.Groups.SetValue (g, gid);
+                       }
+                       return retval;
                }
 
                // interpreter attributes
@@ -953,7 +1045,7 @@ namespace System.Text.RegularExpressions {
                private string text;                    // input text
                private int text_end;                   // end of input text (last character + 1)
                private int group_count;                // number of capturing groups
-               private int match_min, match_max;       // match width information
+               private int match_min;//, match_max;    // match width information
                private QuickSearch qs;                 // fast substring matcher
 
                // match state
@@ -963,6 +1055,10 @@ namespace System.Text.RegularExpressions {
                private RepeatContext repeat;           // current repeat context
                private RepeatContext fast;             // fast repeat context
 
+               // Repeat/Until handling
+               private IntStack stack = new IntStack (); // utility stack
+               private RepeatContext deep;             // points to the most-nested repeat context
+
                private Mark[] marks = null;            // mark stack
                private int mark_start;                 // start of current checkpoint
                private int mark_end;                   // end of checkpoint/next free mark
@@ -970,24 +1066,41 @@ namespace System.Text.RegularExpressions {
                private int[] groups;                   // current group definitions
 
                // private classes
-/*
-               private struct Mark {
-                       public int Start, End;
-                       public int Previous;
 
-                       public bool IsDefined {
-                               get { return Start >= 0 && End >= 0; }
+               private struct IntStack {
+                       int [] values;
+                       int count;
+                       public int Pop ()
+                       {
+                               return values [--count];
                        }
-
-                       public int Index {
-                               get { return Start < End ? Start : End; }
+                       public void Push (int value)
+                       {
+                               if (values == null) {
+                                       values = new int [8];
+                               } else if (count == values.Length) {
+                                       int new_size = values.Length;
+                                       new_size += new_size >> 1;
+                                       int [] new_values = new int [new_size];
+                                       for (int i = 0; i < count; ++i)
+                                               new_values [i] = values [i];
+                                       values = new_values;
+                               }
+                               values [count++] = value;
                        }
-
-                       public int Length {
-                               get { return Start < End ? End - Start : Start - End; }
+                       public int Top {
+                               get { return values [count - 1]; }
+                       }
+                       public int Count {
+                               get { return count; }
+                               set {
+                                       if (value > count)
+                                               throw new SystemException ("can only truncate the stack");
+                                       count = value;
+                               }
                        }
                }
-*/
+
                private class RepeatContext {
                        public RepeatContext (RepeatContext previous, int min, int max, bool lazy, int expr_pc) {
                                this.previous = previous;