namespace System.Text.RegularExpressions {
- class Interpreter : IMachine {
+ partial class Interpreter : BaseMachine {
+ private int ReadProgramCount (int ptr)
+ {
+ int ret = program [ptr + 1];
+ ret <<= 16;
+ ret += program [ptr];
+ return ret;
+ }
+
public Interpreter (ushort[] program) {
this.program = program;
this.qs = null;
// process info block
-
Debug.Assert ((OpCode)program[0] == OpCode.Info, "Regex", "Cant' find info block");
-
- this.group_count = program[1] + 1;
- this.match_min = program[2];
- //this.match_max = program[3];
+ this.group_count = ReadProgramCount (1) + 1;
+ this.match_min = ReadProgramCount (3);
+ //this.match_max = ReadProgramCount (5);
// setup
- this.program_start = 4;
+ this.program_start = 7;
this.groups = new int [group_count];
}
// IMachine implementation
- public Match Scan (Regex regex, string text, int start, int end) {
+ public override Match Scan (Regex regex, string text, int start, int end) {
this.text = text;
this.text_end = end;
this.scan_ptr = start;
// True
switch ((Position)program[pc + 4]) {
- case Position.StartOfString:
+ case Position.StartOfString:
if (anch_reverse || anch_offset == 0) {
- ptr = anch_offset;
+ if (anch_reverse)
+ ptr = anch_offset;
if (TryMatch (ref ptr, pc + skip))
goto Pass;
}
break;
case Position.StartOfLine:
-
if (anch_ptr == 0) {
ptr = 0;
if (TryMatch (ref ptr, pc + skip))
// True
bool reverse = ((OpFlags)program[pc + 3] & OpFlags.RightToLeft) != 0;
- string substring = GetString (pc + 3);
if (qs == null) {
bool ignore = ((OpFlags)program[pc + 3] & OpFlags.IgnoreCase) != 0;
-
+ string substring = GetString (pc + 3);
qs = new QuickSearch (substring, ignore, reverse);
}
while ((anch_reverse && anch_ptr >= anch_begin)
{
anch_ptr = qs.Search (text, anch_ptr, anch_begin);
if (anch_ptr != -1)
- anch_ptr += substring.Length ;
+ anch_ptr += qs.Length ;
}
else
goto Fail;
pc += 2;
- for (int i = 0; i < len; ++ i) {
- if (ignore) {
+ if (ignore) {
+ for (int i = 0; i < len; ++ i) {
if (Char.ToLower (text[ptr + i]) != Char.ToLower (text[str + i]))
goto Fail;
}
- else {
+ } else {
+ for (int i = 0; i < len; ++ i) {
if (text[ptr + i] != text[str + i])
goto Fail;
}
break;
}
- case OpCode.Character: case OpCode.Category:
+ case OpCode.Character: case OpCode.Category: case OpCode.NotCategory:
case OpCode.Range: case OpCode.Set: {
if (!EvalChar (mode, ref ptr, ref pc, false))
goto Fail;
case OpCode.Repeat: {
this.repeat = new RepeatContext (
this.repeat, // previous context
- program[pc + 2], // minimum
- program[pc + 3], // maximum
+ ReadProgramCount (pc + 2), // minimum
+ ReadProgramCount (pc + 4), // maximum
(flags & OpFlags.Lazy) != 0, // lazy
- pc + 4 // subexpression
+ pc + 6 // subexpression
);
if (Eval (Mode.Match, ref ptr, pc + program[pc + 1]))
int start = current.Start;
int start_count = current.Count;
- RecurseMinimum:
- if (!current.IsMinimum) {
+ while (!current.IsMinimum) {
++ current.Count;
current.Start = ptr;
deep = current;
- if (Eval (Mode.Match, ref ptr, repeat.Expression)) {
- if (deep == current)
- goto RecurseMinimum;
- else
- goto Pass;
+ if (!Eval (Mode.Match, ref ptr, current.Expression)) {
+ current.Start = start;
+ current.Count = start_count;
+ goto Fail;
}
-
- current.Start = start;
- current.Count = start_count;
- goto Fail;
+ if (deep != current) // recursive mode
+ goto Pass;
}
- Recurse:
if (ptr == current.Start) {
// degenerate match ... match tail or fail
-
this.repeat = current.Previous;
+ deep = null;
if (Eval (Mode.Match, ref ptr, pc + 1))
goto Pass;
}
if (current.IsLazy) {
- // match tail first ...
- this.repeat = current.Previous;
- int cp = Checkpoint ();
- if (Eval (Mode.Match, ref ptr, pc + 1))
- goto Pass;
-
- Backtrack (cp);
+ for (;;) {
+ // match tail first ...
+ this.repeat = current.Previous;
+ deep = null;
+ int cp = Checkpoint ();
+ if (Eval (Mode.Match, ref ptr, pc + 1))
+ goto Pass;
- // ... then match more
+ Backtrack (cp);
- this.repeat = current;
- if (!current.IsMaximum) {
+ // ... then match more
+ this.repeat = current;
+ if (current.IsMaximum)
+ goto Fail;
++ current.Count;
current.Start = ptr;
deep = current;
- if (Eval (Mode.Match, ref ptr, current.Expression)) {
- if (deep == current)
- goto Recurse;
- else
- goto Pass;
+ if (!Eval (Mode.Match, ref ptr, current.Expression)) {
+ current.Start = start;
+ current.Count = start_count;
+ goto Fail;
}
-
- current.Start = start;
- current.Count = start_count;
- goto Fail;
+ if (deep != current) // recursive mode
+ goto Pass;
+ // Degenerate match: ptr has not moved since the last (failed) tail match.
+ // So, next and subsequent tail matches will fail.
+ if (ptr == current.Start)
+ goto Fail;
}
+ } else {
+ int stack_size = stack.Count;
- return false;
- }
- else {
- // match more first ...
-
- if (!current.IsMaximum) {
+ // match greedily as much as possible
+ while (!current.IsMaximum) {
int cp = Checkpoint ();
+ int old_ptr = ptr;
+ int old_start = current.Start;
+
++ current.Count;
current.Start = ptr;
- deep = null; // We have more state to maintain than just
- // the position. Let's defer the work for now.
- if (Eval (Mode.Match, ref ptr, current.Expression))
+ deep = current;
+ if (!Eval (Mode.Match, ref ptr, current.Expression)) {
+ -- current.Count;
+ current.Start = old_start;
+ Backtrack (cp);
+ break;
+ }
+ if (deep != current) {
+ // recursive mode: no more backtracking, truncate the stack
+ stack.Count = stack_size;
goto Pass;
+ }
+ stack.Push (cp);
+ stack.Push (old_ptr);
- current.Start = start;
- -- current.Count;
- Backtrack (cp);
+ // Degenerate match: no point going on
+ if (ptr == current.Start)
+ break;
}
- // ... then match tail
-
+ // then, match the tail, backtracking as necessary.
this.repeat = current.Previous;
- if (Eval (Mode.Match, ref ptr, pc + 1))
- goto Pass;
+ for (;;) {
+ deep = null;
+ if (Eval (Mode.Match, ref ptr, pc + 1)) {
+ stack.Count = stack_size;
+ goto Pass;
+ }
+ if (stack.Count == stack_size) {
+ this.repeat = current;
+ goto Fail;
+ }
- this.repeat = current;
- goto Fail;
+ --current.Count;
+ ptr = stack.Pop ();
+ Backtrack (stack.Pop ());
+ }
}
}
case OpCode.FastRepeat: {
this.fast = new RepeatContext (
fast,
- program[pc + 2], // minimum
- program[pc + 3], // maximum
+ ReadProgramCount (pc + 2), // minimum
+ ReadProgramCount (pc + 4), // maximum
(flags & OpFlags.Lazy) != 0, // lazy
- pc + 4 // subexpression
+ pc + 6 // subexpression
);
- deep = fast;
-
fast.Start = ptr;
int cp = Checkpoint ();
pc += program[pc + 1]; // tail expression
ushort tail_word = program[pc];
- int c1, c2; // first character of tail operator
- int coff; // 0 or -1 depending on direction
+ int c1 = -1; // first character of tail operator
+ int c2 = -1; // ... and the same character, in upper case if ignoring case
+ int coff = 0; // 0 or -1 depending on direction
OpCode tail_op = (OpCode)(tail_word & 0xff);
if (tail_op == OpCode.Character || tail_op == OpCode.String) {
OpFlags tail_flags = (OpFlags)(tail_word & 0xff00);
+ if ((tail_flags & OpFlags.Negate) != 0)
+ goto skip;
+
if (tail_op == OpCode.String)
{
int offset = 0;
else
coff = 0;
}
- else {
- c1 = c2 = -1;
- coff = 0;
- }
+ skip:
if (fast.IsLazy) {
if (!fast.IsMinimum && !Eval (Mode.Count, ref ptr, fast.Expression)) {
//Console.WriteLine ("lazy fast: failed mininum.");
while (true) {
int p = ptr + coff;
- if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&
- Eval (Mode.Match, ref ptr, pc))
- break;
+ if (c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) {
+ deep = null;
+ if (Eval (Mode.Match, ref ptr, pc))
+ break;
+ }
if (fast.IsMaximum) {
//Console.WriteLine ("lazy fast: failed with maximum.");
while (true) {
int p = ptr + coff;
- if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&
- Eval (Mode.Match, ref ptr, pc))
- break;
+ if (c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) {
+ deep = null;
+ if (Eval (Mode.Match, ref ptr, pc))
+ break;
+ }
-- fast.Count;
if (!fast.IsMinimum) {
case OpCode.Category: {
if (CategoryUtils.IsCategory ((Category)program[pc ++], c))
return !negate;
+ break;
+ }
+ case OpCode.NotCategory: {
+ if (!CategoryUtils.IsCategory ((Category)program[pc ++], c))
+ return !negate;
break;
}
//Group not previously matched
return false;
}
-
Debug.Assert (marks [b].IsDefined, "Regex", "Balancng group not closed");
-
if (gid > 0 && capture){
Open (gid, marks [b].Index + marks [b].Length);
Close (gid, ptr);
private void Backtrack (int cp) {
Debug.Assert (cp > mark_start, "Regex", "Attempt to backtrack forwards");
-
for (int i = 0; i < groups.Length; ++ i) {
int m = groups [i];
while (cp <= m)
private void GetGroupInfo (int gid, out int first_mark_index, out int n_caps)
{
first_mark_index = -1;
- bool first = true;
n_caps = 0;
for (int m = groups [gid]; m >= 0; m = marks [m].Previous) {
if (!marks [m].IsDefined)
continue;
- ++n_caps;
- if (first) {
- first = false;
+ if (first_mark_index < 0)
first_mark_index = m;
- }
+ ++n_caps;
}
}
int n_caps, first_mark_index;
Group g;
GetGroupInfo (0, out first_mark_index, out n_caps);
+
+ // Avoid fully populating the Match instance if not needed
+ if (!needs_groups_or_captures)
+ return new Match (regex, this, text, text_end, 0, marks [first_mark_index].Index, marks [first_mark_index].Length);
+
Match retval = new Match (regex, this, text, text_end, groups.Length,
marks [first_mark_index].Index, marks [first_mark_index].Length, n_caps);
PopulateGroup (retval, first_mark_index, n_caps);
private RepeatContext repeat; // current repeat context
private RepeatContext fast; // fast repeat context
+ // Repeat/Until handling
+ private IntStack stack = new IntStack (); // utility stack
private RepeatContext deep; // points to the most-nested repeat context
private Mark[] marks = null; // mark stack
private int[] groups; // current group definitions
// private classes
-/*
- private struct Mark {
- public int Start, End;
- public int Previous;
- public bool IsDefined {
- get { return Start >= 0 && End >= 0; }
+ private struct IntStack {
+ int [] values;
+ int count;
+ public int Pop ()
+ {
+ return values [--count];
}
-
- public int Index {
- get { return Start < End ? Start : End; }
+ public void Push (int value)
+ {
+ if (values == null) {
+ values = new int [8];
+ } else if (count == values.Length) {
+ int new_size = values.Length;
+ new_size += new_size >> 1;
+ int [] new_values = new int [new_size];
+ for (int i = 0; i < count; ++i)
+ new_values [i] = values [i];
+ values = new_values;
+ }
+ values [count++] = value;
}
-
- public int Length {
- get { return Start < End ? End - Start : Start - End; }
+ public int Top {
+ get { return values [count - 1]; }
+ }
+ public int Count {
+ get { return count; }
+ set {
+ if (value > count)
+ throw new SystemException ("can only truncate the stack");
+ count = value;
+ }
}
}
-*/
+
private class RepeatContext {
public RepeatContext (RepeatContext previous, int min, int max, bool lazy, int expr_pc) {
this.previous = previous;