+// Based upon interpreter.cs, written by Dan Lewis (dlewis@gmx.co.uk)
+
using System;
using System.Collections;
using System.Globalization;
+using System.Diagnostics;
namespace System.Text.RegularExpressions {
internal delegate bool EvalDelegate (RxInterpreter interp, int strpos, ref int strpos_result);
- class RxInterpreter: BaseMachine {
+ sealed class RxInterpreter: BaseMachine {
byte[] program;
string str;
int string_start;
int string_end;
int group_count;
- int match_start;
+// int match_start;
int[] groups;
EvalDelegate eval_del; // optimized EvalByteCode method created by the CILCompiler
int mark_start; // start of current checkpoint
int mark_end; // end of checkpoint/next free mark
+ IntStack stack; // utility stack
+
+ RepeatContext repeat; // current repeat context
+ RepeatContext deep; // points to the most-nested repeat context
+
+ /* The readonly ensures the JIT can optimize out if (trace_rx) statements */
+ public static readonly bool trace_rx = Environment.GetEnvironmentVariable ("MONO_TRACE_RX") != null;
+
+ // private classes
+
+ internal struct IntStack {
+ int [] values;
+ int count;
+ public int Pop ()
+ {
+ return values [--count];
+ }
+ public void Push (int value)
+ {
+ if (values == null) {
+ values = new int [8];
+ } else if (count == values.Length) {
+ int new_size = values.Length;
+ new_size += new_size >> 1;
+ int [] new_values = new int [new_size];
+ for (int i = 0; i < count; ++i)
+ new_values [i] = values [i];
+ values = new_values;
+ }
+ values [count++] = value;
+ }
+ public int Top {
+ get { return values [count - 1]; }
+ }
+ public int Count {
+ get { return count; }
+ set {
+ if (value > count)
+ throw new SystemException ("can only truncate the stack");
+ count = value;
+ }
+ }
+ }
+
+ private class RepeatContext {
+ public RepeatContext (RepeatContext previous, int min, int max, bool lazy, int expr_pc) {
+ this.previous = previous;
+ this.min = min;
+ this.max = max;
+ this.lazy = lazy;
+ this.expr_pc = expr_pc;
+
+ this.start = -1;
+ this.count = 0;
+ }
+
+ public int Count {
+ get { return count; }
+ set { count = value; }
+ }
+
+ public int Start {
+ get { return start; }
+ set { start = value; }
+ }
+
+ public bool IsMinimum {
+ get { return min <= count; }
+ }
+
+ public bool IsMaximum {
+ get { return max <= count; }
+ }
+
+ public bool IsLazy {
+ get { return lazy; }
+ }
+
+ public int Expression {
+ get { return expr_pc; }
+ }
+
+ public RepeatContext Previous {
+ get { return previous; }
+ }
+
+ private int start;
+ private int min, max;
+ private bool lazy;
+ private int expr_pc;
+ private RepeatContext previous;
+
+ private int count;
+ }
+
static int ReadInt (byte[] code, int pc)
{
int val = code [pc];
- val |= code [pc + 1] << 8;
- val |= code [pc + 2] << 16;
- val |= code [pc + 3] << 24;
+ val |= (int)code [pc + 1] << 8;
+ val |= (int)code [pc + 2] << 16;
+ val |= (int)code [pc + 3] << 24;
return val;
}
{
this.program = program;
this.eval_del = eval_del;
- group_count = 1 + (program [1] | (program [2] << 8));
+ group_count = 1 + (program [1] | ((int)program [2] << 8));
groups = new int [group_count];
+ stack = new IntStack ();
+
+ ResetGroups ();
}
public override Match Scan (Regex regex, string text, int start, int end) {
} else {
match = EvalByteCode (11, start, ref res);
}
+ marks [groups [0]].End = res;
if (match) {
- Match m = new Match (regex, this, text, end, 0, match_start, res - match_start);
- return m;
+ return GenerateMatch (regex);
+ //Match m = new Match (regex, this, text, end, 0, match_start, res - match_start);
+ //return m;
}
return Match.Empty;
}
private bool Balance (int gid, int balance_gid, bool capture, int ptr) {
int b = groups [balance_gid];
- if (b == -1 || marks [b].Index < 0) {
+ if(b == -1 || marks[b].Index < 0) {
//Group not previously matched
return false;
}
+ Debug.Assert (marks [b].IsDefined, "Regex", "Balancng group not closed");
if (gid > 0 && capture){
Open (gid, marks [b].Index + marks [b].Length);
Close (gid, ptr);
}
groups [balance_gid] = marks[b].Previous;
+
return true;
}
int n = groups.Length;
if (marks == null)
marks = new Mark [n * 10];
- if (n == 1)
- return;
for (int i = 0; i < n; ++ i) {
groups [i] = i;
}
}
+ private Match GenerateMatch (Regex regex)
+ {
+ int n_caps, first_mark_index;
+ Group g;
+ GetGroupInfo (0, out first_mark_index, out n_caps);
+
+ // Avoid fully populating the Match instance if not needed
+ if (!needs_groups_or_captures)
+ return new Match (regex, this, str, string_end, 0, marks [first_mark_index].Index, marks [first_mark_index].Length);
+
+ Match retval = new Match (regex, this, str, string_end, groups.Length,
+ marks [first_mark_index].Index, marks [first_mark_index].Length, n_caps);
+ PopulateGroup (retval, first_mark_index, n_caps);
+
+ for (int gid = 1; gid < groups.Length; ++ gid) {
+ GetGroupInfo (gid, out first_mark_index, out n_caps);
+ if (first_mark_index < 0) {
+ g = Group.Fail;
+ } else {
+ g = new Group (str, marks [first_mark_index].Index, marks [first_mark_index].Length, n_caps);
+ PopulateGroup (g, first_mark_index, n_caps);
+ }
+ retval.Groups.SetValue (g, gid);
+ }
+ return retval;
+ }
+
+ // used by the IL backend
+ internal void SetStartOfMatch (int pos)
+ {
+ marks [groups [0]].Start = pos;
+ }
+
+ static bool IsWordChar (char c)
+ {
+ return Char.IsLetterOrDigit (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation;
+ }
+
bool EvalByteCode (int pc, int strpos, ref int strpos_result)
{
+ // luckily the IL engine can deal with char_group_end at compile time
+ // this code offset needs to be checked only in opcodes that handle
+ // a single char and that are included in a TestCharGroup expression:
+ // the engine is supposed to jump to this offset as soons as the
+ // first opcode in the expression matches
+ // The code pattern becomes:
+ // on successfull match: check if char_group_end is nonzero and jump to
+ // test_char_group_passed after adjusting strpos
+ // on failure: try the next expression by simply advancing pc
+ int char_group_end = 0;
int length, start, end;
while (true) {
- //Console.WriteLine ("evaluating: {0} at pc: {1}, strpos: {2}", (RxOp)program [pc], pc, strpos);
+ if (trace_rx) {
+ Console.WriteLine ("evaluating: {0} at pc: {1}, strpos: {2}, cge: {3}", (RxOp)program [pc], pc, strpos, char_group_end);
+ //Console.WriteLine ("deep: " + (deep == null ? 0 : deep.GetHashCode ()) + " repeat: " + (this.repeat == null ? 0 : this.repeat.GetHashCode ()));
+ }
switch ((RxOp)program [pc]) {
case RxOp.True:
+ if (char_group_end != 0) {
+ pc = char_group_end;
+ char_group_end = 0;
+ continue;
+ }
strpos_result = strpos;
return true;
case RxOp.False:
continue;
}
return false;
+ case RxOp.WordBoundary:
+ if (string_end == 0)
+ return false;
+ if (strpos == 0) {
+ if (IsWordChar (str [strpos])) {
+ pc++;
+ continue;
+ }
+ } else if (strpos == string_end) {
+ if (IsWordChar (str [strpos - 1])) {
+ pc++;
+ continue;
+ }
+ } else {
+ if (IsWordChar (str [strpos]) != IsWordChar (str [strpos - 1])) {
+ pc++;
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoWordBoundary:
+ if (string_end == 0)
+ return false;
+ if (strpos == 0) {
+ if (!IsWordChar (str [strpos])) {
+ pc++;
+ continue;
+ }
+ } else if (strpos == string_end) {
+ if (!IsWordChar (str [strpos - 1])) {
+ pc++;
+ continue;
+ }
+ } else {
+ if (IsWordChar (str [strpos]) == IsWordChar (str [strpos - 1])) {
+ pc++;
+ continue;
+ }
+ }
+ return false;
case RxOp.Anchor:
- // FIXME: test anchor
- length = program [pc + 3] | (program [pc + 4] << 8);
- pc += program [pc + 1] | (program [pc + 2] << 8);
- while (strpos < string_end) {
+ int skip = program [pc + 1] | ((int)program [pc + 2] << 8);
+ int anch_offset = program [pc + 3] | ((int)program [pc + 4] << 8);
+
+ /*
+ * In the general case, we have to evaluate the bytecode
+ * starting at pc + skip, however the optimizer emits some
+ * special cases, whose bytecode begins at pc + 5.
+ */
+ int anch_pc = pc + 5;
+ RxOp anch_op = (RxOp)(program[anch_pc] & 0x00ff);
+
+ bool spec_anch = false;
+
+ // FIXME: Add more special cases from interpreter.cs
+ if (anch_op == RxOp.String || anch_op == RxOp.StringIgnoreCase) {
+ if (pc + skip == anch_pc + 2 + program [anch_pc + 1] + 1) {
+ // Anchor
+ // String
+ // True
+ spec_anch = true;
+ if (trace_rx)
+ Console.WriteLine (" string anchor at {0}, offset {1}", anch_pc, anch_offset);
+ }
+ }
+
+ pc += skip;
+
+ if ((RxOp)program [pc] == RxOp.StartOfString) {
+ if (strpos == 0) {
+ int res = strpos;
+ if (groups.Length > 1) {
+ ResetGroups ();
+ marks [groups [0]].Start = strpos;
+ }
+ if (EvalByteCode (pc + 1, strpos, ref res)) {
+ marks [groups [0]].Start = strpos;
+ if (groups.Length > 1)
+ marks [groups [0]].End = res;
+ strpos_result = res;
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // it's important to test also the end of the string
+ // position for things like: "" =~ /$/
+ end = string_end + 1;
+ while (strpos < end) {
+ if (spec_anch) {
+ if (anch_op == RxOp.String || anch_op == RxOp.StringIgnoreCase) {
+ /*
+ * This means the match must contain a given
+ * string at a constant position, so we can skip
+ * forward until the string matches. This is a win if
+ * the rest of the regex
+ * has a complex positive lookbehind for example.
+ */
+ int tmp_res = strpos;
+ if (!EvalByteCode (anch_pc, strpos + anch_offset, ref tmp_res)) {
+ strpos ++;
+ continue;
+ }
+ }
+ }
int res = strpos;
if (groups.Length > 1) {
ResetGroups ();
marks [groups [0]].Start = strpos;
}
if (EvalByteCode (pc, strpos, ref res)) {
- match_start = strpos;
+// match_start = strpos;
+ marks [groups [0]].Start = strpos;
if (groups.Length > 1)
marks [groups [0]].End = res;
strpos_result = res;
strpos++;
}
return false;
+ case RxOp.AnchorReverse:
+ length = program [pc + 3] | ((int)program [pc + 4] << 8);
+ pc += program [pc + 1] | ((int)program [pc + 2] << 8);
+ // it's important to test also the end of the string
+ // position for things like: "" =~ /$/
+ end = 0;
+ while (strpos >= 0) {
+ int res = strpos;
+ if (groups.Length > 1) {
+ ResetGroups ();
+ marks [groups [0]].Start = strpos;
+ }
+ if (EvalByteCode (pc, strpos, ref res)) {
+// match_start = strpos;
+ marks [groups [0]].Start = strpos;
+ if (groups.Length > 1)
+ marks [groups [0]].End = res;
+ strpos_result = res;
+ return true;
+ }
+ strpos--;
+ }
+ return false;
case RxOp.Reference:
- length = GetLastDefined (program [pc + 1] | (program [pc + 2] << 8));
+ length = GetLastDefined (program [pc + 1] | ((int)program [pc + 2] << 8));
if (length < 0)
return false;
start = marks [length].Index;
}
pc += 3;
continue;
+ case RxOp.ReferenceIgnoreCase:
+ length = GetLastDefined (program [pc + 1] | ((int)program [pc + 2] << 8));
+ if (length < 0)
+ return false;
+ start = marks [length].Index;
+ length = marks [length].Length;
+ if (strpos + length > string_end)
+ return false;
+ for (end = start + length; start < end; ++start) {
+ if (str [strpos] != str [start] && Char.ToLower (str [strpos]) != Char.ToLower (str [start]))
+ return false;
+ strpos++;
+ }
+ pc += 3;
+ continue;
+ case RxOp.ReferenceReverse: {
+ length = GetLastDefined (program [pc + 1] | ((int)program [pc + 2] << 8));
+ if (length < 0)
+ return false;
+ start = marks [length].Index;
+ length = marks [length].Length;
+ if (strpos - length < 0)
+ return false;
+ int p = strpos - length;
+ for (end = start + length; start < end; ++start, ++p) {
+ if (str [p] != str [start])
+ return false;
+ }
+ strpos -= length;
+ pc += 3;
+ continue;
+ }
case RxOp.IfDefined:
- if (GetLastDefined (program [pc + 3] | (program [pc + 4] << 8)) < 0)
+ if (GetLastDefined (program [pc + 3] | ((int)program [pc + 4] << 8)) >= 0)
pc += 5;
else
- pc += program [pc + 1] | (program [pc + 2] << 8);
+ pc += program [pc + 1] | ((int)program [pc + 2] << 8);
+ continue;
+ case RxOp.SubExpression: {
+ int res = 0;
+ if (EvalByteCode (pc + 3, strpos, ref res)) {
+ pc += program [pc + 1] | ((int)program [pc + 2] << 8);
+ strpos = res;
+ continue;
+ }
+ return false;
+ }
+ case RxOp.Test: {
+ int res = 0;
+ // FIXME: checkpoint
+ if (EvalByteCode (pc + 5, strpos, ref res)) {
+ pc += program [pc + 1] | ((int)program [pc + 2] << 8);
+ } else {
+ pc += program [pc + 3] | ((int)program [pc + 4] << 8);
+ }
continue;
+ }
case RxOp.OpenGroup:
- Open (program [pc + 1] | (program [pc + 2] << 8), strpos);
+ Open (program [pc + 1] | ((int)program [pc + 2] << 8), strpos);
pc += 3;
continue;
case RxOp.CloseGroup:
- Close (program [pc + 1] | (program [pc + 2] << 8), strpos);
+ Close (program [pc + 1] | ((int)program [pc + 2] << 8), strpos);
pc += 3;
continue;
+ case RxOp.BalanceStart: {
+ int res = 0;
+
+ if (!EvalByteCode (pc + 8, strpos, ref res))
+ goto Fail;
+
+ int gid = program [pc + 1] | ((int)program [pc + 2] << 8);
+ int balance_gid = program [pc + 3] | ((int)program [pc + 4] << 8);
+ bool capture = program [pc + 5] > 0;
+ if (!Balance (gid, balance_gid, capture, strpos))
+ goto Fail;
+
+ strpos = res;
+ pc += program[pc + 6] | ((int)program [pc + 7] << 8);
+ break;
+ }
+ case RxOp.Balance: {
+ goto Pass;
+ }
+
case RxOp.Jump:
- pc += program [pc + 1] | (program [pc + 2] << 8);
+ pc += program [pc + 1] | ((int)program [pc + 2] << 8);
+ continue;
+ case RxOp.TestCharGroup:
+ char_group_end = pc + (program [pc + 1] | ((int)program [pc + 2] << 8));
+ pc += 3;
continue;
case RxOp.String:
start = pc + 2;
}
pc = end;
continue;
- case RxOp.UnicodeString:
+ case RxOp.StringReverse: {
+ start = pc + 2;
+ length = program [pc + 1];
+ if (strpos < length)
+ return false;
+ int p = strpos - length;
+ end = start + length;
+ for (; start < end; ++start, ++p) {
+ if (str [p] != program [start])
+ return false;
+ }
+ strpos -= length;
+ pc = end;
+ continue;
+ }
+ case RxOp.StringIgnoreCaseReverse: {
+ start = pc + 2;
+ length = program [pc + 1];
+ if (strpos < length)
+ return false;
+ int p = strpos - length;
+ end = start + length;
+ for (; start < end; ++start, ++p) {
+ if (str [p] != program [start] && Char.ToLower (str [p]) != program [start])
+ return false;
+ }
+ strpos -= length;
+ pc = end;
+ continue;
+ }
+ case RxOp.UnicodeString: {
start = pc + 3;
- length = program [pc + 1] | (program [pc + 2] << 8);
+ length = program [pc + 1] | ((int)program [pc + 2] << 8);
if (strpos + length > string_end)
return false;
end = start + length * 2;
for (; start < end; start += 2) {
- int c = program [start] | (program [start + 1] << 8);
+ int c = program [start] | ((int)program [start + 1] << 8);
if (str [strpos] != c)
return false;
strpos++;
}
pc = end;
continue;
- case RxOp.UnicodeStringIgnoreCase:
+ }
+ case RxOp.UnicodeStringIgnoreCase: {
start = pc + 3;
- length = program [pc + 1] | (program [pc + 2] << 8);
+ length = program [pc + 1] | ((int)program [pc + 2] << 8);
if (strpos + length > string_end)
return false;
end = start + length * 2;
for (; start < end; start += 2) {
- int c = program [start] | (program [start + 1] << 8);
+ int c = program [start] | ((int)program [start + 1] << 8);
if (str [strpos] != c && Char.ToLower (str [strpos]) != c)
return false;
strpos++;
}
pc = end;
continue;
- case RxOp.Char:
- if (strpos < string_end && (str [strpos] == program [pc + 1])) {
- strpos++;
- pc += 2;
- continue;
+ }
+ case RxOp.UnicodeStringReverse: {
+ start = pc + 3;
+ length = program [pc + 1] | ((int)program [pc + 2] << 8);
+ if (strpos < length)
+ return false;
+ int p = strpos - length;
+ end = start + length * 2;
+ for (; start < end; start += 2, p += 2) {
+ int c = program [start] | ((int)program [start + 1] << 8);
+ if (str [p] != c)
+ return false;
}
- return false;
- case RxOp.NoChar:
- if (strpos < string_end && (str [strpos] != program [pc + 1])) {
- strpos++;
- pc += 2;
- continue;
+ strpos -= length;
+ pc = end;
+ continue;
+ }
+ case RxOp.UnicodeStringIgnoreCaseReverse: {
+ start = pc + 3;
+ length = program [pc + 1] | ((int)program [pc + 2] << 8);
+ if (strpos < length)
+ return false;
+ int p = strpos - length;
+ end = start + length * 2;
+ for (; start < end; start += 2, p += 2) {
+ int c = program [start] | ((int)program [start + 1] << 8);
+ if (str [p] != c && Char.ToLower (str [p]) != c)
+ return false;
}
- return false;
- case RxOp.CharIgnoreCase:
- if (strpos < string_end && (Char.ToLower (str [strpos]) == program [pc + 1])) {
- strpos++;
- pc += 2;
- continue;
+ strpos -= length;
+ pc = end;
+ continue;
+ }
+
+ /*
+ * The opcodes below are basically specialized versions of one
+ * generic opcode, which has three parameters:
+ * - reverse (Reverse), revert (No), ignore-case (IgnoreCase)
+ * Thus each opcode has 8 variants.
+ * FIXME: Maybe move all unusual variations
+ * (Reverse+IgnoreCase+Unicode) into a generic GenericChar opcode
+ * like in the old interpreter.
+ * FIXME: Move all the Reverse opcodes to a separate method.
+ */
+#if FALSE
+ if (!reverse) {
+ if (strpos < string_end && (COND (str [strpos]))) {
+ if (!revert) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += ins_len;
+ continue;
+ } else {
+ /*
+ * If we are inside a char group, the cases are ANDed
+ * together, so we have to continue checking the
+ * other cases, and we need to increase strpos after
+ * the final check.
+ * The char group is termined by a True, hence the
+ * + 1 below.
+ * FIXME: Optimize this.
+ */
+ pc += ins_len;
+ if (char_group_end == 0 || (pc + 1 == char_group_end))
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ continue;
+ }
+ } else {
+ if (!revert) {
+ if (char_group_end == 0)
+ return false;
+ pc += ins_len;
+ continue;
+ } else {
+ /* Fail both inside and outside a char group */
+ return false;
+ }
+ }
+ } else {
+ // Same as above, but use:
+ // - strpos > 0 instead of strpos < string_len
+ // - COND (str [strpos - 1]) instead of COND (str [strpos])
+ // - strpos -- instead of strpos ++
}
- return false;
- case RxOp.NoCharIgnoreCase:
- if (strpos < string_end && (Char.ToLower (str [strpos]) != program [pc + 1])) {
- strpos++;
- pc += 2;
- continue;
+#endif
+ // GENERATED BY gen-interp.cs, DO NOT MODIFY
+
+ /* Char */
+
+ case RxOp.Char:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if (((c == program [pc + 1]))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 2;
+ continue;
+ }
}
- return false;
+ if (char_group_end == 0)
+ return false;
+ pc += 2;
+ continue;
+
+ /* Range */
+
case RxOp.Range:
if (strpos < string_end) {
- int c = str [strpos];
- if (c >= program [pc + 1] && c <= program [pc + 2]) {
- strpos++;
+ char c = str [strpos];
+ if (((c >= program [pc + 1] && c <= program [pc + 2]))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
pc += 3;
continue;
}
}
- return false;
- case RxOp.NoRange:
+ if (char_group_end == 0)
+ return false;
+ pc += 3;
+ continue;
+
+ /* UnicodeRange */
+
+ case RxOp.UnicodeRange:
if (strpos < string_end) {
- int c = str [strpos];
- if (c >= program [pc + 1] && c <= program [pc + 2])
- return false;
- strpos++;
- pc += 3;
- continue;
+ char c = str [strpos];
+ if (((c >= (program [pc + 1] | ((int)program [pc + 2] << 8))) && (c <= (program [pc + 3] | ((int)program [pc + 4] << 8))))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 5;
+ continue;
+ }
}
- return false;
- case RxOp.RangeIgnoreCase:
+ if (char_group_end == 0)
+ return false;
+ pc += 5;
+ continue;
+
+ /* UnicodeChar */
+
+ case RxOp.UnicodeChar:
if (strpos < string_end) {
- int c = Char.ToLower (str [strpos]);
- if (c >= program [pc + 1] && c <= program [pc + 2]) {
- strpos++;
+ char c = str [strpos];
+ if (((c == (program [pc + 1] | ((int)program [pc + 2] << 8))))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
pc += 3;
continue;
}
}
- return false;
- case RxOp.NoRangeIgnoreCase:
+ if (char_group_end == 0)
+ return false;
+ pc += 3;
+ continue;
+
+ /* CategoryAny */
+
+ case RxOp.CategoryAny:
if (strpos < string_end) {
- int c = Char.ToLower (str [strpos]);
- if (c >= program [pc + 1] && c <= program [pc + 2])
- return false;
- strpos++;
- pc += 3;
- continue;
+ char c = str [strpos];
+ if (((c != '\n'))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 1;
+ continue;
+ }
}
- return false;
- case RxOp.Bitmap:
+ if (char_group_end == 0)
+ return false;
+ pc += 1;
+ continue;
+
+ /* CategoryAnySingleline */
+
+ case RxOp.CategoryAnySingleline:
if (strpos < string_end) {
- int c = str [strpos];
- c -= program [pc + 1];
- length = program [pc + 2];
- if (c < 0 || c >= (length << 3))
- return false;
- pc += 3;
- if ((program [pc + (c >> 3)] & (1 << (c & 0x7))) != 0) {
- strpos++;
- pc += length;
+ char c = str [strpos];
+ if ((true)) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 1;
continue;
}
}
- return false;
- case RxOp.BitmapIgnoreCase:
+ if (char_group_end == 0)
+ return false;
+ pc += 1;
+ continue;
+
+ /* CategoryWord */
+
+ case RxOp.CategoryWord:
if (strpos < string_end) {
- int c = Char.ToLower (str [strpos]);
- c -= program [pc + 1];
- length = program [pc + 2];
- if (c < 0 || c >= (length << 3))
- return false;
- pc += 3;
- if ((program [pc + (c >> 3)] & (1 << (c & 0x7))) != 0) {
- strpos++;
- pc += length;
+ char c = str [strpos];
+ if (((Char.IsLetterOrDigit (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 1;
continue;
}
}
- return false;
- case RxOp.UnicodeChar:
- if (strpos < string_end && (str [strpos] == (program [pc + 1] | (program [pc + 2] << 8)))) {
- strpos++;
- pc += 3;
- continue;
- }
- return false;
+ if (char_group_end == 0)
+ return false;
+ pc += 1;
+ continue;
+
+ /* CategoryDigit */
+
+ case RxOp.CategoryDigit:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if (((Char.IsDigit (c)))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 1;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 1;
+ continue;
+
+ /* CategoryWhiteSpace */
+
+ case RxOp.CategoryWhiteSpace:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if (((Char.IsWhiteSpace (c)))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 1;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 1;
+ continue;
+
+ /* CategoryEcmaWord */
+
+ case RxOp.CategoryEcmaWord:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if ((('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' || c == '_'))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 1;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 1;
+ continue;
+
+ /* CategoryEcmaWhiteSpace */
+
+ case RxOp.CategoryEcmaWhiteSpace:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if (((c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 1;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 1;
+ continue;
+
+ /* CategoryUnicodeSpecials */
+
+ case RxOp.CategoryUnicodeSpecials:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if ((('\uFEFF' <= c && c <= '\uFEFF' || '\uFFF0' <= c && c <= '\uFFFD'))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 1;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 1;
+ continue;
+
+ /* CategoryUnicode */
+
+ case RxOp.CategoryUnicode:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if (((Char.GetUnicodeCategory (c) == (UnicodeCategory)program [pc + 1]))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 2;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 2;
+ continue;
+
+ /* CategoryGeneral */
+
+ case RxOp.CategoryGeneral:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if (((CategoryUtils.IsCategory ((Category)program [pc + 1], c)))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 2;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 2;
+ continue;
+
+ /* Bitmap */
+
+ case RxOp.Bitmap:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ int c2 = (int)c; c2 -= program [pc + 1]; length = program [pc + 2];
+ if (((c2 >= 0 && c2 < (length << 3) && (program [pc + 3 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 3 + program [pc + 2];
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 3 + program [pc + 2];
+ continue;
+
+ /* UnicodeBitmap */
+
+ case RxOp.UnicodeBitmap:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ int c2 = (int)c; c2 -= (program [pc + 1] | ((int)program [pc + 2] << 8)); length = (program [pc + 3] | ((int)program [pc + 4] << 8));
+ if (((c2 >= 0 && c2 < (length << 3) && (program [pc + 5 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
+ continue;
+ case RxOp.CharIgnoreCase:
+ if (strpos < string_end) {
+ char c = Char.ToLower (str [strpos]);
+ if (((c == program [pc + 1]))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 2;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 2;
+ continue;
+ case RxOp.RangeIgnoreCase:
+ if (strpos < string_end) {
+ char c = Char.ToLower (str [strpos]);
+ if (((c >= program [pc + 1] && c <= program [pc + 2]))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 3;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 3;
+ continue;
+ case RxOp.UnicodeRangeIgnoreCase:
+ if (strpos < string_end) {
+ char c = Char.ToLower (str [strpos]);
+ if (((c >= (program [pc + 1] | ((int)program [pc + 2] << 8))) && (c <= (program [pc + 3] | ((int)program [pc + 4] << 8))))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 5;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 5;
+ continue;
+ case RxOp.UnicodeCharIgnoreCase:
+ if (strpos < string_end) {
+ char c = Char.ToLower (str [strpos]);
+ if (((c == (program [pc + 1] | ((int)program [pc + 2] << 8))))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 3;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 3;
+ continue;
+ case RxOp.BitmapIgnoreCase:
+ if (strpos < string_end) {
+ char c = Char.ToLower (str [strpos]);
+ int c2 = (int)c; c2 -= program [pc + 1]; length = program [pc + 2];
+ if (((c2 >= 0 && c2 < (length << 3) && (program [pc + 3 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 3 + program [pc + 2];
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 3 + program [pc + 2];
+ continue;
+ case RxOp.UnicodeBitmapIgnoreCase:
+ if (strpos < string_end) {
+ char c = Char.ToLower (str [strpos]);
+ int c2 = (int)c; c2 -= (program [pc + 1] | ((int)program [pc + 2] << 8)); length = (program [pc + 3] | ((int)program [pc + 4] << 8));
+ if (((c2 >= 0 && c2 < (length << 3) && (program [pc + 5 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
+ strpos ++;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
+ continue;
+ case RxOp.NoChar:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if (!((c == program [pc + 1]))) {
+ pc += 2;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoRange:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if (!((c >= program [pc + 1] && c <= program [pc + 2]))) {
+ pc += 3;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoUnicodeRange:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if (!((c >= (program [pc + 1] | ((int)program [pc + 2] << 8))) && (c <= (program [pc + 3] | ((int)program [pc + 4] << 8))))) {
+ pc += 5;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
case RxOp.NoUnicodeChar:
- if (strpos < string_end && (str [strpos] != (program [pc + 1] | (program [pc + 2] << 8)))) {
- strpos++;
- pc += 3;
- continue;
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if (!((c == (program [pc + 1] | ((int)program [pc + 2] << 8))))) {
+ pc += 3;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoCategoryAny:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if (!((c != '\n'))) {
+ pc += 1;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoCategoryAnySingleline:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if (!(true)) {
+ pc += 1;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoCategoryWord:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if (!((Char.IsLetterOrDigit (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation))) {
+ pc += 1;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoCategoryDigit:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if (!((Char.IsDigit (c)))) {
+ pc += 1;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoCategoryWhiteSpace:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if (!((Char.IsWhiteSpace (c)))) {
+ pc += 1;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoCategoryEcmaWord:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if (!(('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' || c == '_'))) {
+ pc += 1;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoCategoryEcmaWhiteSpace:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if (!((c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'))) {
+ pc += 1;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoCategoryUnicodeSpecials:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if (!(('\uFEFF' <= c && c <= '\uFEFF' || '\uFFF0' <= c && c <= '\uFFFD'))) {
+ pc += 1;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoCategoryUnicode:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if (!((Char.GetUnicodeCategory (c) == (UnicodeCategory)program [pc + 1]))) {
+ pc += 2;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoCategoryGeneral:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ if (!((CategoryUtils.IsCategory ((Category)program [pc + 1], c)))) {
+ pc += 2;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoBitmap:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ int c2 = (int)c; c2 -= program [pc + 1]; length = program [pc + 2];
+ if (!((c2 >= 0 && c2 < (length << 3) && (program [pc + 3 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
+ pc += 3 + program [pc + 2];
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoUnicodeBitmap:
+ if (strpos < string_end) {
+ char c = str [strpos];
+ int c2 = (int)c; c2 -= (program [pc + 1] | ((int)program [pc + 2] << 8)); length = (program [pc + 3] | ((int)program [pc + 4] << 8));
+ if (!((c2 >= 0 && c2 < (length << 3) && (program [pc + 5 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
+ pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoCharIgnoreCase:
+ if (strpos < string_end) {
+ char c = Char.ToLower (str [strpos]);
+ if (!((c == program [pc + 1]))) {
+ pc += 2;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoRangeIgnoreCase:
+ if (strpos < string_end) {
+ char c = Char.ToLower (str [strpos]);
+ if (!((c >= program [pc + 1] && c <= program [pc + 2]))) {
+ pc += 3;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoUnicodeRangeIgnoreCase:
+ if (strpos < string_end) {
+ char c = Char.ToLower (str [strpos]);
+ if (!((c >= (program [pc + 1] | ((int)program [pc + 2] << 8))) && (c <= (program [pc + 3] | ((int)program [pc + 4] << 8))))) {
+ pc += 5;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoUnicodeCharIgnoreCase:
+ if (strpos < string_end) {
+ char c = Char.ToLower (str [strpos]);
+ if (!((c == (program [pc + 1] | ((int)program [pc + 2] << 8))))) {
+ pc += 3;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoBitmapIgnoreCase:
+ if (strpos < string_end) {
+ char c = Char.ToLower (str [strpos]);
+ int c2 = (int)c; c2 -= program [pc + 1]; length = program [pc + 2];
+ if (!((c2 >= 0 && c2 < (length << 3) && (program [pc + 3 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
+ pc += 3 + program [pc + 2];
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoUnicodeBitmapIgnoreCase:
+ if (strpos < string_end) {
+ char c = Char.ToLower (str [strpos]);
+ int c2 = (int)c; c2 -= (program [pc + 1] | ((int)program [pc + 2] << 8)); length = (program [pc + 3] | ((int)program [pc + 4] << 8));
+ if (!((c2 >= 0 && c2 < (length << 3) && (program [pc + 5 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
+ pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos ++;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.CharReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (((c == program [pc + 1]))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 2;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 2;
+ continue;
+ case RxOp.RangeReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (((c >= program [pc + 1] && c <= program [pc + 2]))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 3;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 3;
+ continue;
+ case RxOp.UnicodeRangeReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (((c >= (program [pc + 1] | ((int)program [pc + 2] << 8))) && (c <= (program [pc + 3] | ((int)program [pc + 4] << 8))))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 5;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 5;
+ continue;
+ case RxOp.UnicodeCharReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (((c == (program [pc + 1] | ((int)program [pc + 2] << 8))))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 3;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 3;
+ continue;
+ case RxOp.CategoryAnyReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (((c != '\n'))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 1;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 1;
+ continue;
+ case RxOp.CategoryAnySinglelineReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if ((true)) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 1;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 1;
+ continue;
+ case RxOp.CategoryWordReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (((Char.IsLetterOrDigit (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 1;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 1;
+ continue;
+ case RxOp.CategoryDigitReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (((Char.IsDigit (c)))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 1;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 1;
+ continue;
+ case RxOp.CategoryWhiteSpaceReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (((Char.IsWhiteSpace (c)))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 1;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 1;
+ continue;
+ case RxOp.CategoryEcmaWordReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if ((('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' || c == '_'))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 1;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 1;
+ continue;
+ case RxOp.CategoryEcmaWhiteSpaceReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (((c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 1;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 1;
+ continue;
+ case RxOp.CategoryUnicodeSpecialsReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if ((('\uFEFF' <= c && c <= '\uFEFF' || '\uFFF0' <= c && c <= '\uFFFD'))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 1;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 1;
+ continue;
+ case RxOp.CategoryUnicodeReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (((Char.GetUnicodeCategory (c) == (UnicodeCategory)program [pc + 1]))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 2;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 2;
+ continue;
+ case RxOp.CategoryGeneralReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (((CategoryUtils.IsCategory ((Category)program [pc + 1], c)))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 2;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 2;
+ continue;
+ case RxOp.BitmapReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ int c2 = (int)c; c2 -= program [pc + 1]; length = program [pc + 2];
+ if (((c2 >= 0 && c2 < (length << 3) && (program [pc + 3 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 3 + program [pc + 2];
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 3 + program [pc + 2];
+ continue;
+ case RxOp.UnicodeBitmapReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ int c2 = (int)c; c2 -= (program [pc + 1] | ((int)program [pc + 2] << 8)); length = (program [pc + 3] | ((int)program [pc + 4] << 8));
+ if (((c2 >= 0 && c2 < (length << 3) && (program [pc + 5 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
+ continue;
+ case RxOp.CharIgnoreCaseReverse:
+ if (strpos > 0) {
+ char c = Char.ToLower (str [strpos - 1]);
+ if (((c == program [pc + 1]))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 2;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 2;
+ continue;
+ case RxOp.RangeIgnoreCaseReverse:
+ if (strpos > 0) {
+ char c = Char.ToLower (str [strpos - 1]);
+ if (((c >= program [pc + 1] && c <= program [pc + 2]))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 3;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 3;
+ continue;
+ case RxOp.UnicodeRangeIgnoreCaseReverse:
+ if (strpos > 0) {
+ char c = Char.ToLower (str [strpos - 1]);
+ if (((c >= (program [pc + 1] | ((int)program [pc + 2] << 8))) && (c <= (program [pc + 3] | ((int)program [pc + 4] << 8))))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 5;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 5;
+ continue;
+ case RxOp.UnicodeCharIgnoreCaseReverse:
+ if (strpos > 0) {
+ char c = Char.ToLower (str [strpos - 1]);
+ if (((c == (program [pc + 1] | ((int)program [pc + 2] << 8))))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 3;
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 3;
+ continue;
+ case RxOp.BitmapIgnoreCaseReverse:
+ if (strpos > 0) {
+ char c = Char.ToLower (str [strpos - 1]);
+ int c2 = (int)c; c2 -= program [pc + 1]; length = program [pc + 2];
+ if (((c2 >= 0 && c2 < (length << 3) && (program [pc + 3 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 3 + program [pc + 2];
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 3 + program [pc + 2];
+ continue;
+ case RxOp.UnicodeBitmapIgnoreCaseReverse:
+ if (strpos > 0) {
+ char c = Char.ToLower (str [strpos - 1]);
+ int c2 = (int)c; c2 -= (program [pc + 1] | ((int)program [pc + 2] << 8)); length = (program [pc + 3] | ((int)program [pc + 4] << 8));
+ if (((c2 >= 0 && c2 < (length << 3) && (program [pc + 5 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
+ strpos --;
+ if (char_group_end != 0)
+ goto test_char_group_passed;
+ pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
+ continue;
+ }
+ }
+ if (char_group_end == 0)
+ return false;
+ pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
+ continue;
+ case RxOp.NoCharReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (!((c == program [pc + 1]))) {
+ pc += 2;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
}
return false;
- case RxOp.UnicodeCharIgnoreCase:
- if (strpos < string_end && (Char.ToLower (str [strpos]) == (program [pc + 1] | (program [pc + 2] << 8)))) {
- strpos++;
- pc += 3;
- continue;
+ case RxOp.NoRangeReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (!((c >= program [pc + 1] && c <= program [pc + 2]))) {
+ pc += 3;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
}
return false;
- case RxOp.NoUnicodeCharIgnoreCase:
- if (strpos < string_end && (Char.ToLower (str [strpos]) != (program [pc + 1] | (program [pc + 2] << 8)))) {
- strpos++;
- pc += 3;
- continue;
+ case RxOp.NoUnicodeRangeReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (!((c >= (program [pc + 1] | ((int)program [pc + 2] << 8))) && (c <= (program [pc + 3] | ((int)program [pc + 4] << 8))))) {
+ pc += 5;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
}
return false;
- case RxOp.CategoryAny:
- if (strpos < string_end && str [strpos] != '\n') {
- strpos++;
- pc++;
- continue;
+ case RxOp.NoUnicodeCharReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (!((c == (program [pc + 1] | ((int)program [pc + 2] << 8))))) {
+ pc += 3;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
}
return false;
- case RxOp.CategoryWord:
- if (strpos < string_end) {
- char c = str [strpos];
- if (Char.IsLetterOrDigit (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation) {
- strpos++;
- pc++;
+ case RxOp.NoCategoryAnyReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (!((c != '\n'))) {
+ pc += 1;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
continue;
}
}
return false;
- case RxOp.NoCategoryWord:
- if (strpos < string_end) {
- char c = str [strpos];
- if (!Char.IsLetterOrDigit (c) && Char.GetUnicodeCategory (c) != UnicodeCategory.ConnectorPunctuation) {
- strpos++;
- pc++;
+ case RxOp.NoCategoryAnySinglelineReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (!(true)) {
+ pc += 1;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
continue;
}
}
return false;
- case RxOp.CategoryDigit:
- if (strpos < string_end && Char.IsDigit (str [strpos])) {
- strpos++;
- pc++;
- continue;
+ case RxOp.NoCategoryWordReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (!((Char.IsLetterOrDigit (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation))) {
+ pc += 1;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
}
return false;
- case RxOp.CategoryWhiteSpace:
- if (strpos < string_end && Char.IsWhiteSpace (str [strpos])) {
- strpos++;
- pc++;
- continue;
+ case RxOp.NoCategoryDigitReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (!((Char.IsDigit (c)))) {
+ pc += 1;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
}
return false;
- case RxOp.CategoryEcmaWord:
- if (strpos < string_end) {
- int c = str [strpos];
- if ('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' || c == '_') {
- strpos++;
- pc++;
+ case RxOp.NoCategoryWhiteSpaceReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (!((Char.IsWhiteSpace (c)))) {
+ pc += 1;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
continue;
}
}
return false;
- case RxOp.CategoryEcmaDigit:
- if (strpos < string_end) {
- int c = str [strpos];
- if ('0' <= c && c <= '9') {
- strpos++;
- pc++;
+ case RxOp.NoCategoryEcmaWordReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (!(('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' || c == '_'))) {
+ pc += 1;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
continue;
}
}
return false;
- case RxOp.CategoryEcmaWhiteSpace:
- if (strpos < string_end) {
- int c = str [strpos];
- if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v') {
- strpos++;
- pc++;
+ case RxOp.NoCategoryEcmaWhiteSpaceReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (!((c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'))) {
+ pc += 1;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
continue;
}
}
return false;
- case RxOp.CategoryUnicodeSpecials:
- if (strpos < string_end) {
- int c = str [strpos];
- if ('\uFEFF' <= c && c <= '\uFEFF' || '\uFFF0' <= c && c <= '\uFFFD') {
- strpos++;
- pc++;
+ case RxOp.NoCategoryUnicodeSpecialsReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (!(('\uFEFF' <= c && c <= '\uFEFF' || '\uFFF0' <= c && c <= '\uFFFD'))) {
+ pc += 1;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
continue;
}
}
return false;
- case RxOp.CategoryUnicode:
- if (strpos < string_end && Char.GetUnicodeCategory (str [strpos]) == (UnicodeCategory)program [pc + 1]) {
- strpos++;
- pc += 2;
- continue;
+ case RxOp.NoCategoryUnicodeReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (!((Char.GetUnicodeCategory (c) == (UnicodeCategory)program [pc + 1]))) {
+ pc += 2;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoCategoryGeneralReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ if (!((CategoryUtils.IsCategory ((Category)program [pc + 1], c)))) {
+ pc += 2;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoBitmapReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ int c2 = (int)c; c2 -= program [pc + 1]; length = program [pc + 2];
+ if (!((c2 >= 0 && c2 < (length << 3) && (program [pc + 3 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
+ pc += 3 + program [pc + 2];
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoUnicodeBitmapReverse:
+ if (strpos > 0) {
+ char c = str [strpos - 1];
+ int c2 = (int)c; c2 -= (program [pc + 1] | ((int)program [pc + 2] << 8)); length = (program [pc + 3] | ((int)program [pc + 4] << 8));
+ if (!((c2 >= 0 && c2 < (length << 3) && (program [pc + 5 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
+ pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoCharIgnoreCaseReverse:
+ if (strpos > 0) {
+ char c = Char.ToLower (str [strpos - 1]);
+ if (!((c == program [pc + 1]))) {
+ pc += 2;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoRangeIgnoreCaseReverse:
+ if (strpos > 0) {
+ char c = Char.ToLower (str [strpos - 1]);
+ if (!((c >= program [pc + 1] && c <= program [pc + 2]))) {
+ pc += 3;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoUnicodeRangeIgnoreCaseReverse:
+ if (strpos > 0) {
+ char c = Char.ToLower (str [strpos - 1]);
+ if (!((c >= (program [pc + 1] | ((int)program [pc + 2] << 8))) && (c <= (program [pc + 3] | ((int)program [pc + 4] << 8))))) {
+ pc += 5;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoUnicodeCharIgnoreCaseReverse:
+ if (strpos > 0) {
+ char c = Char.ToLower (str [strpos - 1]);
+ if (!((c == (program [pc + 1] | ((int)program [pc + 2] << 8))))) {
+ pc += 3;
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoBitmapIgnoreCaseReverse:
+ if (strpos > 0) {
+ char c = Char.ToLower (str [strpos - 1]);
+ int c2 = (int)c; c2 -= program [pc + 1]; length = program [pc + 2];
+ if (!((c2 >= 0 && c2 < (length << 3) && (program [pc + 3 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
+ pc += 3 + program [pc + 2];
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
+ }
+ return false;
+ case RxOp.NoUnicodeBitmapIgnoreCaseReverse:
+ if (strpos > 0) {
+ char c = Char.ToLower (str [strpos - 1]);
+ int c2 = (int)c; c2 -= (program [pc + 1] | ((int)program [pc + 2] << 8)); length = (program [pc + 3] | ((int)program [pc + 4] << 8));
+ if (!((c2 >= 0 && c2 < (length << 3) && (program [pc + 5 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0))) {
+ pc += 5 + (program [pc + 3] | ((int)program [pc + 4] << 8));
+ if (char_group_end == 0 || (pc + 1 == char_group_end)) {
+ strpos --;
+ if (pc + 1 == char_group_end)
+ goto test_char_group_passed;
+ }
+ continue;
+ }
}
return false;
+
+ // END OF GENERATED CODE
+
case RxOp.Branch: {
int res = 0;
if (EvalByteCode (pc + 3, strpos, ref res)) {
strpos_result = res;
return true;
}
- //Console.WriteLine ("branch offset: {0}", program [pc + 1] | (program [pc + 2] << 8));
- pc += program [pc + 1] | (program [pc + 2] << 8);
+ //Console.WriteLine ("branch offset: {0}", program [pc + 1] | ((int)program [pc + 2] << 8));
+ pc += program [pc + 1] | ((int)program [pc + 2] << 8);
continue;
}
case RxOp.Repeat:
case RxOp.RepeatLazy: {
+ /*
+ * Repetation is modelled by two opcodes: Repeat and Until which
+ * contain the the qualified regex between them, i.e.:
+ * Repeat, <bytecode for the inner regex>, Until, <Tail expr>
+ * It is processed as follows:
+ * Repeat, [Until, <inner expr>]*, <Tail>
+ * This means that nested quantifiers are processed a bit
+ * strangely: when the inner quantifier fails to match, its
+ * tail is processed which includes the outer Until.
+ *
+ * This code is from the old interpreter.cs.
+ *
+ * FIXME: Rethink this.
+ */
+
+ int res = 0;
+
+ this.repeat = new RepeatContext (
+ this.repeat, // previous context
+ ReadInt (program, pc + 3), // minimum
+ ReadInt (program, pc + 7), // maximum
+ (RxOp)program [pc] == RxOp.RepeatLazy, // lazy
+ pc + 11 // subexpression
+ );
+
+ int until = pc + (program [pc + 1] | ((int)program [pc + 2] << 8));
+ if (!EvalByteCode (until, strpos, ref res)) {
+ this.repeat = this.repeat.Previous;
+ return false;
+ }
+
+ strpos = res;
+ strpos_result = strpos;
+ return true;
+ }
+ case RxOp.Until: {
+ RepeatContext current = this.repeat;
+ int res = 0;
+
+ //
+ // Can we avoid recursion?
+ //
+ // Backtracking can be forced in nested quantifiers from the tail of this quantifier.
+ // Thus, we cannot, in general, use a simple loop on repeat.Expression to handle
+ // quantifiers.
+ //
+ // If 'deep' was unmolested, that implies that there was no nested quantifiers.
+ // Thus, we can safely avoid recursion.
+ //
+ if (deep == current)
+ goto Pass;
+
+ start = current.Start;
+ int start_count = current.Count;
+
+ // First match at least 'start' items without backtracking
+ while (!current.IsMinimum) {
+ ++ current.Count;
+ current.Start = strpos;
+ deep = current;
+ if (!EvalByteCode (current.Expression, strpos, ref res)) {
+ current.Start = start;
+ current.Count = start_count;
+ goto Fail;
+ }
+ strpos = res;
+ if (deep != current) // recursive mode
+ goto Pass;
+ }
+
+ if (strpos == current.Start) {
+ // degenerate match ... match tail or fail
+ this.repeat = current.Previous;
+ deep = null;
+ if (EvalByteCode (pc + 1, strpos, ref res)) {
+ strpos = res;
+ goto Pass;
+ }
+ this.repeat = current;
+ goto Fail;
+ }
+
+ if (current.IsLazy) {
+ for (;;) {
+ // match tail first ...
+ this.repeat = current.Previous;
+ deep = null;
+ int cp = Checkpoint ();
+ if (EvalByteCode (pc + 1, strpos, ref res)) {
+ strpos = res;
+ goto Pass;
+ }
+
+ Backtrack (cp);
+
+ // ... then match more
+ this.repeat = current;
+ if (current.IsMaximum)
+ goto Fail;
+ ++ current.Count;
+ current.Start = strpos;
+ deep = current;
+ if (!EvalByteCode (current.Expression, strpos, ref res)) {
+ current.Start = start;
+ current.Count = start_count;
+ goto Fail;
+ }
+ strpos = res;
+ if (deep != current) // recursive mode
+ goto Pass;
+ // Degenerate match: ptr has not moved since the last (failed) tail match.
+ // So, next and subsequent tail matches will fail.
+ if (strpos == current.Start)
+ goto Fail;
+ }
+ } else {
+ int stack_size = stack.Count;
+
+ // match greedily as much as possible
+ while (!current.IsMaximum) {
+ int cp = Checkpoint ();
+ int old_ptr = strpos;
+ int old_start = current.Start;
+
+ ++ current.Count;
+ if (trace_rx)
+ Console.WriteLine ("recurse with count {0}.", current.Count);
+ current.Start = strpos;
+ deep = current;
+ if (!EvalByteCode (current.Expression, strpos, ref res)) {
+ -- current.Count;
+ current.Start = old_start;
+ Backtrack (cp);
+ break;
+ }
+ strpos = res;
+ if (deep != current) {
+ // recursive mode: no more backtracking, truncate the stack
+ stack.Count = stack_size;
+ goto Pass;
+ }
+ stack.Push (cp);
+ stack.Push (old_ptr);
+
+ // Degenerate match: no point going on
+ if (strpos == current.Start)
+ break;
+ }
+
+ if (trace_rx)
+ Console.WriteLine ("matching tail: {0} pc={1}", strpos, pc + 1);
+ // then, match the tail, backtracking as necessary.
+ this.repeat = current.Previous;
+ for (;;) {
+ deep = null;
+ if (EvalByteCode (pc + 1, strpos, ref res)) {
+ strpos = res;
+ stack.Count = stack_size;
+ goto Pass;
+ }
+ if (stack.Count == stack_size) {
+ this.repeat = current;
+ goto Fail;
+ }
+
+ --current.Count;
+ strpos = stack.Pop ();
+ Backtrack (stack.Pop ());
+ if (trace_rx)
+ Console.WriteLine ("backtracking to {0} expr={1} pc={2}", strpos, current.Expression, pc);
+ }
+ }
+ }
+
+ case RxOp.FastRepeat:
+ case RxOp.FastRepeatLazy: {
+ /*
+ * A FastRepeat is a simplified version of Repeat which does
+ * not contain another repeat inside, so backtracking is
+ * easier.
+ */
+ bool lazy = program [pc] == (byte)RxOp.FastRepeatLazy;
int res = 0;
- start = ReadInt (program, pc + 3);
- end = ReadInt (program, pc + 7);
- //Console.WriteLine ("min: {0}, max: {1}", start, end);
- length = 0;
- while (length < end) {
- if (!EvalByteCode (pc + 11, strpos, ref res)) {
- if (length >= start) {
+ int tail = pc + (program [pc + 1] | ((int)program [pc + 2] << 8));
+ start = ReadInt (program, pc + 3);
+ end = ReadInt (program, pc + 7);
+ //Console.WriteLine ("min: {0}, max: {1} tail: {2}", start, end, tail);
+ length = 0;
+
+ deep = null;
+
+ // First match at least 'start' items
+ while (length < start) {
+ if (!EvalByteCode (pc + 11, strpos, ref res))
+ return false;
+ strpos = res;
+ length++;
+ }
+
+ if (lazy) {
+ while (true) {
+ // Match the tail
+ int cp = Checkpoint ();
+ if (EvalByteCode (tail, strpos, ref res)) {
+ strpos = res;
goto repeat_success;
}
- return false;
+ Backtrack (cp);
+
+ if (length >= end)
+ return false;
+
+ // Match an item
+ if (!EvalByteCode (pc + 11, strpos, ref res))
+ return false;
+ strpos = res;
+ length ++;
+ }
+ } else {
+ // Then match as many items as possible, recording
+ // backtracking information
+ int old_stack_size = stack.Count;
+ while (length < end) {
+ int cp = Checkpoint ();
+ if (!EvalByteCode (pc + 11, strpos, ref res)) {
+ Backtrack (cp);
+ break;
+ }
+ stack.Push (cp);
+ stack.Push (strpos);
+ strpos = res;
+ length++;
+ }
+
+ if (tail <= pc)
+ throw new Exception ();
+
+ // Then, match the tail, backtracking as necessary.
+ while (true) {
+ if (EvalByteCode (tail, strpos, ref res)) {
+ strpos = res;
+ stack.Count = old_stack_size;
+ goto repeat_success;
+ }
+ if (stack.Count == old_stack_size)
+ return false;
+
+ // Backtrack
+ strpos = stack.Pop ();
+ Backtrack (stack.Pop ());
+ if (trace_rx)
+ Console.WriteLine ("backtracking to: {0}", strpos);
}
- strpos = res;
- length++;
}
- if (length != end)
- return false;
- repeat_success:
- pc += program [pc + 1] | (program [pc + 2] << 8);
- continue;
+
+ repeat_success:
+ // We matched the tail too so just return
+ goto Pass;
}
+
default:
Console.WriteLine ("evaluating: {0} at pc: {1}, strpos: {2}", (RxOp)program [pc], pc, strpos);
throw new NotSupportedException ();
}
- }
+ continue;
+
+ Pass:
+ strpos_result = strpos;
+ return true;
+ Fail:
+ return false;
+ test_char_group_passed:
+ pc = char_group_end;
+ char_group_end = 0;
+ continue;
+ } // end of while (true)
}
}
}
-