-//\r
-// assembly: System\r
-// namespace: System.Text.RegularExpressions\r
-// file: interpreter.cs\r
-//\r
-// author: Dan Lewis (dlewis@gmx.co.uk)\r
-// (c) 2002\r
-\r
-using System;\r
-using System.Collections;\r
-using System.Globalization;\r
-\r
-namespace System.Text.RegularExpressions {\r
-\r
- class Interpreter : IMachine {\r
- public Interpreter (ushort[] program) {\r
- this.program = program;\r
- this.checkpoints = new Stack ();\r
- this.qs = null;\r
-\r
- // process info block\r
-\r
- if ((OpCode)program[0] != OpCode.Info)\r
- throw NewInterpretException ("Can't find info block.");\r
-\r
- this.group_count = program[1] + 1;\r
- this.match_min = program[2];\r
- this.match_max = program[3];\r
-\r
- // setup\r
-\r
- this.captures = new Capture[group_count];\r
- this.program_start = 4;\r
- }\r
-\r
- // IMachine implementation\r
-\r
- public Match Scan (Regex regex, string text, int start, int end) {\r
- this.text = text;\r
- this.text_end = end;\r
- this.scan_ptr = start;\r
-\r
- if (Eval (Mode.Match, ref scan_ptr, program_start))\r
- return new Match (regex, this, end, captures);\r
-\r
- return Match.Empty;\r
- }\r
-\r
- // private methods\r
-\r
- private void Reset () {\r
- for (int i = 0; i < group_count; ++ i)\r
- captures[i] = new Capture (text);\r
- \r
- checkpoints.Clear ();\r
- checkpoint = 0;\r
- fast = repeat = null;\r
- }\r
-\r
- private bool Eval (Mode mode, ref int ref_ptr, int pc) {\r
- int ptr = ref_ptr;\r
- Begin:\r
- for (;;) {\r
- ushort word = program[pc];\r
- OpCode op = (OpCode)(word & 0x00ff);\r
- OpFlags flags = (OpFlags)(word & 0xff00);\r
-\r
- switch (op) {\r
- case OpCode.Anchor: {\r
- int skip = program[pc + 1];\r
-\r
- int anch_offset = program[pc + 2];\r
- int anch_ptr = ptr + anch_offset;\r
- int anch_end = text_end - match_min + anch_offset; // maximum anchor position\r
-\r
- // the general case for an anchoring expression is at the bottom, however we\r
- // do some checks for the common cases before to save processing time. the current\r
- // optimizer only outputs three types of anchoring expressions: fixed position,\r
- // fixed substring, and no anchor.\r
-\r
- OpCode anch_op = (OpCode)(program[pc + 3] & 0x00ff);\r
- if (anch_op == OpCode.Position && skip == 6) { // position anchor\r
- // Anchor\r
- // Position\r
- // True\r
-\r
- switch ((Position)program[pc + 4]) {\r
- case Position.StartOfString:\r
- if (anch_ptr == 0) {\r
- ptr = 0;\r
- if (TryMatch (ref ptr, pc + skip))\r
- goto Pass;\r
- }\r
- break;\r
- \r
- case Position.StartOfLine:\r
- if (anch_ptr == 0) {\r
- ptr = 0;\r
- if (TryMatch (ref ptr, pc + skip))\r
- goto Pass;\r
-\r
- ++ anch_ptr;\r
- }\r
-\r
- while (anch_ptr <= anch_end) {\r
- if (text[anch_ptr - 1] == '\n') {\r
- ptr = anch_ptr - anch_offset;\r
- if (TryMatch (ref ptr, pc + skip))\r
- goto Pass;\r
- }\r
-\r
- ++ anch_ptr;\r
- }\r
- break;\r
- \r
- case Position.StartOfScan:\r
- if (anch_ptr == scan_ptr) {\r
- ptr = scan_ptr - anch_offset;\r
- if (TryMatch (ref ptr, pc + skip))\r
- goto Pass;\r
- }\r
- break;\r
-\r
- default:\r
- // FIXME\r
- break;\r
- }\r
- }\r
- else if (qs != null ||\r
- (anch_op == OpCode.String && skip == 6 + program[pc + 4])) { // substring anchor\r
- // Anchor\r
- // String\r
- // True\r
-\r
- if (qs == null) {\r
- bool ignore = ((OpFlags)program[pc + 3] & OpFlags.IgnoreCase) != 0;\r
- string substring = GetString (pc + 3);\r
-\r
- qs = new QuickSearch (substring, ignore);\r
- }\r
-\r
- while (anch_ptr <= anch_end) {\r
- anch_ptr = qs.Search (text, anch_ptr, anch_end);\r
- if (anch_ptr < 0)\r
- break;\r
-\r
- ptr = anch_ptr - anch_offset;\r
- if (TryMatch (ref ptr, pc + skip))\r
- goto Pass;\r
-\r
- ++ anch_ptr;\r
- }\r
- }\r
- else if (anch_op == OpCode.True) { // no anchor\r
- // Anchor\r
- // True\r
-\r
- while (anch_ptr <= anch_end) {\r
- ptr = anch_ptr;\r
- if (TryMatch (ref ptr, pc + skip))\r
- goto Pass;\r
-\r
- ++ anch_ptr;\r
- }\r
- }\r
- else { // general case\r
- // Anchor\r
- // <expr>\r
- // True\r
-\r
- while (anch_ptr <= anch_end) {\r
- ptr = anch_ptr;\r
- if (Eval (Mode.Match, ref ptr, pc + 3)) {\r
- // anchor expression passed: try real expression at the correct offset\r
-\r
- ptr = anch_ptr - anch_offset;\r
- if (TryMatch (ref ptr, pc + skip))\r
- goto Pass;\r
- }\r
-\r
- ++ anch_ptr;\r
- }\r
- }\r
-\r
- goto Fail;\r
- }\r
- \r
- case OpCode.False: {\r
- goto Fail;\r
- }\r
-\r
- case OpCode.True: {\r
- goto Pass;\r
- }\r
-\r
- case OpCode.Position: {\r
- if (!IsPosition ((Position)program[pc + 1], ptr))\r
- goto Fail;\r
- pc += 2;\r
- break;\r
- }\r
-\r
- case OpCode.String: {\r
- bool reverse = (flags & OpFlags.RightToLeft) != 0;\r
- bool ignore = (flags & OpFlags.IgnoreCase) != 0;\r
- int len = program[pc + 1];\r
-\r
- if (reverse) {\r
- ptr -= len;\r
- if (ptr < 0)\r
- goto Fail;\r
- }\r
- else if (ptr + len > text_end)\r
- goto Fail;\r
-\r
- pc += 2;\r
- for (int i = 0; i < len; ++ i) {\r
- char c = text[ptr + i];\r
- if (ignore)\r
- c = Char.ToLower (c);\r
-\r
- if (c != (char)program[pc ++])\r
- goto Fail;\r
- }\r
-\r
- if (!reverse)\r
- ptr += len;\r
- break;\r
- }\r
-\r
- case OpCode.Reference: {\r
- bool reverse = (flags & OpFlags.RightToLeft) != 0;\r
- bool ignore = (flags & OpFlags.IgnoreCase) != 0;\r
- Capture cap = captures[program[pc + 1]].GetLastDefined ();\r
- if (cap == null)\r
- goto Fail;\r
-\r
- int str = cap.Index;\r
- int len = cap.Length;\r
-\r
- if (reverse) {\r
- ptr -= len;\r
- if (ptr < 0)\r
- goto Fail;\r
- }\r
- else if (ptr + len > text_end)\r
- goto Fail;\r
-\r
- pc += 2;\r
- for (int i = 0; i < len; ++ i) {\r
- if (ignore) {\r
- if (Char.ToLower (text[ptr + i]) != Char.ToLower (text[str + i]))\r
- goto Fail;\r
- }\r
- else {\r
- if (text[ptr + i] != text[str + i])\r
- goto Fail;\r
- }\r
- }\r
-\r
- if (!reverse)\r
- ptr += len;\r
- break;\r
- }\r
-\r
- case OpCode.Character: case OpCode.Category:\r
- case OpCode.Range: case OpCode.Set: {\r
- if (!EvalChar (mode, ref ptr, ref pc, false))\r
- goto Fail;\r
- break;\r
- }\r
-\r
- case OpCode.In: {\r
- int target = pc + program[pc + 1];\r
- pc += 2;\r
- if (!EvalChar (mode, ref ptr, ref pc, true))\r
- goto Fail;\r
-\r
- pc = target;\r
- break;\r
- }\r
-\r
- case OpCode.Open: {\r
- Open (program[pc + 1], ptr);\r
- pc += 2;\r
- break;\r
- }\r
-\r
- case OpCode.Close: {\r
- Close (program[pc + 1], ptr);\r
- pc += 2;\r
- break;\r
- }\r
-\r
- case OpCode.Balance: {\r
- Balance (program[pc + 1], program[pc + 2], ptr);\r
- break;\r
- }\r
-\r
- case OpCode.IfDefined: {\r
- Capture cap = captures[program[pc + 2]];\r
- if (cap.GetLastDefined () == null)\r
- pc += program[pc + 1];\r
- else\r
- pc += 3;\r
- break;\r
- }\r
-\r
- case OpCode.Sub: {\r
- if (!Eval (Mode.Match, ref ptr, pc + 2))\r
- goto Fail;\r
-\r
- pc += program[pc + 1];\r
- break;\r
- }\r
-\r
- case OpCode.Test: {\r
- int cp = Checkpoint ();\r
- int test_ptr = ptr;\r
- if (Eval (Mode.Match, ref test_ptr, pc + 3))\r
- pc += program[pc + 1];\r
- else {\r
- Backtrack (cp);\r
- pc += program[pc + 2];\r
- }\r
- break;\r
- }\r
-\r
- case OpCode.Branch: {\r
- OpCode branch_op;\r
- do {\r
- int cp = Checkpoint ();\r
- if (Eval (Mode.Match, ref ptr, pc + 2))\r
- goto Pass;\r
- \r
- Backtrack (cp);\r
- \r
- pc += program[pc + 1];\r
- branch_op = (OpCode)(program[pc] & 0xff);\r
- } while (branch_op != OpCode.False);\r
-\r
- goto Fail;\r
- }\r
-\r
- case OpCode.Jump: {\r
- pc += program[pc + 1];\r
- break;\r
- }\r
-\r
- case OpCode.Repeat: {\r
- this.repeat = new RepeatContext (\r
- this.repeat, // previous context\r
- program[pc + 2], // minimum\r
- program[pc + 3], // maximum\r
- (flags & OpFlags.Lazy) != 0, // lazy\r
- pc + 4 // subexpression\r
- );\r
-\r
- if (Eval (Mode.Match, ref ptr, pc + program[pc + 1]))\r
- goto Pass;\r
- else {\r
- this.repeat = this.repeat.Previous;\r
- goto Fail;\r
- }\r
- }\r
-\r
- case OpCode.Until: {\r
- RepeatContext current = this.repeat;\r
- int start = current.Start;\r
-\r
- if (!current.IsMinimum) {\r
- ++ current.Count;\r
- current.Start = ptr;\r
- if (Eval (Mode.Match, ref ptr, repeat.Expression))\r
- goto Pass;\r
-\r
- current.Start = start;\r
- -- current.Count;\r
- goto Fail;\r
- }\r
-\r
- if (ptr == current.Start) {\r
- // degenerate match ... match tail or fail\r
-\r
- this.repeat = current.Previous;\r
- if (Eval (Mode.Match, ref ptr, pc + 1))\r
- goto Pass;\r
- \r
- goto Fail;\r
- }\r
-\r
- if (current.IsLazy) {\r
- // match tail first ...\r
-\r
- this.repeat = current.Previous;\r
- int cp = Checkpoint ();\r
- if (Eval (Mode.Match, ref ptr, pc + 1))\r
- goto Pass;\r
-\r
- Backtrack (cp);\r
-\r
- // ... then match more\r
-\r
- this.repeat = current;\r
- if (!current.IsMaximum) {\r
- ++ current.Count;\r
- current.Start = ptr;\r
- if (Eval (Mode.Match, ref ptr, current.Expression))\r
- goto Pass;\r
-\r
- current.Start = start;\r
- -- current.Count;\r
- goto Fail;\r
- }\r
-\r
- return false;\r
- }\r
- else {\r
- // match more first ...\r
-\r
- if (!current.IsMaximum) {\r
- int cp = Checkpoint ();\r
- ++ current.Count;\r
- current.Start = ptr;\r
- if (Eval (Mode.Match, ref ptr, current.Expression))\r
- goto Pass;\r
-\r
- current.Start = start;\r
- -- current.Count;\r
- Backtrack (cp);\r
- }\r
-\r
- // ... then match tail\r
-\r
- this.repeat = current.Previous;\r
- if (Eval (Mode.Match, ref ptr, pc + 1))\r
- goto Pass;\r
-\r
- this.repeat = current;\r
- goto Fail;\r
- }\r
- }\r
-\r
- case OpCode.FastRepeat: {\r
- this.fast = new RepeatContext (\r
- fast,\r
- program[pc + 2], // minimum\r
- program[pc + 3], // maximum\r
- (flags & OpFlags.Lazy) != 0, // lazy\r
- pc + 4 // subexpression\r
- );\r
- fast.Start = ptr;\r
-\r
- int cp = Checkpoint ();\r
-\r
- pc += program[pc + 1]; // tail expression\r
- ushort tail_word = program[pc];\r
-\r
- int c1, c2; // first character of tail operator\r
- int coff; // 0 or -1 depending on direction\r
-\r
- OpCode tail_op = (OpCode)(tail_word & 0xff);\r
- if (tail_op == OpCode.Character || tail_op == OpCode.String) {\r
- OpFlags tail_flags = (OpFlags)(tail_word & 0xff00);\r
-\r
- if (tail_op == OpCode.String)\r
- c1 = program[pc + 2]; // first char of string\r
- else\r
- c1 = program[pc + 1]; // character\r
- \r
- if ((tail_flags & OpFlags.IgnoreCase) != 0)\r
- c2 = Char.ToUpper ((char)c1); // ignore case\r
- else\r
- c2 = c1;\r
-\r
- if ((tail_flags & OpFlags.RightToLeft) != 0)\r
- coff = -1; // reverse\r
- else\r
- coff = 0;\r
- }\r
- else {\r
- c1 = c2 = -1;\r
- coff = 0;\r
- }\r
-\r
- if (fast.IsLazy) {\r
- if (!fast.IsMinimum && !Eval (Mode.Count, ref ptr, fast.Expression)) {\r
- //Console.WriteLine ("lazy fast: failed mininum.");\r
- fast = fast.Previous;\r
- goto Fail;\r
- }\r
- \r
- while (true) {\r
- int p = ptr + coff;\r
- if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&\r
- Eval (Mode.Match, ref ptr, pc))\r
- break;\r
-\r
- if (fast.IsMaximum) {\r
- //Console.WriteLine ("lazy fast: failed with maximum.");\r
- fast = fast.Previous;\r
- goto Fail;\r
- }\r
-\r
- Backtrack (cp);\r
- if (!Eval (Mode.Count, ref ptr, fast.Expression)) {\r
- //Console.WriteLine ("lazy fast: no more.");\r
- fast = fast.Previous;\r
- goto Fail;\r
- }\r
- }\r
- fast = fast.Previous;\r
- goto Pass;\r
- }\r
- else {\r
- if (!Eval (Mode.Count, ref ptr, fast.Expression)) {\r
- fast = fast.Previous;\r
- goto Fail;\r
- }\r
- \r
- int width;\r
- if (fast.Count > 0)\r
- width = (ptr - fast.Start) / fast.Count;\r
- else\r
- width = 0;\r
-\r
- while (true) {\r
- int p = ptr + coff;\r
- if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&\r
- Eval (Mode.Match, ref ptr, pc))\r
- break;\r
-\r
- -- fast.Count;\r
- if (!fast.IsMinimum) {\r
- fast = fast.Previous;\r
- goto Fail;\r
- }\r
-\r
- ptr -= width;\r
- Backtrack (cp);\r
- }\r
- fast = fast.Previous;\r
- goto Pass;\r
- }\r
- }\r
-\r
- case OpCode.Info: {\r
- throw NewInterpretException ("Info block found in pattern.");\r
- }\r
- }\r
- }\r
- Pass:\r
- ref_ptr = ptr;\r
-\r
- switch (mode) {\r
- case Mode.Match:\r
- return true;\r
-\r
- case Mode.Count: {\r
- ++ fast.Count;\r
- if (fast.IsMaximum || (fast.IsLazy && fast.IsMinimum))\r
- return true;\r
-\r
- pc = fast.Expression;\r
- goto Begin;\r
- }\r
- }\r
-\r
- Fail:\r
- switch (mode) {\r
- case Mode.Match:\r
- return false;\r
-\r
- case Mode.Count: {\r
- if (!fast.IsLazy && fast.IsMinimum)\r
- return true;\r
-\r
- ref_ptr = fast.Start;\r
- return false;\r
- }\r
- }\r
-\r
- return false;\r
- }\r
-\r
- private bool EvalChar (Mode mode, ref int ptr, ref int pc, bool multi) {\r
- bool consumed = false;\r
- char c = '\0';\r
- bool negate;\r
- bool ignore;\r
- do {\r
- ushort word = program[pc];\r
- OpCode op = (OpCode)(word & 0x00ff);\r
- OpFlags flags = (OpFlags)(word & 0xff00);\r
-\r
- ++ pc;\r
-\r
- ignore = (flags & OpFlags.IgnoreCase) != 0;\r
- \r
- // consume character: the direction of an In construct is\r
- // determined by the direction of its first op\r
-\r
- if (!consumed) {\r
- if ((flags & OpFlags.RightToLeft) != 0) {\r
- if (ptr <= 0)\r
- return false;\r
-\r
- c = text[-- ptr];\r
- }\r
- else {\r
- if (ptr >= text_end)\r
- return false;\r
-\r
- c = text[ptr ++];\r
- }\r
-\r
- if (ignore)\r
- c = Char.ToLower (c);\r
-\r
- consumed = true;\r
- }\r
-\r
- // negate flag\r
-\r
- negate = (flags & OpFlags.Negate) != 0;\r
-\r
- // execute op\r
- \r
- switch (op) {\r
- case OpCode.True:\r
- return true;\r
-\r
- case OpCode.False:\r
- return false;\r
- \r
- case OpCode.Character: {\r
- if (c == (char)program[pc ++])\r
- return !negate;\r
- break;\r
- }\r
-\r
- case OpCode.Category: {\r
- if (CategoryUtils.IsCategory ((Category)program[pc ++], c))\r
- return !negate;\r
-\r
- break;\r
- }\r
- \r
- case OpCode.Range: {\r
- int lo = (char)program[pc ++];\r
- int hi = (char)program[pc ++];\r
- if (lo <= c && c <= hi)\r
- return !negate;\r
- break;\r
- }\r
-\r
- case OpCode.Set: {\r
- int lo = (char)program[pc ++];\r
- int len = (char)program[pc ++];\r
- int bits = pc;\r
- pc += len;\r
-\r
- int i = (int)c - lo;\r
- if (i < 0 || i >= len << 4)\r
- break;\r
-\r
- if ((program[bits + (i >> 4)] & (1 << (i & 0xf))) != 0)\r
- return !negate;\r
- break;\r
- }\r
- }\r
- } while (multi);\r
-\r
- return negate;\r
- }\r
-\r
- private bool TryMatch (ref int ref_ptr, int pc) {\r
- Reset ();\r
- \r
- int ptr = ref_ptr;\r
- captures[0].Open (ptr);\r
- if (Eval (Mode.Match, ref ptr, pc)) {\r
- captures[0].Close (ptr);\r
- ref_ptr = ptr;\r
- return true;\r
- }\r
-\r
- return false;\r
- }\r
- \r
- private bool IsPosition (Position pos, int ptr) {\r
- switch (pos) {\r
- case Position.Start: case Position.StartOfString:\r
- return ptr == 0;\r
-\r
- case Position.StartOfLine:\r
- return ptr == 0 || text[ptr - 1] == '\n';\r
- \r
- case Position.StartOfScan:\r
- return ptr == scan_ptr;\r
- \r
- case Position.End:\r
- return ptr == text_end ||\r
- (ptr == text_end - 1 && text[ptr] == '\n');\r
-\r
- case Position.EndOfLine:\r
- return ptr == text_end || text[ptr] == '\n';\r
- \r
- case Position.EndOfString:\r
- return ptr == text_end;\r
- \r
- case Position.Boundary:\r
- if (text_end == 0)\r
- return false;\r
-\r
- if (ptr == 0)\r
- return IsWordChar (text[ptr]);\r
- else if (ptr == text_end)\r
- return IsWordChar (text[ptr - 1]);\r
- else\r
- return IsWordChar (text[ptr]) != IsWordChar (text[ptr - 1]);\r
-\r
- case Position.NonBoundary:\r
- if (text_end == 0)\r
- return false;\r
-\r
- if (ptr == 0)\r
- return !IsWordChar (text[ptr]);\r
- else if (ptr == text_end)\r
- return !IsWordChar (text[ptr - 1]);\r
- else\r
- return IsWordChar (text[ptr]) == IsWordChar (text[ptr - 1]);\r
- \r
- default:\r
- return false;\r
- }\r
- }\r
-\r
- private bool IsWordChar (char c) {\r
- return CategoryUtils.IsCategory (Category.Word, c);\r
- }\r
-\r
- private string GetString (int pc) {\r
- int len = program[pc + 1];\r
- int str = pc + 2;\r
-\r
- char[] cs = new char[len];\r
- for (int i = 0; i < len; ++ i)\r
- cs[i] = (char)program[str ++];\r
-\r
- return new string (cs);\r
- }\r
-\r
- // capture management\r
-\r
- private void Open (int gid, int ptr) {\r
- Capture cap = captures[gid];\r
- if (cap.IsDefined || cap.Checkpoint < checkpoint) {\r
- cap = new Capture (cap, checkpoint);\r
- captures[gid] = cap;\r
- }\r
-\r
- cap.Open (ptr);\r
- }\r
-\r
- private void Close (int gid, int ptr) {\r
- captures[gid].Close (ptr);\r
- }\r
-\r
- private void Balance (int gid, int balance_gid, int ptr) {\r
- Capture balance = captures[balance_gid];\r
- if (!balance.IsDefined)\r
- throw NewInterpretException ("Invalid state - balancing group not closed.");\r
-\r
- if (gid > 0) {\r
- Open (gid, balance.Index + balance.Length);\r
- Close (gid, ptr);\r
- }\r
-\r
- captures[balance_gid] = balance.Previous;\r
- }\r
-\r
- private int Checkpoint () {\r
- checkpoints.Push (captures);\r
- captures = (Capture[])captures.Clone ();\r
- checkpoint = checkpoints.Count;\r
-\r
- return checkpoint;\r
- }\r
-\r
- private void Backtrack (int cp) {\r
- if (cp > checkpoints.Count)\r
- throw NewInterpretException ("Can't backtrack forwards");\r
-\r
- while (checkpoints.Count > cp)\r
- checkpoints.Pop ();\r
-\r
- captures = (Capture[])checkpoints.Peek ();\r
- checkpoint = cp;\r
-\r
- // TODO optimize this\r
- }\r
-\r
- private Exception NewInterpretException (string msg) {\r
- return new ApplicationException (msg);\r
- }\r
-\r
- // interpreter attributes\r
-\r
- private ushort[] program; // regex program\r
- private int program_start; // first instruction after info block\r
- private string text; // input text\r
- private int text_end; // end of input text (last character + 1)\r
- private int group_count; // number of capturing groups\r
- private int match_min, match_max; // match width information\r
- private QuickSearch qs; // fast substring matcher\r
-\r
- // match state\r
- \r
- private int scan_ptr; // start of scan\r
-\r
- private Capture[] captures; // current captures\r
-\r
- private int checkpoint; // last checkpoint\r
- private Stack checkpoints; // checkpointed captures\r
- \r
- private RepeatContext repeat; // current repeat context\r
- private RepeatContext fast; // fast repeat context\r
-\r
- // private classes\r
-\r
- private class RepeatContext {\r
- public RepeatContext (RepeatContext previous, int min, int max, bool lazy, int expr_pc) {\r
- this.previous = previous;\r
- this.min = min;\r
- this.max = max;\r
- this.lazy = lazy;\r
- this.expr_pc = expr_pc;\r
- \r
- this.start = -1;\r
- this.count = 0;\r
- }\r
-\r
- public int Count {\r
- get { return count; }\r
- set { count = value; }\r
- }\r
-\r
- public int Start {\r
- get { return start; }\r
- set { start = value; }\r
- }\r
-\r
- public bool IsMinimum {\r
- get { return min <= count; }\r
- }\r
-\r
- public bool IsMaximum {\r
- get { return max <= count; }\r
- }\r
-\r
- public bool IsLazy {\r
- get { return lazy; }\r
- }\r
-\r
- public int Expression {\r
- get { return expr_pc; }\r
- }\r
-\r
- public RepeatContext Previous {\r
- get { return previous; }\r
- }\r
- \r
- private int start;\r
- private int min, max;\r
- private bool lazy;\r
- private int expr_pc;\r
- private RepeatContext previous;\r
-\r
- private int count;\r
- }\r
-\r
- private enum Mode {\r
- Search,\r
- Match,\r
- Count\r
- }\r
- }\r
-}\r
+//
+// assembly: System
+// namespace: System.Text.RegularExpressions
+// file: interpreter.cs
+//
+// author: Dan Lewis (dlewis@gmx.co.uk)
+// (c) 2002
+
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the
+// "Software"), to deal in the Software without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to
+// the following conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+using System;
+using System.Collections;
+using System.Diagnostics;
+using System.Globalization;
+
+namespace System.Text.RegularExpressions {
+
+ class Interpreter : IMachine {
+ private int ReadProgramCount (int ptr)
+ {
+ int ret = program [ptr + 1];
+ ret <<= 16;
+ ret += program [ptr];
+ return ret;
+ }
+
+ public Interpreter (ushort[] program) {
+ this.program = program;
+ this.qs = null;
+
+ // process info block
+
+ Debug.Assert ((OpCode)program[0] == OpCode.Info, "Regex", "Cant' find info block");
+
+ this.group_count = ReadProgramCount (1) + 1;
+ this.match_min = ReadProgramCount (3);
+ //this.match_max = ReadProgramCount (5);
+
+ // setup
+
+ this.program_start = 7;
+ this.groups = new int [group_count];
+ }
+
+ // IMachine implementation
+
+ public Match Scan (Regex regex, string text, int start, int end) {
+ this.text = text;
+ this.text_end = end;
+ this.scan_ptr = start;
+
+ if (Eval (Mode.Match, ref scan_ptr, program_start))
+ return GenerateMatch (regex);
+
+ return Match.Empty;
+ }
+
+ // private methods
+
+ private void Reset () {
+ ResetGroups ();
+ fast = repeat = null;
+ }
+
+ private bool Eval (Mode mode, ref int ref_ptr, int pc) {
+ int ptr = ref_ptr;
+ Begin:
+ for (;;) {
+ ushort word = program[pc];
+ OpCode op = (OpCode)(word & 0x00ff);
+ OpFlags flags = (OpFlags)(word & 0xff00);
+
+ switch (op) {
+ case OpCode.Anchor: {
+ int skip = program[pc + 1];
+
+ int anch_offset = program[pc + 2];
+ bool anch_reverse = (flags & OpFlags.RightToLeft) != 0;
+ int anch_ptr = anch_reverse ? ptr - anch_offset : ptr + anch_offset;
+ int anch_end = text_end - match_min + anch_offset; // maximum anchor position
+
+
+ int anch_begin = 0;
+
+
+ // the general case for an anchoring expression is at the bottom, however we
+ // do some checks for the common cases before to save processing time. the current
+ // optimizer only outputs three types of anchoring expressions: fixed position,
+ // fixed substring, and no anchor.
+
+ OpCode anch_op = (OpCode)(program[pc + 3] & 0x00ff);
+ if (anch_op == OpCode.Position && skip == 6) { // position anchor
+ // Anchor
+ // Position
+ // True
+
+ switch ((Position)program[pc + 4]) {
+ case Position.StartOfString:
+ if (anch_reverse || anch_offset == 0) {
+ if (anch_reverse)
+ ptr = anch_offset;
+ if (TryMatch (ref ptr, pc + skip))
+ goto Pass;
+ }
+ break;
+
+ case Position.StartOfLine:
+ if (anch_ptr == 0) {
+ ptr = 0;
+ if (TryMatch (ref ptr, pc + skip))
+ goto Pass;
+
+ ++ anch_ptr;
+ }
+
+ while ((anch_reverse && anch_ptr >= 0) || (!anch_reverse && anch_ptr <= anch_end)) {
+ if (anch_ptr == 0 || text[anch_ptr - 1] == '\n') {
+ if (anch_reverse)
+ ptr = anch_ptr == anch_end ? anch_ptr : anch_ptr + anch_offset;
+ else
+ ptr = anch_ptr == 0 ? anch_ptr : anch_ptr - anch_offset;
+ if (TryMatch (ref ptr, pc + skip))
+ goto Pass;
+ }
+
+ if (anch_reverse)
+ -- anch_ptr;
+ else
+ ++ anch_ptr;
+ }
+ break;
+
+ case Position.StartOfScan:
+ if (anch_ptr == scan_ptr) {
+ ptr = anch_reverse ? scan_ptr + anch_offset : scan_ptr - anch_offset;
+ if (TryMatch (ref ptr, pc + skip))
+ goto Pass;
+ }
+ break;
+
+ default:
+ // FIXME
+ break;
+ }
+ }
+ else if (qs != null ||
+ (anch_op == OpCode.String && skip == 6 + program[pc + 4])) { // substring anchor
+ // Anchor
+ // String
+ // True
+
+ bool reverse = ((OpFlags)program[pc + 3] & OpFlags.RightToLeft) != 0;
+ string substring = GetString (pc + 3);
+
+ if (qs == null) {
+ bool ignore = ((OpFlags)program[pc + 3] & OpFlags.IgnoreCase) != 0;
+
+ qs = new QuickSearch (substring, ignore, reverse);
+ }
+ while ((anch_reverse && anch_ptr >= anch_begin)
+ || (!anch_reverse && anch_ptr <= anch_end)) {
+
+ if (reverse)
+ {
+ anch_ptr = qs.Search (text, anch_ptr, anch_begin);
+ if (anch_ptr != -1)
+ anch_ptr += substring.Length ;
+
+ }
+ else
+ anch_ptr = qs.Search (text, anch_ptr, anch_end);
+ if (anch_ptr < 0)
+ break;
+
+ ptr = reverse ? anch_ptr + anch_offset : anch_ptr - anch_offset;
+ if (TryMatch (ref ptr, pc + skip))
+ goto Pass;
+
+ if (reverse)
+ anch_ptr -= 2;
+ else
+ ++ anch_ptr;
+ }
+ }
+ else if (anch_op == OpCode.True) { // no anchor
+ // Anchor
+ // True
+
+
+ while ((anch_reverse && anch_ptr >= anch_begin)
+ || (!anch_reverse && anch_ptr <= anch_end)) {
+
+ ptr = anch_ptr;
+ if (TryMatch (ref ptr, pc + skip))
+ goto Pass;
+ if (anch_reverse)
+ -- anch_ptr;
+ else
+ ++ anch_ptr;
+ }
+ }
+ else { // general case
+ // Anchor
+ // <expr>
+ // True
+
+ while ((anch_reverse && anch_ptr >= anch_begin)
+ || (!anch_reverse && anch_ptr <= anch_end)) {
+
+ ptr = anch_ptr;
+ if (Eval (Mode.Match, ref ptr, pc + 3)) {
+ // anchor expression passed: try real expression at the correct offset
+
+ ptr = anch_reverse ? anch_ptr + anch_offset : anch_ptr - anch_offset;
+ if (TryMatch (ref ptr, pc + skip))
+ goto Pass;
+ }
+
+ if (anch_reverse)
+ -- anch_ptr;
+ else
+ ++ anch_ptr;
+ }
+ }
+
+ goto Fail;
+ }
+
+ case OpCode.False: {
+ goto Fail;
+ }
+
+ case OpCode.True: {
+ goto Pass;
+ }
+
+ case OpCode.Position: {
+ if (!IsPosition ((Position)program[pc + 1], ptr))
+ goto Fail;
+ pc += 2;
+ break;
+ }
+
+ case OpCode.String: {
+ bool reverse = (flags & OpFlags.RightToLeft) != 0;
+ bool ignore = (flags & OpFlags.IgnoreCase) != 0;
+ int len = program[pc + 1];
+
+ if (reverse) {
+ ptr -= len;
+ if (ptr < 0)
+ goto Fail;
+ }
+ else
+ if (ptr + len > text_end)
+ goto Fail;
+
+ pc += 2;
+ for (int i = 0; i < len; ++ i) {
+ char c = text[ptr + i];
+ if (ignore)
+ c = Char.ToLower (c);
+
+ if (c != (char)program[pc ++])
+ goto Fail;
+ }
+
+ if (!reverse)
+ ptr += len;
+ break;
+ }
+
+ case OpCode.Reference: {
+ bool reverse = (flags & OpFlags.RightToLeft) != 0;
+ bool ignore = (flags & OpFlags.IgnoreCase) != 0;
+ int m = GetLastDefined (program [pc + 1]);
+ if (m < 0)
+ goto Fail;
+
+ int str = marks [m].Index;
+ int len = marks [m].Length;
+
+ if (reverse) {
+ ptr -= len;
+ if (ptr < 0)
+ goto Fail;
+ }
+ else if (ptr + len > text_end)
+ goto Fail;
+
+ pc += 2;
+ for (int i = 0; i < len; ++ i) {
+ if (ignore) {
+ if (Char.ToLower (text[ptr + i]) != Char.ToLower (text[str + i]))
+ goto Fail;
+ }
+ else {
+ if (text[ptr + i] != text[str + i])
+ goto Fail;
+ }
+ }
+
+ if (!reverse)
+ ptr += len;
+ break;
+ }
+
+ case OpCode.Character: case OpCode.Category: case OpCode.NotCategory:
+ case OpCode.Range: case OpCode.Set: {
+ if (!EvalChar (mode, ref ptr, ref pc, false))
+ goto Fail;
+ break;
+ }
+
+ case OpCode.In: {
+ int target = pc + program[pc + 1];
+ pc += 2;
+ if (!EvalChar (mode, ref ptr, ref pc, true))
+ goto Fail;
+
+ pc = target;
+ break;
+ }
+
+ case OpCode.Open: {
+ Open (program[pc + 1], ptr);
+ pc += 2;
+ break;
+ }
+
+ case OpCode.Close: {
+ Close (program[pc + 1], ptr);
+ pc += 2;
+ break;
+ }
+
+ case OpCode.BalanceStart: {
+
+ int start = ptr; //point before the balancing group
+
+ if (!Eval (Mode.Match, ref ptr, pc + 5))
+ goto Fail;
+
+
+
+ if(!Balance (program[pc + 1], program[pc + 2], (program[pc + 3] == 1 ? true : false) , start)) {
+ goto Fail;
+ }
+
+
+ pc += program[pc + 4];
+ break;
+ }
+
+ case OpCode.Balance: {
+ goto Pass;
+ }
+
+ case OpCode.IfDefined: {
+ int m = GetLastDefined (program [pc + 2]);
+ if (m < 0)
+ pc += program[pc + 1];
+ else
+ pc += 3;
+ break;
+ }
+
+ case OpCode.Sub: {
+ if (!Eval (Mode.Match, ref ptr, pc + 2))
+ goto Fail;
+
+ pc += program[pc + 1];
+ break;
+ }
+
+ case OpCode.Test: {
+ int cp = Checkpoint ();
+ int test_ptr = ptr;
+ if (Eval (Mode.Match, ref test_ptr, pc + 3))
+ pc += program[pc + 1];
+ else {
+ Backtrack (cp);
+ pc += program[pc + 2];
+ }
+ break;
+ }
+
+ case OpCode.Branch: {
+ OpCode branch_op;
+ do {
+ int cp = Checkpoint ();
+ if (Eval (Mode.Match, ref ptr, pc + 2))
+ goto Pass;
+
+ Backtrack (cp);
+
+ pc += program[pc + 1];
+ branch_op = (OpCode)(program[pc] & 0xff);
+ } while (branch_op != OpCode.False);
+
+ goto Fail;
+ }
+
+ case OpCode.Jump: {
+ pc += program[pc + 1];
+ break;
+ }
+
+ case OpCode.Repeat: {
+ this.repeat = new RepeatContext (
+ this.repeat, // previous context
+ ReadProgramCount (pc + 2), // minimum
+ ReadProgramCount (pc + 4), // maximum
+ (flags & OpFlags.Lazy) != 0, // lazy
+ pc + 6 // subexpression
+ );
+
+ if (Eval (Mode.Match, ref ptr, pc + program[pc + 1]))
+ goto Pass;
+ else {
+ this.repeat = this.repeat.Previous;
+ goto Fail;
+ }
+ }
+
+ case OpCode.Until: {
+ RepeatContext current = this.repeat;
+
+ //
+ // Can we avoid recursion?
+ //
+ // Backtracking can be forced in nested quantifiers from the tail of this quantifier.
+ // Thus, we cannot, in general, use a simple loop on repeat.Expression to handle
+ // quantifiers.
+ //
+ // If 'deep' was unmolested, that implies that there was no nested quantifiers.
+ // Thus, we can safely avoid recursion.
+ //
+ if (deep == current)
+ goto Pass;
+
+ int start = current.Start;
+ int start_count = current.Count;
+
+ while (!current.IsMinimum) {
+ ++ current.Count;
+ current.Start = ptr;
+ deep = current;
+ if (!Eval (Mode.Match, ref ptr, current.Expression)) {
+ current.Start = start;
+ current.Count = start_count;
+ goto Fail;
+ }
+ if (deep != current) // recursive mode
+ goto Pass;
+ }
+
+ if (ptr == current.Start) {
+ // degenerate match ... match tail or fail
+ this.repeat = current.Previous;
+ deep = null;
+ if (Eval (Mode.Match, ref ptr, pc + 1))
+ goto Pass;
+
+ this.repeat = current;
+ goto Fail;
+ }
+
+ if (current.IsLazy) {
+ for (;;) {
+ // match tail first ...
+ this.repeat = current.Previous;
+ deep = null;
+ int cp = Checkpoint ();
+ if (Eval (Mode.Match, ref ptr, pc + 1))
+ goto Pass;
+
+ Backtrack (cp);
+
+ // ... then match more
+ this.repeat = current;
+ if (current.IsMaximum)
+ goto Fail;
+ ++ current.Count;
+ current.Start = ptr;
+ deep = current;
+ if (!Eval (Mode.Match, ref ptr, current.Expression)) {
+ current.Start = start;
+ current.Count = start_count;
+ goto Fail;
+ }
+ if (deep != current) // recursive mode
+ goto Pass;
+ // Degenerate match: ptr has not moved since the last (failed) tail match.
+ // So, next and subsequent tail matches will fail.
+ if (ptr == current.Start)
+ goto Fail;
+ }
+ } else {
+ int stack_size = stack.Count;
+
+ // match greedily as much as possible
+ while (!current.IsMaximum) {
+ int cp = Checkpoint ();
+ int old_ptr = ptr;
+ int old_start = current.Start;
+
+ ++ current.Count;
+ current.Start = ptr;
+ deep = current;
+ if (!Eval (Mode.Match, ref ptr, current.Expression)) {
+ -- current.Count;
+ current.Start = old_start;
+ Backtrack (cp);
+ break;
+ }
+ if (deep != current) {
+ // recursive mode: no more backtracking, truncate the stack
+ stack.Count = stack_size;
+ goto Pass;
+ }
+ stack.Push (cp);
+ stack.Push (old_ptr);
+
+ // Degenerate match: no point going on
+ if (ptr == current.Start)
+ break;
+ }
+
+ // then, match the tail, backtracking as necessary.
+ this.repeat = current.Previous;
+ for (;;) {
+ deep = null;
+ if (Eval (Mode.Match, ref ptr, pc + 1)) {
+ stack.Count = stack_size;
+ goto Pass;
+ }
+ if (stack.Count == stack_size) {
+ this.repeat = current;
+ goto Fail;
+ }
+
+ --current.Count;
+ ptr = stack.Pop ();
+ Backtrack (stack.Pop ());
+ }
+ }
+ }
+
+ case OpCode.FastRepeat: {
+ this.fast = new RepeatContext (
+ fast,
+ ReadProgramCount (pc + 2), // minimum
+ ReadProgramCount (pc + 4), // maximum
+ (flags & OpFlags.Lazy) != 0, // lazy
+ pc + 6 // subexpression
+ );
+
+ fast.Start = ptr;
+
+ int cp = Checkpoint ();
+
+ pc += program[pc + 1]; // tail expression
+ ushort tail_word = program[pc];
+
+ int c1 = -1; // first character of tail operator
+ int c2 = -1; // ... and the same character, in upper case if ignoring case
+ int coff = 0; // 0 or -1 depending on direction
+
+ OpCode tail_op = (OpCode)(tail_word & 0xff);
+ if (tail_op == OpCode.Character || tail_op == OpCode.String) {
+ OpFlags tail_flags = (OpFlags)(tail_word & 0xff00);
+
+ if ((tail_flags & OpFlags.Negate) != 0)
+ goto skip;
+
+ if (tail_op == OpCode.String)
+ {
+ int offset = 0;
+
+ if ((tail_flags & OpFlags.RightToLeft) != 0)
+ {
+ offset = program[pc + 1] - 1 ;
+ }
+
+ c1 = program[pc + 2 + offset]; // first char of string
+ }
+ else
+ c1 = program[pc + 1]; // character
+
+ if ((tail_flags & OpFlags.IgnoreCase) != 0)
+ c2 = Char.ToUpper ((char)c1); // ignore case
+ else
+ c2 = c1;
+
+ if ((tail_flags & OpFlags.RightToLeft) != 0)
+ coff = -1; // reverse
+ else
+ coff = 0;
+ }
+
+ skip:
+ if (fast.IsLazy) {
+ if (!fast.IsMinimum && !Eval (Mode.Count, ref ptr, fast.Expression)) {
+ //Console.WriteLine ("lazy fast: failed mininum.");
+ fast = fast.Previous;
+ goto Fail;
+ }
+
+ while (true) {
+ int p = ptr + coff;
+ if (c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) {
+ deep = null;
+ if (Eval (Mode.Match, ref ptr, pc))
+ break;
+ }
+
+ if (fast.IsMaximum) {
+ //Console.WriteLine ("lazy fast: failed with maximum.");
+ fast = fast.Previous;
+ goto Fail;
+ }
+
+ Backtrack (cp);
+ if (!Eval (Mode.Count, ref ptr, fast.Expression)) {
+ //Console.WriteLine ("lazy fast: no more.");
+ fast = fast.Previous;
+ goto Fail;
+ }
+ }
+ fast = fast.Previous;
+ goto Pass;
+ }
+ else {
+ if (!Eval (Mode.Count, ref ptr, fast.Expression)) {
+ fast = fast.Previous;
+ goto Fail;
+ }
+
+ int width;
+ if (fast.Count > 0)
+ width = (ptr - fast.Start) / fast.Count;
+ else
+ width = 0;
+
+ while (true) {
+ int p = ptr + coff;
+ if (c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) {
+ deep = null;
+ if (Eval (Mode.Match, ref ptr, pc))
+ break;
+ }
+
+ -- fast.Count;
+ if (!fast.IsMinimum) {
+ fast = fast.Previous;
+ goto Fail;
+ }
+
+ ptr -= width;
+ Backtrack (cp);
+ }
+ fast = fast.Previous;
+ goto Pass;
+ }
+ }
+
+ case OpCode.Info: {
+ Debug.Assert (false, "Regex", "Info block found in pattern");
+ goto Fail;
+ }
+ }
+ }
+ Pass:
+ ref_ptr = ptr;
+
+ switch (mode) {
+ case Mode.Match:
+ return true;
+
+ case Mode.Count: {
+ ++ fast.Count;
+ if (fast.IsMaximum || (fast.IsLazy && fast.IsMinimum))
+ return true;
+
+ pc = fast.Expression;
+ goto Begin;
+ }
+ }
+
+ Fail:
+ switch (mode) {
+ case Mode.Match:
+ return false;
+
+ case Mode.Count: {
+ if (!fast.IsLazy && fast.IsMinimum)
+ return true;
+
+ ref_ptr = fast.Start;
+ return false;
+ }
+ }
+
+ return false;
+ }
+
+ private bool EvalChar (Mode mode, ref int ptr, ref int pc, bool multi) {
+ bool consumed = false;
+ char c = '\0';
+ bool negate;
+ bool ignore;
+
+ do {
+ ushort word = program[pc];
+ OpCode op = (OpCode)(word & 0x00ff);
+ OpFlags flags = (OpFlags)(word & 0xff00);
+
+ ++ pc;
+
+ ignore = (flags & OpFlags.IgnoreCase) != 0;
+
+ // consume character: the direction of an In construct is
+ // determined by the direction of its first op
+
+ if (!consumed) {
+ if ((flags & OpFlags.RightToLeft) != 0) {
+ if (ptr <= 0)
+ return false;
+
+ c = text[-- ptr];
+ }
+ else {
+ if (ptr >= text_end)
+ return false;
+
+ c = text[ptr ++];
+ }
+
+ if (ignore)
+ c = Char.ToLower (c);
+
+ consumed = true;
+ }
+
+ // negate flag
+
+ negate = (flags & OpFlags.Negate) != 0;
+
+ // execute op
+
+ switch (op) {
+ case OpCode.True:
+ return true;
+
+ case OpCode.False:
+ return false;
+
+ case OpCode.Character: {
+ if (c == (char)program[pc ++])
+ return !negate;
+ break;
+ }
+
+ case OpCode.Category: {
+ if (CategoryUtils.IsCategory ((Category)program[pc ++], c))
+ return !negate;
+ break;
+ }
+
+ case OpCode.NotCategory: {
+ if (!CategoryUtils.IsCategory ((Category)program[pc ++], c))
+ return !negate;
+ break;
+ }
+
+ case OpCode.Range: {
+ int lo = (char)program[pc ++];
+ int hi = (char)program[pc ++];
+ if (lo <= c && c <= hi)
+ return !negate;
+ break;
+ }
+
+ case OpCode.Set: {
+ int lo = (char)program[pc ++];
+ int len = (char)program[pc ++];
+ int bits = pc;
+ pc += len;
+
+ int i = (int)c - lo;
+ if (i < 0 || i >= len << 4)
+ break;
+
+
+ if ((program[bits + (i >> 4)] & (1 << (i & 0xf))) != 0)
+ return !negate;
+ break;
+ }
+ }
+ } while (multi);
+
+ return negate;
+ }
+
+ private bool TryMatch (ref int ref_ptr, int pc) {
+ Reset ();
+
+ int ptr = ref_ptr;
+ marks [groups [0]].Start = ptr;
+ if (Eval (Mode.Match, ref ptr, pc)) {
+ marks [groups [0]].End = ptr;
+ ref_ptr = ptr;
+ return true;
+ }
+
+ return false;
+ }
+
+ private bool IsPosition (Position pos, int ptr) {
+ switch (pos) {
+ case Position.Start: case Position.StartOfString:
+ return ptr == 0;
+
+ case Position.StartOfLine:
+ return ptr == 0 || text[ptr - 1] == '\n';
+
+ case Position.StartOfScan:
+ return ptr == scan_ptr;
+
+ case Position.End:
+ return ptr == text_end ||
+ (ptr == text_end - 1 && text[ptr] == '\n');
+
+ case Position.EndOfLine:
+ return ptr == text_end || text[ptr] == '\n';
+
+ case Position.EndOfString:
+ return ptr == text_end;
+
+ case Position.Boundary:
+ if (text_end == 0)
+ return false;
+
+ if (ptr == 0)
+ return IsWordChar (text[ptr]);
+ else if (ptr == text_end)
+ return IsWordChar (text[ptr - 1]);
+ else
+ return IsWordChar (text[ptr]) != IsWordChar (text[ptr - 1]);
+
+ case Position.NonBoundary:
+ if (text_end == 0)
+ return false;
+
+ if (ptr == 0)
+ return !IsWordChar (text[ptr]);
+ else if (ptr == text_end)
+ return !IsWordChar (text[ptr - 1]);
+ else
+ return IsWordChar (text[ptr]) == IsWordChar (text[ptr - 1]);
+
+ default:
+ return false;
+ }
+ }
+
+ private bool IsWordChar (char c) {
+ return CategoryUtils.IsCategory (Category.Word, c);
+ }
+
+ private string GetString (int pc) {
+ int len = program[pc + 1];
+ int str = pc + 2;
+
+ char[] cs = new char[len];
+ for (int i = 0; i < len; ++ i)
+ cs[i] = (char)program[str ++];
+
+ return new string (cs);
+ }
+
+ // capture management
+
+ private void Open (int gid, int ptr) {
+ int m = groups [gid];
+ if (m < mark_start || marks [m].IsDefined) {
+ m = CreateMark (m);
+ groups [gid] = m;
+ }
+
+ marks [m].Start = ptr;
+ }
+
+ private void Close (int gid, int ptr) {
+ marks [groups [gid]].End = ptr;
+ }
+
+ private bool Balance (int gid, int balance_gid, bool capture, int ptr) {
+ int b = groups [balance_gid];
+
+ if(b == -1 || marks[b].Index < 0) {
+ //Group not previously matched
+ return false;
+ }
+
+ Debug.Assert (marks [b].IsDefined, "Regex", "Balancng group not closed");
+
+ if (gid > 0 && capture){
+ Open (gid, marks [b].Index + marks [b].Length);
+ Close (gid, ptr);
+ }
+
+ groups [balance_gid] = marks[b].Previous;
+
+ return true;
+ }
+
+ private int Checkpoint () {
+ mark_start = mark_end;
+ return mark_start;
+ }
+
+ private void Backtrack (int cp) {
+ Debug.Assert (cp > mark_start, "Regex", "Attempt to backtrack forwards");
+
+ for (int i = 0; i < groups.Length; ++ i) {
+ int m = groups [i];
+ while (cp <= m)
+ m = marks [m].Previous;
+
+ groups [i] = m;
+ }
+ }
+
+ private void ResetGroups () {
+ int n = groups.Length;
+ if (marks == null)
+ marks = new Mark [n * 10];
+
+ for (int i = 0; i < n; ++ i) {
+ groups [i] = i;
+
+ marks [i].Start = -1;
+ marks [i].End = -1;
+ marks [i].Previous = -1;
+ }
+
+ mark_start = 0;
+ mark_end = n;
+ }
+
+ private int GetLastDefined (int gid) {
+ int m = groups [gid];
+ while (m >= 0 && !marks [m].IsDefined)
+ m = marks [m].Previous;
+
+ return m;
+ }
+
+ private int CreateMark (int previous) {
+ if (mark_end == marks.Length) {
+ Mark [] dest = new Mark [marks.Length * 2];
+ marks.CopyTo (dest, 0);
+ marks = dest;
+ }
+
+ int m = mark_end ++;
+ marks [m].Start = marks [m].End = -1;
+ marks [m].Previous = previous;
+
+ return m;
+ }
+
+ private void GetGroupInfo (int gid, out int first_mark_index, out int n_caps)
+ {
+ first_mark_index = -1;
+ n_caps = 0;
+ for (int m = groups [gid]; m >= 0; m = marks [m].Previous) {
+ if (!marks [m].IsDefined)
+ continue;
+ if (first_mark_index < 0)
+ first_mark_index = m;
+ ++n_caps;
+ }
+ }
+
+ private void PopulateGroup (Group g, int first_mark_index, int n_caps)
+ {
+ int i = 1;
+ for (int m = marks [first_mark_index].Previous; m >= 0; m = marks [m].Previous) {
+ if (!marks [m].IsDefined)
+ continue;
+ Capture cap = new Capture (text, marks [m].Index, marks [m].Length);
+ g.Captures.SetValue (cap, n_caps - 1 - i);
+ ++i;
+ }
+ }
+
+ private Match GenerateMatch (Regex regex)
+ {
+ int n_caps, first_mark_index;
+ Group g;
+ GetGroupInfo (0, out first_mark_index, out n_caps);
+ Match retval = new Match (regex, this, text, text_end, groups.Length,
+ marks [first_mark_index].Index, marks [first_mark_index].Length, n_caps);
+ PopulateGroup (retval, first_mark_index, n_caps);
+
+ for (int gid = 1; gid < groups.Length; ++ gid) {
+ GetGroupInfo (gid, out first_mark_index, out n_caps);
+ if (first_mark_index < 0) {
+ g = Group.Fail;
+ } else {
+ g = new Group (text, marks [first_mark_index].Index, marks [first_mark_index].Length, n_caps);
+ PopulateGroup (g, first_mark_index, n_caps);
+ }
+ retval.Groups.SetValue (g, gid);
+ }
+ return retval;
+ }
+
+ // interpreter attributes
+
+ private ushort[] program; // regex program
+ private int program_start; // first instruction after info block
+ private string text; // input text
+ private int text_end; // end of input text (last character + 1)
+ private int group_count; // number of capturing groups
+ private int match_min;//, match_max; // match width information
+ private QuickSearch qs; // fast substring matcher
+
+ // match state
+
+ private int scan_ptr; // start of scan
+
+ private RepeatContext repeat; // current repeat context
+ private RepeatContext fast; // fast repeat context
+
+ // Repeat/Until handling
+ private IntStack stack = new IntStack (); // utility stack
+ private RepeatContext deep; // points to the most-nested repeat context
+
+ private Mark[] marks = null; // mark stack
+ private int mark_start; // start of current checkpoint
+ private int mark_end; // end of checkpoint/next free mark
+
+ private int[] groups; // current group definitions
+
+ // private classes
+
+ private struct IntStack {
+ int [] values;
+ int count;
+ public int Pop ()
+ {
+ return values [--count];
+ }
+ public void Push (int value)
+ {
+ if (values == null) {
+ values = new int [8];
+ } else if (count == values.Length) {
+ int new_size = values.Length;
+ new_size += new_size >> 1;
+ int [] new_values = new int [new_size];
+ for (int i = 0; i < count; ++i)
+ new_values [i] = values [i];
+ values = new_values;
+ }
+ values [count++] = value;
+ }
+ public int Top {
+ get { return values [count - 1]; }
+ }
+ public int Count {
+ get { return count; }
+ set {
+ if (value > count)
+ throw new SystemException ("can only truncate the stack");
+ count = value;
+ }
+ }
+ }
+
+ private class RepeatContext {
+ public RepeatContext (RepeatContext previous, int min, int max, bool lazy, int expr_pc) {
+ this.previous = previous;
+ this.min = min;
+ this.max = max;
+ this.lazy = lazy;
+ this.expr_pc = expr_pc;
+
+ this.start = -1;
+ this.count = 0;
+ }
+
+ public int Count {
+ get { return count; }
+ set { count = value; }
+ }
+
+ public int Start {
+ get { return start; }
+ set { start = value; }
+ }
+
+ public bool IsMinimum {
+ get { return min <= count; }
+ }
+
+ public bool IsMaximum {
+ get { return max <= count; }
+ }
+
+ public bool IsLazy {
+ get { return lazy; }
+ }
+
+ public int Expression {
+ get { return expr_pc; }
+ }
+
+ public RepeatContext Previous {
+ get { return previous; }
+ }
+
+ private int start;
+ private int min, max;
+ private bool lazy;
+ private int expr_pc;
+ private RepeatContext previous;
+
+ private int count;
+ }
+
+ private enum Mode {
+ Search,
+ Match,
+ Count
+ }
+ }
+}