3 // namespace: System.Text.RegularExpressions
6 // author: Dan Lewis (dlewis@gmx.co.uk)
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 using System.Diagnostics;
32 using System.Collections;
34 namespace System.Text.RegularExpressions {
35 abstract class LinkRef {
37 static int next_label = 1;
38 int label = next_label++;
40 public override string ToString ()
42 return "L" + label.ToString ();
49 IMachineFactory GetMachineFactory ();
51 // instruction emission
58 void EmitCharacter (char c, bool negate, bool ignore, bool reverse);
59 void EmitCategory (Category cat, bool negate, bool reverse);
60 void EmitNotCategory (Category cat, bool negate, bool reverse);
61 void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse);
62 void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse);
66 void EmitString (string str, bool ignore, bool reverse);
67 void EmitPosition (Position pos);
68 void EmitOpen (int gid);
69 void EmitClose (int gid);
70 void EmitBalanceStart(int gid, int balance, bool capture, LinkRef tail);
72 void EmitReference (int gid, bool ignore, bool reverse);
76 void EmitIfDefined (int gid, LinkRef tail);
77 void EmitSub (LinkRef tail);
78 void EmitTest (LinkRef yes, LinkRef tail);
79 void EmitBranch (LinkRef next);
80 void EmitJump (LinkRef target);
81 void EmitRepeat (int min, int max, bool lazy, LinkRef until);
82 void EmitUntil (LinkRef repeat);
83 void EmitIn (LinkRef tail);
84 void EmitInfo (int count, int min, int max);
85 void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail);
86 void EmitAnchor (bool reverse, int offset, LinkRef tail);
88 // event for the CILCompiler
90 void EmitAlternationEnd();
93 void ResolveLink (LinkRef link);
96 class InterpreterFactory : IMachineFactory {
97 public InterpreterFactory (ushort[] pattern) {
98 this.pattern = pattern;
101 public IMachine NewInstance () {
102 return new Interpreter (pattern);
105 public int GroupCount {
106 get { return pattern[1]; }
114 public IDictionary Mapping {
115 get { return mapping; }
116 set { mapping = value; }
119 public string [] NamesMapping {
120 get { return namesMapping; }
121 set { namesMapping = value; }
124 private IDictionary mapping;
125 private ushort[] pattern;
126 private string [] namesMapping;
130 class PatternCompiler : ICompiler {
131 public static ushort EncodeOp (OpCode op, OpFlags flags) {
132 return (ushort)((int)op | ((int)flags & 0xff00));
135 [Conditional ("TRACE_REGEX")]
136 static void TraceRegexp (string fmt, params object[] args)
138 Console.Write ("\t");
139 Console.WriteLine (fmt, args);
142 [Conditional ("TRACE_REGEX")]
143 static void TraceRegexpLabel (LinkRef lref)
145 Console.Write ("{0}:", lref);
148 public static void DecodeOp (ushort word, out OpCode op, out OpFlags flags) {
149 op = (OpCode)(word & 0x00ff);
150 flags = (OpFlags)(word & 0xff00);
153 public PatternCompiler () {
154 pgm = new ArrayList ();
157 // ICompiler implementation
159 public void Reset () {
163 public IMachineFactory GetMachineFactory () {
164 ushort[] image = new ushort[pgm.Count];
167 return new InterpreterFactory (image);
170 public void EmitFalse () {
172 TraceRegexp ("false");
175 public void EmitTrue () {
178 TraceRegexp ("true");
181 void EmitCount (int count)
183 uint ucount = (uint) count;
184 Emit ((ushort) (ucount & 0xFFFF)); // lo 16bits
185 Emit ((ushort) (ucount >> 16)); // hi
188 public void EmitCharacter (char c, bool negate, bool ignore, bool reverse) {
189 Emit (OpCode.Character, MakeFlags (negate, ignore, reverse, false));
192 c = Char.ToLower (c);
196 TraceRegexp ("character {0} negate {1} ignore {2} reverse {3}", c, negate, ignore, reverse);
199 public void EmitCategory (Category cat, bool negate, bool reverse) {
200 Emit (OpCode.Category, MakeFlags (negate, false, reverse, false));
203 TraceRegexp ("category {0} negate {1} reverse {2}", cat, negate, reverse);
206 public void EmitNotCategory (Category cat, bool negate, bool reverse) {
207 Emit (OpCode.NotCategory, MakeFlags (negate, false, reverse, false));
210 TraceRegexp ("not category {0} negate {1} reverse {2}", cat, negate, reverse);
213 public void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse) {
214 Emit (OpCode.Range, MakeFlags (negate, ignore, reverse, false));
218 TraceRegexp ("char range '{0}' - '{1}' negate {2} ignore {3} reverse {4}", lo, hi, negate, ignore, reverse);
221 public void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse) {
222 Emit (OpCode.Set, MakeFlags (negate, ignore, reverse, false));
225 int len = (set.Length + 0xf) >> 4;
229 while (len -- != 0) {
231 for (int i = 0; i < 16; ++ i) {
236 word |= (ushort)(1 << i);
242 TraceRegexp ("set lo '{0}' - '{1}' negate {2} ignore {3} reverse {4}", lo, set, negate, ignore, reverse);
245 public void EmitString (string str, bool ignore, bool reverse) {
246 Emit (OpCode.String, MakeFlags (false, ignore, reverse, false));
247 int len = str.Length;
251 str = str.ToLower ();
253 for (int i = 0; i < len; ++ i)
254 Emit ((ushort)str[i]);
255 TraceRegexp ("string '{0}' ignore {1} reverse {2}", str, ignore, reverse);
258 public void EmitPosition (Position pos) {
259 Emit (OpCode.Position, 0);
262 TraceRegexp ("position {0}", pos);
265 public void EmitOpen (int gid) {
269 TraceRegexp ("open {0}", gid);
272 public void EmitClose (int gid) {
276 TraceRegexp ("close {0}", gid);
281 public void EmitBalanceStart (int gid, int balance, bool capture, LinkRef tail) {
283 Emit (OpCode.BalanceStart);
285 Emit ((ushort)balance);
286 Emit ((ushort)(capture ? 1 : 0));
289 TraceRegexp ("balance start gid {0} balance {1} capture {2} tail {3}", gid, balance, capture, tail);
292 public void EmitBalance () {
293 Emit (OpCode.Balance);
295 TraceRegexp ("balance");
298 public void EmitReference (int gid, bool ignore, bool reverse) {
299 Emit (OpCode.Reference, MakeFlags (false, ignore, reverse, false));
302 TraceRegexp ("reference gid {0} ignore {1} reverse {2}", gid, ignore, reverse);
305 public void EmitIfDefined (int gid, LinkRef tail) {
307 Emit (OpCode.IfDefined);
311 TraceRegexp ("if defined gid {1} tail {2}", gid, tail);
314 public void EmitSub (LinkRef tail) {
319 TraceRegexp ("sub {0}", tail);
322 public void EmitTest (LinkRef yes, LinkRef tail) {
329 TraceRegexp ("test yes {0} tail {1}", yes, tail);
332 public void EmitBranch (LinkRef next) {
334 Emit (OpCode.Branch, 0);
337 TraceRegexp ("branch next {0}", next);
340 public void EmitJump (LinkRef target) {
342 Emit (OpCode.Jump, 0);
345 TraceRegexp ("jmp target {0}", target);
348 public void EmitRepeat (int min, int max, bool lazy, LinkRef until) {
350 Emit (OpCode.Repeat, MakeFlags (false, false, false, lazy));
355 TraceRegexp ("repeat min {0} max {1} lazy {2} until {3}", min, max, lazy, until);
358 public void EmitUntil (LinkRef repeat) {
359 ResolveLink (repeat);
362 TraceRegexp ("end until {0}", repeat);
365 public void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail) {
367 Emit (OpCode.FastRepeat, MakeFlags (false, false, false, lazy));
372 TraceRegexp ("repeat-fast min {0} max {1} lazy {2} tail {3}", min, max, lazy, tail);
375 public void EmitIn (LinkRef tail) {
380 TraceRegexp ("in tail {0}", tail);
383 public void EmitAnchor (bool reverse, int offset, LinkRef tail) {
385 Emit (OpCode.Anchor, MakeFlags(false, false, reverse, false));
387 Emit ((ushort)offset);
389 TraceRegexp ("anchor reverse {0} offset {1} tail {2}", reverse, offset, tail);
392 public void EmitInfo (int count, int min, int max) {
398 TraceRegexp ("info group count {0} match_min {1} match_max {2}", count, min, max);
401 public LinkRef NewLink () {
402 return new PatternLinkStack ();
405 public void ResolveLink (LinkRef lref) {
406 PatternLinkStack stack = (PatternLinkStack)lref;
409 pgm[stack.OffsetAddress] = (ushort)stack.GetOffset (CurrentAddress);
411 TraceRegexpLabel (lref);
414 public void EmitBranchEnd(){}
415 public void EmitAlternationEnd(){}
419 private static OpFlags MakeFlags (bool negate, bool ignore, bool reverse, bool lazy) {
421 if (negate) flags |= OpFlags.Negate;
422 if (ignore) flags |= OpFlags.IgnoreCase;
423 if (reverse) flags |= OpFlags.RightToLeft;
424 if (lazy) flags |= OpFlags.Lazy;
429 private void Emit (OpCode op) {
430 Emit (op, (OpFlags)0);
433 private void Emit (OpCode op, OpFlags flags) {
434 Emit (EncodeOp (op, flags));
437 private void Emit (ushort word) {
441 private int CurrentAddress {
442 get { return pgm.Count; }
445 private void BeginLink (LinkRef lref) {
446 PatternLinkStack stack = (PatternLinkStack)lref;
447 stack.BaseAddress = CurrentAddress;
450 private void EmitLink (LinkRef lref) {
451 PatternLinkStack stack = (PatternLinkStack)lref;
452 stack.OffsetAddress = CurrentAddress;
453 Emit ((ushort)0); // placeholder
458 private class PatternLinkStack : LinkStack {
459 public PatternLinkStack () {
462 public int BaseAddress {
463 set { link.base_addr = value; }
466 public int OffsetAddress {
467 get { return link.offset_addr; }
468 set { link.offset_addr = value; }
471 public int GetOffset (int target_addr) {
472 return target_addr - link.base_addr;
475 // LinkStack implementation
477 protected override object GetCurrent () { return link; }
478 protected override void SetCurrent (object l) { link = (Link)l; }
480 private struct Link {
481 public int base_addr;
482 public int offset_addr;
488 private ArrayList pgm;
491 abstract class LinkStack : LinkRef {
492 public LinkStack () {
493 stack = new Stack ();
496 public void Push () {
497 stack.Push (GetCurrent ());
501 if (stack.Count > 0) {
502 SetCurrent (stack.Pop ());
509 protected abstract object GetCurrent ();
510 protected abstract void SetCurrent (object l);
515 //Used by CILCompiler and Interpreter
516 internal struct Mark {
517 public int Start, End;
520 public bool IsDefined {
521 get { return Start >= 0 && End >= 0; }
525 get { return Start < End ? Start : End; }
529 get { return Start < End ? End - Start : Start - End; }