3 // namespace: System.Text.RegularExpressions
\r
6 // author: Dan Lewis (dlewis@gmx.co.uk)
\r
10 using System.Collections;
\r
12 namespace System.Text.RegularExpressions {
\r
13 abstract class LinkRef {
\r
17 interface ICompiler {
\r
19 IMachineFactory GetMachineFactory ();
\r
21 // instruction emission
\r
26 // character matching
\r
28 void EmitCharacter (char c, bool negate, bool ignore, bool reverse);
\r
29 void EmitCategory (Category cat, bool negate, bool reverse);
\r
30 void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse);
\r
31 void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse);
\r
35 void EmitString (string str, bool ignore, bool reverse);
\r
36 void EmitPosition (Position pos);
\r
37 void EmitOpen (int gid);
\r
38 void EmitClose (int gid);
\r
39 void EmitBalance (int gid, int balance);
\r
40 void EmitReference (int gid, bool ignore, bool reverse);
\r
44 void EmitIfDefined (int gid, LinkRef tail);
\r
45 void EmitSub (LinkRef tail);
\r
46 void EmitTest (LinkRef yes, LinkRef tail);
\r
47 void EmitBranch (LinkRef next);
\r
48 void EmitJump (LinkRef target);
\r
49 void EmitRepeat (int min, int max, bool lazy, LinkRef until);
\r
50 void EmitUntil (LinkRef repeat);
\r
51 void EmitIn (LinkRef tail);
\r
52 void EmitInfo (int count, int min, int max);
\r
53 void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail);
\r
54 void EmitAnchor (bool reverse, int offset, LinkRef tail);
\r
56 // event for the CILCompiler
\r
57 void EmitBranchEnd();
\r
58 void EmitAlternationEnd();
\r
61 void ResolveLink (LinkRef link);
\r
64 class InterpreterFactory : IMachineFactory {
\r
65 public InterpreterFactory (ushort[] pattern) {
\r
66 this.pattern = pattern;
\r
69 public IMachine NewInstance () {
\r
70 return new Interpreter (pattern);
\r
73 public int GroupCount {
\r
74 get { return pattern[0]; }
\r
77 public IDictionary Mapping {
\r
78 get { return mapping; }
\r
79 set { mapping = value; }
\r
82 private IDictionary mapping;
\r
83 private ushort[] pattern;
\r
86 class PatternCompiler : ICompiler {
\r
87 public static ushort EncodeOp (OpCode op, OpFlags flags) {
\r
88 return (ushort)((int)op | ((int)flags & 0xff00));
\r
91 public static void DecodeOp (ushort word, out OpCode op, out OpFlags flags) {
\r
92 op = (OpCode)(word & 0x00ff);
\r
93 flags = (OpFlags)(word & 0xff00);
\r
96 public PatternCompiler () {
\r
97 pgm = new ArrayList ();
\r
100 // ICompiler implementation
\r
102 public void Reset () {
\r
106 public IMachineFactory GetMachineFactory () {
\r
107 ushort[] image = new ushort[pgm.Count];
\r
108 pgm.CopyTo (image);
\r
110 return new InterpreterFactory (image);
\r
113 public void EmitFalse () {
\r
114 Emit (OpCode.False);
\r
117 public void EmitTrue () {
\r
118 Emit (OpCode.True);
\r
121 public void EmitCharacter (char c, bool negate, bool ignore, bool reverse) {
\r
122 Emit (OpCode.Character, MakeFlags (negate, ignore, reverse, false));
\r
125 c = Char.ToLower (c);
\r
130 public void EmitCategory (Category cat, bool negate, bool reverse) {
\r
131 Emit (OpCode.Category, MakeFlags (negate, false, reverse, false));
\r
132 Emit ((ushort)cat);
\r
135 public void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse) {
\r
136 Emit (OpCode.Range, MakeFlags (negate, ignore, reverse, false));
\r
141 public void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse) {
\r
142 Emit (OpCode.Set, MakeFlags (negate, ignore, reverse, false));
\r
145 int len = (set.Length + 0xf) >> 4;
\r
146 Emit ((ushort)len);
\r
149 while (len -- != 0) {
\r
151 for (int i = 0; i < 16; ++ i) {
\r
152 if (b >= set.Length)
\r
156 word |= (ushort)(1 << i);
\r
163 public void EmitString (string str, bool ignore, bool reverse) {
\r
164 Emit (OpCode.String, MakeFlags (false, ignore, reverse, false));
\r
165 int len = str.Length;
\r
166 Emit ((ushort)len);
\r
169 str = str.ToLower ();
\r
171 for (int i = 0; i < len; ++ i)
\r
172 Emit ((ushort)str[i]);
\r
175 public void EmitPosition (Position pos) {
\r
176 Emit (OpCode.Position, 0);
\r
177 Emit ((ushort)pos);
\r
180 public void EmitOpen (int gid) {
\r
181 Emit (OpCode.Open);
\r
182 Emit ((ushort)gid);
\r
185 public void EmitClose (int gid) {
\r
186 Emit (OpCode.Close);
\r
187 Emit ((ushort)gid);
\r
190 public void EmitBalance (int gid, int balance) {
\r
191 Emit (OpCode.Balance);
\r
192 Emit ((ushort)gid);
\r
193 Emit ((ushort)balance);
\r
196 public void EmitReference (int gid, bool ignore, bool reverse) {
\r
197 Emit (OpCode.Reference, MakeFlags (false, ignore, reverse, false));
\r
198 Emit ((ushort)gid);
\r
201 public void EmitIfDefined (int gid, LinkRef tail) {
\r
203 Emit (OpCode.IfDefined);
\r
205 Emit ((ushort)gid);
\r
208 public void EmitSub (LinkRef tail) {
\r
214 public void EmitTest (LinkRef yes, LinkRef tail) {
\r
217 Emit (OpCode.Test);
\r
222 public void EmitBranch (LinkRef next) {
\r
224 Emit (OpCode.Branch, 0);
\r
228 public void EmitJump (LinkRef target) {
\r
229 BeginLink (target);
\r
230 Emit (OpCode.Jump, 0);
\r
234 public void EmitRepeat (int min, int max, bool lazy, LinkRef until) {
\r
236 Emit (OpCode.Repeat, MakeFlags (false, false, false, lazy));
\r
238 Emit ((ushort)min);
\r
239 Emit ((ushort)max);
\r
242 public void EmitUntil (LinkRef repeat) {
\r
243 ResolveLink (repeat);
\r
244 Emit (OpCode.Until);
\r
247 public void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail) {
\r
249 Emit (OpCode.FastRepeat, MakeFlags (false, false, false, lazy));
\r
251 Emit ((ushort)min);
\r
252 Emit ((ushort)max);
\r
255 public void EmitIn (LinkRef tail) {
\r
261 public void EmitAnchor (bool reverse, int offset, LinkRef tail) {
\r
263 Emit (OpCode.Anchor, MakeFlags(false, false, reverse, false));
\r
265 Emit ((ushort)offset);
\r
268 public void EmitInfo (int count, int min, int max) {
\r
269 Emit (OpCode.Info);
\r
270 Emit ((ushort)count);
\r
271 Emit ((ushort)min);
\r
272 Emit ((ushort)max);
\r
275 public LinkRef NewLink () {
\r
276 return new PatternLinkStack ();
\r
279 public void ResolveLink (LinkRef lref) {
\r
280 PatternLinkStack stack = (PatternLinkStack)lref;
\r
282 while (stack.Pop ())
\r
283 pgm[stack.OffsetAddress] = (ushort)stack.GetOffset (CurrentAddress);
\r
286 public void EmitBranchEnd(){}
\r
287 public void EmitAlternationEnd(){}
\r
291 private static OpFlags MakeFlags (bool negate, bool ignore, bool reverse, bool lazy) {
\r
293 if (negate) flags |= OpFlags.Negate;
\r
294 if (ignore) flags |= OpFlags.IgnoreCase;
\r
295 if (reverse) flags |= OpFlags.RightToLeft;
\r
296 if (lazy) flags |= OpFlags.Lazy;
\r
301 private void Emit (OpCode op) {
\r
302 Emit (op, (OpFlags)0);
\r
305 private void Emit (OpCode op, OpFlags flags) {
\r
306 Emit (EncodeOp (op, flags));
\r
309 private void Emit (ushort word) {
\r
313 private int CurrentAddress {
\r
314 get { return pgm.Count; }
\r
317 private void BeginLink (LinkRef lref) {
\r
318 PatternLinkStack stack = (PatternLinkStack)lref;
\r
319 stack.BaseAddress = CurrentAddress;
\r
322 private void EmitLink (LinkRef lref) {
\r
323 PatternLinkStack stack = (PatternLinkStack)lref;
\r
324 stack.OffsetAddress = CurrentAddress;
\r
325 Emit ((ushort)0); // placeholder
\r
330 private class PatternLinkStack : LinkStack {
\r
331 public PatternLinkStack () {
\r
334 public int BaseAddress {
\r
335 set { link.base_addr = value; }
\r
338 public int OffsetAddress {
\r
339 get { return link.offset_addr; }
\r
340 set { link.offset_addr = value; }
\r
343 public int GetOffset (int target_addr) {
\r
344 return target_addr - link.base_addr;
\r
347 // LinkStack implementation
\r
349 protected override object GetCurrent () { return link; }
\r
350 protected override void SetCurrent (object l) { link = (Link)l; }
\r
352 private struct Link {
\r
353 public int base_addr;
\r
354 public int offset_addr;
\r
360 private ArrayList pgm;
\r
363 abstract class LinkStack : LinkRef {
\r
364 public LinkStack () {
\r
365 stack = new Stack ();
\r
368 public void Push () {
\r
369 stack.Push (GetCurrent ());
\r
372 public bool Pop () {
\r
373 if (stack.Count > 0) {
\r
374 SetCurrent (stack.Pop ());
\r
381 protected abstract object GetCurrent ();
\r
382 protected abstract void SetCurrent (object l);
\r
384 private Stack stack;
\r
387 //Used by CILCompiler and Interpreter
\r
388 internal struct Mark {
\r
389 public int Start, End;
\r
390 public int Previous;
\r
392 public bool IsDefined {
\r
393 get { return Start >= 0 && End >= 0; }
\r
397 get { return Start < End ? Start : End; }
\r
400 public int Length {
\r
401 get { return Start < End ? End - Start : Start - End; }
\r