3 // namespace: System.Text.RegularExpressions
6 // author: Dan Lewis (dlewis@gmx.co.uk)
10 using System.Collections;
12 namespace System.Text.RegularExpressions {
13 abstract class LinkRef {
19 IMachineFactory GetMachineFactory ();
21 // instruction emission
28 void EmitCharacter (char c, bool negate, bool ignore, bool reverse);
29 void EmitCategory (Category cat, bool negate, bool reverse);
30 void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse);
31 void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse);
35 void EmitString (string str, bool ignore, bool reverse);
36 void EmitPosition (Position pos);
37 void EmitOpen (int gid);
38 void EmitClose (int gid);
39 void EmitBalanceStart(int gid, int balance, bool capture, LinkRef tail);
41 void EmitReference (int gid, bool ignore, bool reverse);
45 void EmitIfDefined (int gid, LinkRef tail);
46 void EmitSub (LinkRef tail);
47 void EmitTest (LinkRef yes, LinkRef tail);
48 void EmitBranch (LinkRef next);
49 void EmitJump (LinkRef target);
50 void EmitRepeat (int min, int max, bool lazy, LinkRef until);
51 void EmitUntil (LinkRef repeat);
52 void EmitIn (LinkRef tail);
53 void EmitInfo (int count, int min, int max);
54 void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail);
55 void EmitAnchor (bool reverse, int offset, LinkRef tail);
57 // event for the CILCompiler
59 void EmitAlternationEnd();
62 void ResolveLink (LinkRef link);
65 class InterpreterFactory : IMachineFactory {
66 public InterpreterFactory (ushort[] pattern) {
67 this.pattern = pattern;
70 public IMachine NewInstance () {
71 return new Interpreter (pattern);
74 public int GroupCount {
75 get { return pattern[0]; }
78 public IDictionary Mapping {
79 get { return mapping; }
80 set { mapping = value; }
83 private IDictionary mapping;
84 private ushort[] pattern;
87 class PatternCompiler : ICompiler {
88 public static ushort EncodeOp (OpCode op, OpFlags flags) {
89 return (ushort)((int)op | ((int)flags & 0xff00));
92 public static void DecodeOp (ushort word, out OpCode op, out OpFlags flags) {
93 op = (OpCode)(word & 0x00ff);
94 flags = (OpFlags)(word & 0xff00);
97 public PatternCompiler () {
98 pgm = new ArrayList ();
101 // ICompiler implementation
103 public void Reset () {
107 public IMachineFactory GetMachineFactory () {
108 ushort[] image = new ushort[pgm.Count];
111 return new InterpreterFactory (image);
114 public void EmitFalse () {
118 public void EmitTrue () {
122 public void EmitCharacter (char c, bool negate, bool ignore, bool reverse) {
123 Emit (OpCode.Character, MakeFlags (negate, ignore, reverse, false));
126 c = Char.ToLower (c);
131 public void EmitCategory (Category cat, bool negate, bool reverse) {
132 Emit (OpCode.Category, MakeFlags (negate, false, reverse, false));
136 public void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse) {
137 Emit (OpCode.Range, MakeFlags (negate, ignore, reverse, false));
142 public void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse) {
143 Emit (OpCode.Set, MakeFlags (negate, ignore, reverse, false));
146 int len = (set.Length + 0xf) >> 4;
150 while (len -- != 0) {
152 for (int i = 0; i < 16; ++ i) {
157 word |= (ushort)(1 << i);
164 public void EmitString (string str, bool ignore, bool reverse) {
165 Emit (OpCode.String, MakeFlags (false, ignore, reverse, false));
166 int len = str.Length;
170 str = str.ToLower ();
172 for (int i = 0; i < len; ++ i)
173 Emit ((ushort)str[i]);
176 public void EmitPosition (Position pos) {
177 Emit (OpCode.Position, 0);
181 public void EmitOpen (int gid) {
186 public void EmitClose (int gid) {
193 public void EmitBalanceStart (int gid, int balance, bool capture, LinkRef tail) {
195 Emit (OpCode.BalanceStart);
197 Emit ((ushort)balance);
198 Emit ((ushort)(capture ? 1 : 0));
202 public void EmitBalance () {
203 Emit (OpCode.Balance);
206 public void EmitReference (int gid, bool ignore, bool reverse) {
207 Emit (OpCode.Reference, MakeFlags (false, ignore, reverse, false));
211 public void EmitIfDefined (int gid, LinkRef tail) {
213 Emit (OpCode.IfDefined);
218 public void EmitSub (LinkRef tail) {
224 public void EmitTest (LinkRef yes, LinkRef tail) {
232 public void EmitBranch (LinkRef next) {
234 Emit (OpCode.Branch, 0);
238 public void EmitJump (LinkRef target) {
240 Emit (OpCode.Jump, 0);
244 public void EmitRepeat (int min, int max, bool lazy, LinkRef until) {
246 Emit (OpCode.Repeat, MakeFlags (false, false, false, lazy));
252 public void EmitUntil (LinkRef repeat) {
253 ResolveLink (repeat);
257 public void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail) {
259 Emit (OpCode.FastRepeat, MakeFlags (false, false, false, lazy));
265 public void EmitIn (LinkRef tail) {
271 public void EmitAnchor (bool reverse, int offset, LinkRef tail) {
273 Emit (OpCode.Anchor, MakeFlags(false, false, reverse, false));
275 Emit ((ushort)offset);
278 public void EmitInfo (int count, int min, int max) {
280 Emit ((ushort)count);
285 public LinkRef NewLink () {
286 return new PatternLinkStack ();
289 public void ResolveLink (LinkRef lref) {
290 PatternLinkStack stack = (PatternLinkStack)lref;
293 pgm[stack.OffsetAddress] = (ushort)stack.GetOffset (CurrentAddress);
296 public void EmitBranchEnd(){}
297 public void EmitAlternationEnd(){}
301 private static OpFlags MakeFlags (bool negate, bool ignore, bool reverse, bool lazy) {
303 if (negate) flags |= OpFlags.Negate;
304 if (ignore) flags |= OpFlags.IgnoreCase;
305 if (reverse) flags |= OpFlags.RightToLeft;
306 if (lazy) flags |= OpFlags.Lazy;
311 private void Emit (OpCode op) {
312 Emit (op, (OpFlags)0);
315 private void Emit (OpCode op, OpFlags flags) {
316 Emit (EncodeOp (op, flags));
319 private void Emit (ushort word) {
323 private int CurrentAddress {
324 get { return pgm.Count; }
327 private void BeginLink (LinkRef lref) {
328 PatternLinkStack stack = (PatternLinkStack)lref;
329 stack.BaseAddress = CurrentAddress;
332 private void EmitLink (LinkRef lref) {
333 PatternLinkStack stack = (PatternLinkStack)lref;
334 stack.OffsetAddress = CurrentAddress;
335 Emit ((ushort)0); // placeholder
340 private class PatternLinkStack : LinkStack {
341 public PatternLinkStack () {
344 public int BaseAddress {
345 set { link.base_addr = value; }
348 public int OffsetAddress {
349 get { return link.offset_addr; }
350 set { link.offset_addr = value; }
353 public int GetOffset (int target_addr) {
354 return target_addr - link.base_addr;
357 // LinkStack implementation
359 protected override object GetCurrent () { return link; }
360 protected override void SetCurrent (object l) { link = (Link)l; }
362 private struct Link {
363 public int base_addr;
364 public int offset_addr;
370 private ArrayList pgm;
373 abstract class LinkStack : LinkRef {
374 public LinkStack () {
375 stack = new Stack ();
378 public void Push () {
379 stack.Push (GetCurrent ());
383 if (stack.Count > 0) {
384 SetCurrent (stack.Pop ());
391 protected abstract object GetCurrent ();
392 protected abstract void SetCurrent (object l);
397 //Used by CILCompiler and Interpreter
398 internal struct Mark {
399 public int Start, End;
402 public bool IsDefined {
403 get { return Start >= 0 && End >= 0; }
407 get { return Start < End ? Start : End; }
411 get { return Start < End ? End - Start : Start - End; }