3 // namespace: System.Text.RegularExpressions
6 // author: Dan Lewis (dlewis@gmx.co.uk)
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 using System.Collections;
33 namespace System.Text.RegularExpressions {
34 abstract class LinkRef {
40 IMachineFactory GetMachineFactory ();
42 // instruction emission
49 void EmitCharacter (char c, bool negate, bool ignore, bool reverse);
50 void EmitCategory (Category cat, bool negate, bool reverse);
51 void EmitNotCategory (Category cat, bool negate, bool reverse);
52 void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse);
53 void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse);
57 void EmitString (string str, bool ignore, bool reverse);
58 void EmitPosition (Position pos);
59 void EmitOpen (int gid);
60 void EmitClose (int gid);
61 void EmitBalanceStart(int gid, int balance, bool capture, LinkRef tail);
63 void EmitReference (int gid, bool ignore, bool reverse);
67 void EmitIfDefined (int gid, LinkRef tail);
68 void EmitSub (LinkRef tail);
69 void EmitTest (LinkRef yes, LinkRef tail);
70 void EmitBranch (LinkRef next);
71 void EmitJump (LinkRef target);
72 void EmitRepeat (int min, int max, bool lazy, LinkRef until);
73 void EmitUntil (LinkRef repeat);
74 void EmitIn (LinkRef tail);
75 void EmitInfo (int count, int min, int max);
76 void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail);
77 void EmitAnchor (bool reverse, int offset, LinkRef tail);
79 // event for the CILCompiler
81 void EmitAlternationEnd();
84 void ResolveLink (LinkRef link);
87 class InterpreterFactory : IMachineFactory {
88 public InterpreterFactory (ushort[] pattern) {
89 this.pattern = pattern;
92 public IMachine NewInstance () {
93 return new Interpreter (pattern);
96 public int GroupCount {
97 get { return pattern[1]; }
100 public IDictionary Mapping {
101 get { return mapping; }
102 set { mapping = value; }
105 public string [] NamesMapping {
106 get { return namesMapping; }
107 set { namesMapping = value; }
110 private IDictionary mapping;
111 private ushort[] pattern;
112 private string [] namesMapping;
115 class PatternCompiler : ICompiler {
116 public static ushort EncodeOp (OpCode op, OpFlags flags) {
117 return (ushort)((int)op | ((int)flags & 0xff00));
120 public static void DecodeOp (ushort word, out OpCode op, out OpFlags flags) {
121 op = (OpCode)(word & 0x00ff);
122 flags = (OpFlags)(word & 0xff00);
125 public PatternCompiler () {
126 pgm = new ArrayList ();
129 // ICompiler implementation
131 public void Reset () {
135 public IMachineFactory GetMachineFactory () {
136 ushort[] image = new ushort[pgm.Count];
139 return new InterpreterFactory (image);
142 public void EmitFalse () {
146 public void EmitTrue () {
150 void EmitCount (int count)
152 uint ucount = (uint) count;
153 Emit ((ushort) (ucount & 0xFFFF)); // lo 16bits
154 Emit ((ushort) (ucount >> 16)); // hi
157 public void EmitCharacter (char c, bool negate, bool ignore, bool reverse) {
158 Emit (OpCode.Character, MakeFlags (negate, ignore, reverse, false));
161 c = Char.ToLower (c);
166 public void EmitCategory (Category cat, bool negate, bool reverse) {
167 Emit (OpCode.Category, MakeFlags (negate, false, reverse, false));
171 public void EmitNotCategory (Category cat, bool negate, bool reverse) {
172 Emit (OpCode.NotCategory, MakeFlags (negate, false, reverse, false));
176 public void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse) {
177 Emit (OpCode.Range, MakeFlags (negate, ignore, reverse, false));
182 public void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse) {
183 Emit (OpCode.Set, MakeFlags (negate, ignore, reverse, false));
186 int len = (set.Length + 0xf) >> 4;
190 while (len -- != 0) {
192 for (int i = 0; i < 16; ++ i) {
197 word |= (ushort)(1 << i);
204 public void EmitString (string str, bool ignore, bool reverse) {
205 Emit (OpCode.String, MakeFlags (false, ignore, reverse, false));
206 int len = str.Length;
210 str = str.ToLower ();
212 for (int i = 0; i < len; ++ i)
213 Emit ((ushort)str[i]);
216 public void EmitPosition (Position pos) {
217 Emit (OpCode.Position, 0);
221 public void EmitOpen (int gid) {
226 public void EmitClose (int gid) {
233 public void EmitBalanceStart (int gid, int balance, bool capture, LinkRef tail) {
235 Emit (OpCode.BalanceStart);
237 Emit ((ushort)balance);
238 Emit ((ushort)(capture ? 1 : 0));
242 public void EmitBalance () {
243 Emit (OpCode.Balance);
246 public void EmitReference (int gid, bool ignore, bool reverse) {
247 Emit (OpCode.Reference, MakeFlags (false, ignore, reverse, false));
251 public void EmitIfDefined (int gid, LinkRef tail) {
253 Emit (OpCode.IfDefined);
258 public void EmitSub (LinkRef tail) {
264 public void EmitTest (LinkRef yes, LinkRef tail) {
272 public void EmitBranch (LinkRef next) {
274 Emit (OpCode.Branch, 0);
278 public void EmitJump (LinkRef target) {
280 Emit (OpCode.Jump, 0);
284 public void EmitRepeat (int min, int max, bool lazy, LinkRef until) {
286 Emit (OpCode.Repeat, MakeFlags (false, false, false, lazy));
292 public void EmitUntil (LinkRef repeat) {
293 ResolveLink (repeat);
297 public void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail) {
299 Emit (OpCode.FastRepeat, MakeFlags (false, false, false, lazy));
305 public void EmitIn (LinkRef tail) {
311 public void EmitAnchor (bool reverse, int offset, LinkRef tail) {
313 Emit (OpCode.Anchor, MakeFlags(false, false, reverse, false));
315 Emit ((ushort)offset);
318 public void EmitInfo (int count, int min, int max) {
325 public LinkRef NewLink () {
326 return new PatternLinkStack ();
329 public void ResolveLink (LinkRef lref) {
330 PatternLinkStack stack = (PatternLinkStack)lref;
333 pgm[stack.OffsetAddress] = (ushort)stack.GetOffset (CurrentAddress);
336 public void EmitBranchEnd(){}
337 public void EmitAlternationEnd(){}
341 private static OpFlags MakeFlags (bool negate, bool ignore, bool reverse, bool lazy) {
343 if (negate) flags |= OpFlags.Negate;
344 if (ignore) flags |= OpFlags.IgnoreCase;
345 if (reverse) flags |= OpFlags.RightToLeft;
346 if (lazy) flags |= OpFlags.Lazy;
351 private void Emit (OpCode op) {
352 Emit (op, (OpFlags)0);
355 private void Emit (OpCode op, OpFlags flags) {
356 Emit (EncodeOp (op, flags));
359 private void Emit (ushort word) {
363 private int CurrentAddress {
364 get { return pgm.Count; }
367 private void BeginLink (LinkRef lref) {
368 PatternLinkStack stack = (PatternLinkStack)lref;
369 stack.BaseAddress = CurrentAddress;
372 private void EmitLink (LinkRef lref) {
373 PatternLinkStack stack = (PatternLinkStack)lref;
374 stack.OffsetAddress = CurrentAddress;
375 Emit ((ushort)0); // placeholder
380 private class PatternLinkStack : LinkStack {
381 public PatternLinkStack () {
384 public int BaseAddress {
385 set { link.base_addr = value; }
388 public int OffsetAddress {
389 get { return link.offset_addr; }
390 set { link.offset_addr = value; }
393 public int GetOffset (int target_addr) {
394 return target_addr - link.base_addr;
397 // LinkStack implementation
399 protected override object GetCurrent () { return link; }
400 protected override void SetCurrent (object l) { link = (Link)l; }
402 private struct Link {
403 public int base_addr;
404 public int offset_addr;
410 private ArrayList pgm;
413 abstract class LinkStack : LinkRef {
414 public LinkStack () {
415 stack = new Stack ();
418 public void Push () {
419 stack.Push (GetCurrent ());
423 if (stack.Count > 0) {
424 SetCurrent (stack.Pop ());
431 protected abstract object GetCurrent ();
432 protected abstract void SetCurrent (object l);
437 //Used by CILCompiler and Interpreter
438 internal struct Mark {
439 public int Start, End;
442 public bool IsDefined {
443 get { return Start >= 0 && End >= 0; }
447 get { return Start < End ? Start : End; }
451 get { return Start < End ? End - Start : Start - End; }