// // assembly: System // namespace: System.Text.RegularExpressions // file: compiler.cs // // author: Dan Lewis (dlewis@gmx.co.uk) // (c) 2002 using System; using System.Collections; namespace System.Text.RegularExpressions { abstract class LinkRef { // empty } interface ICompiler { void Reset (); IMachineFactory GetMachineFactory (); // instruction emission void EmitFalse (); void EmitTrue (); // character matching void EmitCharacter (char c, bool negate, bool ignore, bool reverse); void EmitCategory (Category cat, bool negate, bool reverse); void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse); void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse); // other operators void EmitString (string str, bool ignore, bool reverse); void EmitPosition (Position pos); void EmitOpen (int gid); void EmitClose (int gid); void EmitBalance (int gid, int balance); void EmitReference (int gid, bool ignore, bool reverse); // constructs void EmitIfDefined (int gid, LinkRef tail); void EmitSub (LinkRef tail); void EmitTest (LinkRef yes, LinkRef tail); void EmitBranch (LinkRef next); void EmitJump (LinkRef target); void EmitRepeat (int min, int max, bool lazy, LinkRef until); void EmitUntil (LinkRef repeat); void EmitIn (LinkRef tail); void EmitInfo (int count, int min, int max); void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail); void EmitAnchor (bool reverse, int offset, LinkRef tail); // event for the CILCompiler void EmitBranchEnd(); void EmitAlternationEnd(); LinkRef NewLink (); void ResolveLink (LinkRef link); } class InterpreterFactory : IMachineFactory { public InterpreterFactory (ushort[] pattern) { this.pattern = pattern; } public IMachine NewInstance () { return new Interpreter (pattern); } public int GroupCount { get { return pattern[0]; } } public IDictionary Mapping { get { return mapping; } set { mapping = value; } } private IDictionary mapping; private ushort[] pattern; } class PatternCompiler : ICompiler { public static ushort EncodeOp (OpCode op, OpFlags flags) { return (ushort)((int)op | ((int)flags & 0xff00)); } public static void DecodeOp (ushort word, out OpCode op, out OpFlags flags) { op = (OpCode)(word & 0x00ff); flags = (OpFlags)(word & 0xff00); } public PatternCompiler () { pgm = new ArrayList (); } // ICompiler implementation public void Reset () { pgm.Clear (); } public IMachineFactory GetMachineFactory () { ushort[] image = new ushort[pgm.Count]; pgm.CopyTo (image); return new InterpreterFactory (image); } public void EmitFalse () { Emit (OpCode.False); } public void EmitTrue () { Emit (OpCode.True); } public void EmitCharacter (char c, bool negate, bool ignore, bool reverse) { Emit (OpCode.Character, MakeFlags (negate, ignore, reverse, false)); if (ignore) c = Char.ToLower (c); Emit ((ushort)c); } public void EmitCategory (Category cat, bool negate, bool reverse) { Emit (OpCode.Category, MakeFlags (negate, false, reverse, false)); Emit ((ushort)cat); } public void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse) { Emit (OpCode.Range, MakeFlags (negate, ignore, reverse, false)); Emit ((ushort)lo); Emit ((ushort)hi); } public void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse) { Emit (OpCode.Set, MakeFlags (negate, ignore, reverse, false)); Emit ((ushort)lo); int len = (set.Length + 0xf) >> 4; Emit ((ushort)len); int b = 0; while (len -- != 0) { ushort word = 0; for (int i = 0; i < 16; ++ i) { if (b >= set.Length) break; if (set[b ++]) word |= (ushort)(1 << i); } Emit (word); } } public void EmitString (string str, bool ignore, bool reverse) { Emit (OpCode.String, MakeFlags (false, ignore, reverse, false)); int len = str.Length; Emit ((ushort)len); if (ignore) str = str.ToLower (); for (int i = 0; i < len; ++ i) Emit ((ushort)str[i]); } public void EmitPosition (Position pos) { Emit (OpCode.Position, 0); Emit ((ushort)pos); } public void EmitOpen (int gid) { Emit (OpCode.Open); Emit ((ushort)gid); } public void EmitClose (int gid) { Emit (OpCode.Close); Emit ((ushort)gid); } public void EmitBalance (int gid, int balance) { Emit (OpCode.Balance); Emit ((ushort)gid); Emit ((ushort)balance); } public void EmitReference (int gid, bool ignore, bool reverse) { Emit (OpCode.Reference, MakeFlags (false, ignore, reverse, false)); Emit ((ushort)gid); } public void EmitIfDefined (int gid, LinkRef tail) { BeginLink (tail); Emit (OpCode.IfDefined); EmitLink (tail); Emit ((ushort)gid); } public void EmitSub (LinkRef tail) { BeginLink (tail); Emit (OpCode.Sub); EmitLink (tail); } public void EmitTest (LinkRef yes, LinkRef tail) { BeginLink (yes); BeginLink (tail); Emit (OpCode.Test); EmitLink (yes); EmitLink (tail); } public void EmitBranch (LinkRef next) { BeginLink (next); Emit (OpCode.Branch, 0); EmitLink (next); } public void EmitJump (LinkRef target) { BeginLink (target); Emit (OpCode.Jump, 0); EmitLink (target); } public void EmitRepeat (int min, int max, bool lazy, LinkRef until) { BeginLink (until); Emit (OpCode.Repeat, MakeFlags (false, false, false, lazy)); EmitLink (until); Emit ((ushort)min); Emit ((ushort)max); } public void EmitUntil (LinkRef repeat) { ResolveLink (repeat); Emit (OpCode.Until); } public void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail) { BeginLink (tail); Emit (OpCode.FastRepeat, MakeFlags (false, false, false, lazy)); EmitLink (tail); Emit ((ushort)min); Emit ((ushort)max); } public void EmitIn (LinkRef tail) { BeginLink (tail); Emit (OpCode.In); EmitLink (tail); } public void EmitAnchor (bool reverse, int offset, LinkRef tail) { BeginLink (tail); Emit (OpCode.Anchor, MakeFlags(false, false, reverse, false)); EmitLink (tail); Emit ((ushort)offset); } public void EmitInfo (int count, int min, int max) { Emit (OpCode.Info); Emit ((ushort)count); Emit ((ushort)min); Emit ((ushort)max); } public LinkRef NewLink () { return new PatternLinkStack (); } public void ResolveLink (LinkRef lref) { PatternLinkStack stack = (PatternLinkStack)lref; while (stack.Pop ()) pgm[stack.OffsetAddress] = (ushort)stack.GetOffset (CurrentAddress); } public void EmitBranchEnd(){} public void EmitAlternationEnd(){} // private members private static OpFlags MakeFlags (bool negate, bool ignore, bool reverse, bool lazy) { OpFlags flags = 0; if (negate) flags |= OpFlags.Negate; if (ignore) flags |= OpFlags.IgnoreCase; if (reverse) flags |= OpFlags.RightToLeft; if (lazy) flags |= OpFlags.Lazy; return flags; } private void Emit (OpCode op) { Emit (op, (OpFlags)0); } private void Emit (OpCode op, OpFlags flags) { Emit (EncodeOp (op, flags)); } private void Emit (ushort word) { pgm.Add (word); } private int CurrentAddress { get { return pgm.Count; } } private void BeginLink (LinkRef lref) { PatternLinkStack stack = (PatternLinkStack)lref; stack.BaseAddress = CurrentAddress; } private void EmitLink (LinkRef lref) { PatternLinkStack stack = (PatternLinkStack)lref; stack.OffsetAddress = CurrentAddress; Emit ((ushort)0); // placeholder stack.Push (); } private class PatternLinkStack : LinkStack { public PatternLinkStack () { } public int BaseAddress { set { link.base_addr = value; } } public int OffsetAddress { get { return link.offset_addr; } set { link.offset_addr = value; } } public int GetOffset (int target_addr) { return target_addr - link.base_addr; } // LinkStack implementation protected override object GetCurrent () { return link; } protected override void SetCurrent (object l) { link = (Link)l; } private struct Link { public int base_addr; public int offset_addr; } Link link; } private ArrayList pgm; } abstract class LinkStack : LinkRef { public LinkStack () { stack = new Stack (); } public void Push () { stack.Push (GetCurrent ()); } public bool Pop () { if (stack.Count > 0) { SetCurrent (stack.Pop ()); return true; } return false; } protected abstract object GetCurrent (); protected abstract void SetCurrent (object l); private Stack stack; } //Used by CILCompiler and Interpreter internal struct Mark { public int Start, End; public int Previous; public bool IsDefined { get { return Start >= 0 && End >= 0; } } public int Index { get { return Start < End ? Start : End; } } public int Length { get { return Start < End ? End - Start : Start - End; } } } }