--- /dev/null
+//\r
+// assembly: System\r
+// namespace: System.Text.RegularExpressions\r
+// file: arch.cs\r
+//\r
+// author: Dan Lewis (dlewis@gmx.co.uk)\r
+// (c) 2002\r
+\r
+using System;\r
+\r
+namespace System.Text.RegularExpressions {\r
+\r
+ // Instruction opcodes of the compiled pattern program. Members are numbered\r
+ // sequentially from False = 0, so the declaration order fixes their values.\r
+ // NOTE(review): every value fits in the low byte while OpFlags starts at\r
+ // 0x100 -- presumably the two are combined in one ushort; confirm against\r
+ // the interpreter.\r
+ enum OpCode : ushort {\r
+ False = 0, // always fails\r
+ True, // always succeeds\r
+\r
+ // matching\r
+\r
+ Position, // zero-width position assertion\r
+ String, // match string literal\r
+ Reference, // back reference\r
+\r
+ // character matching\r
+\r
+ Character, // match character exactly\r
+ Category, // match character from category\r
+ Range, // match character from range\r
+ Set, // match character from set\r
+ In, // match character from group of tests\r
+\r
+ // capturing\r
+\r
+ Open, // open group\r
+ Close, // close group\r
+ Balance, // balance groups\r
+\r
+ // control flow\r
+\r
+ IfDefined, // conditional on capture\r
+ Sub, // non-backtracking subexpression\r
+ Test, // non-backtracking lookahead/behind\r
+ Branch, // alternative expression\r
+ Jump, // unconditional goto\r
+ Repeat, // new repeat context\r
+ Until, // repeat subexpression within context\r
+ FastRepeat, // repeat simple subexpression\r
+ Anchor, // anchoring expression\r
+\r
+ // miscellaneous\r
+ \r
+ Info // pattern information\r
+ }\r
+\r
+ // Modifier bits for an instruction. All values are single bits at 0x100 and\r
+ // above, leaving the low eight bits clear.\r
+ // NOTE(review): presumably OR-ed together with an OpCode value -- confirm.\r
+ [Flags]\r
+ enum OpFlags : ushort {\r
+ None = 0x000,\r
+ Negate = 0x100, // succeed on mismatch\r
+ IgnoreCase = 0x200, // case insensitive matching\r
+ RightToLeft = 0x400, // right-to-left matching\r
+ Lazy = 0x800 // minimizing repeat\r
+ }\r
+\r
+ // Kinds of zero-width position assertion; used as the POS operand of the\r
+ // Position instruction. Values are assigned sequentially from Any = 0.\r
+ // NOTE(review): Start and StartOfString carry the same description below;\r
+ // the intended difference is not visible here -- confirm before relying on it.\r
+ enum Position : ushort {\r
+ Any, // anywhere\r
+ Start, // start of string \A\r
+ StartOfString, // start of string \A\r
+ StartOfLine, // start of line ^\r
+ StartOfScan, // start of scan \G\r
+ End, // end or before newline at end \Z\r
+ EndOfString, // end of string \z\r
+ EndOfLine, // end of line $\r
+ Boundary, // word boundary \b\r
+ NonBoundary // not word boundary \B\r
+ };\r
+ \r
+ // see category.cs for Category enum\r
+\r
+ // A matching engine for a compiled pattern.\r
+ interface IMachine {\r
+ // Runs the machine for regex over text and returns the resulting Match.\r
+ // NOTE(review): start and end presumably delimit the region of text to be\r
+ // scanned -- confirm against an implementation.\r
+ Match Scan (Regex regex, string text, int start, int end);\r
+ }\r
+\r
+ // Creates IMachine instances; this is the type held by the factory cache\r
+ // and returned by ICompiler.GetMachineFactory.\r
+ interface IMachineFactory {\r
+ // Returns a machine ready to be used for scanning.\r
+ IMachine NewInstance ();\r
+ }\r
+\r
+ // Anchor SKIP OFFSET\r
+ //\r
+ // Flags: [RightToLeft] ??\r
+ // SKIP: relative address of tail expression\r
+ // OFFSET: offset of anchor from start of pattern\r
+ //\r
+ // Usage:\r
+ //\r
+ // Anchor :1 OFFSET\r
+ // <expr>\r
+ // True\r
+ // 1: <tail>\r
+ //\r
+ // Notes:\r
+ //\r
+ // In practice, the anchoring expression is only going to be\r
+ // Position (StartOfString, StartOfLine, StartOfScan) or String.\r
+ // This is because the optimizer looks for position anchors at the\r
+ // start of the expression, and if that fails it looks for the\r
+ // longest substring. If an expression has neither a position\r
+ // anchor nor a longest substring anchor, then the anchoring expression\r
+ // is left empty. Since an empty expression will anchor at any\r
+ // position in any string, the entire input string will be scanned.\r
+\r
+ // String LEN STR...\r
+ //\r
+ // Flags: [RightToLeft, IgnoreCase]\r
+ // LEN: length of string\r
+ // STR: string characters\r
+\r
+ // Branch SKIP\r
+ //\r
+ // SKIP: relative address of next branch\r
+ //\r
+ // Branch :1\r
+ // <alt expr 1>\r
+ // Jump :4\r
+ // 1: Branch :2\r
+ // <alt expr 2>\r
+ // Jump :4\r
+ // 2: Branch :3\r
+ // <alt expr 3>\r
+ // Jump :4\r
+ // 3: False\r
+ // 4: <tail>\r
+\r
+ // Repeat SKIP MIN MAX\r
+ //\r
+ // Flags: [Lazy]\r
+ // SKIP: relative address of Until instruction\r
+ // MIN: minimum iterations\r
+ // MAX: maximum iterations (0xffff is infinity)\r
+ //\r
+ // Repeat :1 MIN MAX\r
+ // <expr>\r
+ // Until\r
+ // 1: <tail>\r
+\r
+ // FastRepeat SKIP MIN MAX\r
+ //\r
+ // Flags: [Lazy]\r
+ // SKIP: relative address of tail expression\r
+ // MIN: minimum iterations\r
+ // MAX: maximum iterations (0xffff is infinity)\r
+ //\r
+ // FastRepeat :1 MIN MAX\r
+ // <expr>\r
+ // True\r
+ // 1: <tail>\r
+ //\r
+ // Notes:\r
+ //\r
+ // The subexpression of a FastRepeat construct must not contain any\r
+ // complex operators. These include: Open, Close, Balance, Repeat,\r
+ // FastRepeat, Sub, Test. In addition, the subexpression must have\r
+ // been determined to have a fixed width.\r
+ \r
+ // Sub SKIP\r
+ //\r
+ // SKIP: relative address of tail expression\r
+ //\r
+ // Sub :1\r
+ // <expr>\r
+ // 1: <tail>\r
+ //\r
+ // Notes:\r
+ //\r
+ // The Sub operator invokes an independent subexpression. This means\r
+ // that the subexpression will match only once and so will not\r
+ // participate in any backtracking.\r
+\r
+ // Test TSKIP FSKIP\r
+ //\r
+ // TSKIP: relative address of true expression\r
+ // FSKIP: relative address of false expression\r
+ //\r
+ // Usage: (?(?=test)true|false)\r
+ //\r
+ // Test :1 :2\r
+ // <test expr>\r
+ // 1: <true expr>\r
+ // Jump :3\r
+ // 2: <false expr>\r
+ // 3: <tail>\r
+ //\r
+ // Usage: (?(?=test)true)\r
+ //\r
+ // Test :1 :2\r
+ // <test expr>\r
+ // 1: <true expr>\r
+ // 2: <tail>\r
+ //\r
+ // Usage: (?=test)\r
+ //\r
+ // Test :1 :2\r
+ // <test expr>\r
+ // 1: <true expr>\r
+ // Jump :3\r
+ // 2: False\r
+ // 3: <tail>\r
+ //\r
+ // Notes:\r
+ //\r
+ // For negative lookaheads, just swap the values of TSKIP and\r
+ // FSKIP. For lookbehinds, the test expression must be compiled\r
+ // in reverse. The test expression is always executed as an\r
+ // independent subexpression, so its behaviour is non-backtracking\r
+ // (like a Sub clause.)\r
+\r
+ // IfDefined SKIP GID\r
+ //\r
+ // SKIP: relative address of else expression\r
+ // GID: number of group to check\r
+ //\r
+ // Usage: (?(gid)true)\r
+ //\r
+ // IfDefined :1\r
+ // <true expr>\r
+ // 1: <tail>\r
+ //\r
+ // Usage: (?(gid)true|false)\r
+ //\r
+ // IfDefined :1\r
+ // <true expr>\r
+ // Jump :2\r
+ // 1: <false expr>\r
+ // 2: <tail>\r
+\r
+ // Jump SKIP\r
+ //\r
+ // SKIP: relative address of target expression\r
+ //\r
+ // Jump :1\r
+ // ...\r
+ // :1 <target expr>\r
+\r
+ // Character CHAR\r
+ //\r
+ // Flags: [Negate, IgnoreCase, RightToLeft]\r
+ // CHAR: exact character to match\r
+\r
+ // Category CAT\r
+ //\r
+ // Flags: [Negate, RightToLeft]\r
+ // CAT: category to match (see Category enum)\r
+\r
+ // Range LO HI\r
+ //\r
+ // Flags: [Negate, IgnoreCase, RightToLeft]\r
+ // LO: lowest character in range\r
+ // HI: highest character in range\r
+\r
+ // Set LO LEN SET...\r
+ //\r
+ // Flags: [Negate, IgnoreCase, RightToLeft]\r
+ // LO: lowest character in set\r
+ // LEN: number of words in set\r
+ // SET: bit array representing characters in set\r
+ //\r
+ // Notes:\r
+ //\r
+ // Each word in the set represents 16 characters, so the first word\r
+ // defines membership for characters LO to LO + 15, the second for\r
+ // LO + 16 to LO + 31, and so on up to LO + (LEN * 16 - 1). It is\r
+ // up to the compiler to provide a compact representation for sparse\r
+ // unicode sets. The simple way is to use Set 0 4096. Other methods\r
+ // involve partitioning the set and placing the components into an\r
+ // In block.\r
+\r
+ // In SKIP\r
+ //\r
+ // SKIP: relative address of tail expression\r
+ //\r
+ // Usage: [expr]\r
+ //\r
+ // In :1\r
+ // <expr>\r
+ // True\r
+ // :1 <tail>\r
+ //\r
+ // Usage: [^expr]\r
+ //\r
+ // In :1\r
+ // <expr>\r
+ // False\r
+ // :1 <tail>\r
+ //\r
+ // Notes:\r
+ //\r
+ // The In instruction consumes a single character, using the flags\r
+ // of the first instruction in the subexpression to determine its\r
+ // IgnoreCase and RightToLeft properties. The subexpression is then\r
+ // applied to the single character as a disjunction. If any instruction\r
+ // in the subexpression succeeds, the entire In construct succeeds\r
+ // and matching continues with the tail.\r
+\r
+ // Position POS\r
+ //\r
+ // POS: position to match (see Position enum)\r
+\r
+ // Open GID\r
+ //\r
+ // GID: number of group to open\r
+\r
+ // Close GID\r
+ //\r
+ // GID: number of group to close\r
+ \r
+ // Balance GID BAL\r
+ //\r
+ // GID: number of capturing group (0 if none)\r
+ // BAL: number of group to undefine\r
+\r
+ // Info GROUPS MIN MAX\r
+ //\r
+ // GROUPS: number of capturing groups\r
+ // MIN: minimum width of pattern\r
+ // MAX: maximum width of pattern (0xffff means undefined)\r
+\r
+ // False\r
+\r
+ // True\r
+\r
+ // Reference GID\r
+ //\r
+ // Flags: [IgnoreCase, RightToLeft]\r
+ // GID: number of group to reference\r
+}\r
--- /dev/null
+//\r
+// assembly: System\r
+// namespace: System.Text.RegularExpressions\r
+// file: cache.cs\r
+//\r
+// author: Dan Lewis (dlewis@gmx.co.uk)\r
+// (c) 2002\r
+\r
+using System;\r
+using System.Collections;\r
+\r
+namespace System.Text.RegularExpressions {\r
+\r
+ // Bounded cache of machine factories keyed by (pattern, options). When the\r
+ // cache is full, the entry handed back by MRUList.Evict is dropped to make\r
+ // room for the new one. All access is serialized by locking the cache.\r
+ class FactoryCache {\r
+     // capacity: number of entries to keep; also used as the initial size of\r
+     // the underlying Hashtable.\r
+     public FactoryCache (int capacity) {\r
+         this.capacity = capacity;\r
+         this.factories = new Hashtable (capacity);\r
+         this.mru_list = new MRUList ();\r
+     }\r
+\r
+     // Stores factory under (pattern, options) and marks that key as the most\r
+     // recently used one, evicting older entries first when the cache is full.\r
+     public void Add (string pattern, RegexOptions options, IMachineFactory factory) {\r
+         lock (this) {\r
+             Key k = new Key (pattern, options);\r
+\r
+             // Overwriting an existing entry does not grow the table, so room\r
+             // only has to be made when the key is new.\r
+             if (!factories.Contains (k)) {\r
+                 while (factories.Count >= capacity) {\r
+                     object victim = mru_list.Evict ();\r
+\r
+                     // Nothing left to evict (e.g. capacity <= 0): stop instead\r
+                     // of looping forever; the entry is then stored anyway.\r
+                     if (victim == null)\r
+                         break;\r
+\r
+                     factories.Remove (victim);\r
+                 }\r
+             }\r
+\r
+             factories[k] = factory;\r
+             mru_list.Use (k);\r
+         }\r
+     }\r
+\r
+     // Returns the factory cached for (pattern, options) and marks it as most\r
+     // recently used, or returns null when there is no such entry.\r
+     public IMachineFactory Lookup (string pattern, RegexOptions options) {\r
+         lock (this) {\r
+             Key k = new Key (pattern, options);\r
+             if (factories.Contains (k)) {\r
+                 mru_list.Use (k);\r
+                 return (IMachineFactory)factories[k];\r
+             }\r
+         }\r
+\r
+         return null;\r
+     }\r
+\r
+     private int capacity;\r
+     private Hashtable factories;\r
+     private MRUList mru_list;\r
+\r
+     // Hashtable key pairing a pattern string with its options.\r
+     struct Key {\r
+         public string pattern;\r
+         public RegexOptions options;\r
+\r
+         public Key (string pattern, RegexOptions options) {\r
+             this.pattern = pattern;\r
+             this.options = options;\r
+         }\r
+\r
+         // The three members below override (rather than hide with 'new') the\r
+         // Object methods: Hashtable and MRUList only see the boxed key, so\r
+         // hidden versions would never be called.\r
+\r
+         public override int GetHashCode () {\r
+             // a null pattern must not throw now that Hashtable calls this\r
+             int hash = pattern == null ? 0 : pattern.GetHashCode ();\r
+             return hash ^ (int)options;\r
+         }\r
+\r
+         public override bool Equals (object o) {\r
+             if (!(o is Key))\r
+                 return false;\r
+\r
+             Key k = (Key)o;\r
+             return options == k.options && String.Equals (pattern, k.pattern);\r
+         }\r
+\r
+         public override string ToString () {\r
+             return "('" + pattern + "', [" + options + "])";\r
+         }\r
+     }\r
+ }\r
+\r
+ // Doubly linked recency list. head is the most recently used entry and tail\r
+ // the least recently used one; 'previous' links run from head towards tail,\r
+ // 'next' links from tail towards head.\r
+ class MRUList {\r
+     public MRUList () {\r
+         head = tail = null;\r
+     }\r
+\r
+     // Marks o as the most recently used entry, adding it when it is not in\r
+     // the list yet. Entries are compared with o.Equals, so o must not be null.\r
+     public void Use (object o) {\r
+         if (head == null) {\r
+             head = tail = new Node (o);\r
+             return;\r
+         }\r
+\r
+         // linear search from the most recent entry towards the oldest\r
+         Node node = head;\r
+         while (node != null && !o.Equals (node.value))\r
+             node = node.previous;\r
+\r
+         if (node == head)\r
+             return; // already the most recent entry\r
+\r
+         if (node == null) {\r
+             node = new Node (o);\r
+         }\r
+         else {\r
+             // unlink; node is not head here, so node.next is never null\r
+             if (node == tail)\r
+                 tail = node.next;\r
+             else\r
+                 node.previous.next = node.next;\r
+\r
+             node.next.previous = node.previous;\r
+         }\r
+\r
+         // append as the new head\r
+         head.next = node;\r
+         node.previous = head;\r
+         node.next = null;\r
+         head = node;\r
+     }\r
+\r
+     // Removes the least recently used entry and returns its value, or\r
+     // returns null when the list is empty.\r
+     public object Evict () {\r
+         if (tail == null)\r
+             return null;\r
+\r
+         object o = tail.value;\r
+         tail = tail.next;\r
+\r
+         // Removing the last remaining node empties the list: reset head as\r
+         // well instead of dereferencing the now-null tail.\r
+         if (tail == null)\r
+             head = null;\r
+         else\r
+             tail.previous = null;\r
+\r
+         return o;\r
+     }\r
+\r
+     private Node head, tail;\r
+\r
+     private class Node {\r
+         public object value;\r
+         public Node previous, next;\r
+\r
+         public Node (object value) {\r
+             this.value = value;\r
+         }\r
+     }\r
+ }\r
+}\r
--- /dev/null
+//\r
+// assembly: System\r
+// namespace: System.Text.RegularExpressions\r
+// file: category.cs\r
+//\r
+// author: Dan Lewis (dlewis@gmx.co.uk)\r
+// (c) 2002\r
+\r
+using System;\r
+using System.Globalization;\r
+\r
+namespace System.Text.RegularExpressions {\r
+\r
+ // Character classes that the Category instruction can test for: the canonical\r
+ // and ECMAScript escape classes, the Unicode general categories and the\r
+ // Unicode block ranges. CategoryUtils.CategoryFromName looks members up by\r
+ // name ("Unicode" + name), so the Unicode* identifiers are significant.\r
+ enum Category : ushort {\r
+ None,\r
+\r
+ // canonical classes\r
+ \r
+ Any, // any character except newline .\r
+ AnySingleline, // any character . (s option)\r
+ Word, // any word character \w\r
+ Digit, // any digit character \d\r
+ WhiteSpace, // any whitespace character \s\r
+ \r
+ // ECMAScript classes\r
+\r
+\r
+ EcmaAny,\r
+ EcmaAnySingleline,\r
+ EcmaWord, // [a-zA-Z_0-9]\r
+ EcmaDigit, // [0-9]\r
+ EcmaWhiteSpace, // [ \f\n\r\t\v]\r
+\r
+ // unicode categories\r
+ \r
+ UnicodeL, // Letter\r
+ UnicodeM, // Mark\r
+ UnicodeN, // Number\r
+ UnicodeZ, // Separator\r
+ UnicodeP, // Punctuation\r
+ UnicodeS, // Symbol\r
+ UnicodeC, // Other\r
+\r
+ UnicodeLu, // UppercaseLetter\r
+ UnicodeLl, // LowercaseLetter\r
+ UnicodeLt, // TitlecaseLetter\r
+ UnicodeLm, // ModifierLetter\r
+ UnicodeLo, // OtherLetter\r
+ UnicodeMn, // NonspacingMark\r
+ UnicodeMe, // EnclosingMark\r
+ UnicodeMc, // SpacingMark\r
+ UnicodeNd, // DecimalNumber\r
+ UnicodeNl, // LetterNumber\r
+ UnicodeNo, // OtherNumber\r
+ UnicodeZs, // SpaceSeparator\r
+ UnicodeZl, // LineSeparator\r
+ UnicodeZp, // ParagraphSeparator\r
+ UnicodePd, // DashPunctuation\r
+ UnicodePs, // OpenPunctuation\r
+ UnicodePi, // InitialPunctuation\r
+ UnicodePe, // ClosePunctuation\r
+ UnicodePf, // FinalPunctuation\r
+ UnicodePc, // ConnectorPunctuation\r
+ UnicodePo, // OtherPunctuation\r
+ UnicodeSm, // MathSymbol\r
+ UnicodeSc, // CurrencySymbol\r
+ UnicodeSk, // ModifierSymbol\r
+ UnicodeSo, // OtherSymbol\r
+ UnicodeCc, // Control\r
+ UnicodeCf, // Format\r
+ UnicodeCo, // PrivateUse\r
+ UnicodeCs, // Surrogate\r
+ UnicodeCn, // Unassigned\r
+\r
+ // unicode block ranges\r
+\r
+ // notes: the categories marked with a star are valid unicode block ranges,\r
+ // but don't seem to be accepted by the MS parser using the \p{...} format.\r
+ // any ideas?\r
+\r
+ UnicodeBasicLatin,\r
+ UnicodeLatin1Supplement, // *\r
+ UnicodeLatinExtendedA, // *\r
+ UnicodeLatinExtendedB, // *\r
+ UnicodeIPAExtensions,\r
+ UnicodeSpacingModifierLetters,\r
+ UnicodeCombiningDiacriticalMarks,\r
+ UnicodeGreek,\r
+ UnicodeCyrillic,\r
+ UnicodeArmenian,\r
+ UnicodeHebrew,\r
+ UnicodeArabic,\r
+ UnicodeSyriac,\r
+ UnicodeThaana,\r
+ UnicodeDevanagari,\r
+ UnicodeBengali,\r
+ UnicodeGurmukhi,\r
+ UnicodeGujarati,\r
+ UnicodeOriya,\r
+ UnicodeTamil,\r
+ UnicodeTelugu,\r
+ UnicodeKannada,\r
+ UnicodeMalayalam,\r
+ UnicodeSinhala,\r
+ UnicodeThai,\r
+ UnicodeLao,\r
+ UnicodeTibetan,\r
+ UnicodeMyanmar,\r
+ UnicodeGeorgian,\r
+ UnicodeHangulJamo,\r
+ UnicodeEthiopic,\r
+ UnicodeCherokee,\r
+ UnicodeUnifiedCanadianAboriginalSyllabics,\r
+ UnicodeOgham,\r
+ UnicodeRunic,\r
+ UnicodeKhmer,\r
+ UnicodeMongolian,\r
+ UnicodeLatinExtendedAdditional,\r
+ UnicodeGreekExtended,\r
+ UnicodeGeneralPunctuation,\r
+ UnicodeSuperscriptsandSubscripts,\r
+ UnicodeCurrencySymbols,\r
+ UnicodeCombiningMarksforSymbols,\r
+ UnicodeLetterlikeSymbols,\r
+ UnicodeNumberForms,\r
+ UnicodeArrows,\r
+ UnicodeMathematicalOperators,\r
+ UnicodeMiscellaneousTechnical,\r
+ UnicodeControlPictures,\r
+ UnicodeOpticalCharacterRecognition,\r
+ UnicodeEnclosedAlphanumerics,\r
+ UnicodeBoxDrawing,\r
+ UnicodeBlockElements,\r
+ UnicodeGeometricShapes,\r
+ UnicodeMiscellaneousSymbols,\r
+ UnicodeDingbats,\r
+ UnicodeBraillePatterns,\r
+ UnicodeCJKRadicalsSupplement,\r
+ UnicodeKangxiRadicals,\r
+ UnicodeIdeographicDescriptionCharacters,\r
+ UnicodeCJKSymbolsandPunctuation,\r
+ UnicodeHiragana,\r
+ UnicodeKatakana,\r
+ UnicodeBopomofo,\r
+ UnicodeHangulCompatibilityJamo,\r
+ UnicodeKanbun,\r
+ UnicodeBopomofoExtended,\r
+ UnicodeEnclosedCJKLettersandMonths,\r
+ UnicodeCJKCompatibility,\r
+ UnicodeCJKUnifiedIdeographsExtensionA,\r
+ UnicodeCJKUnifiedIdeographs,\r
+ UnicodeYiSyllables,\r
+ UnicodeYiRadicals,\r
+ UnicodeHangulSyllables,\r
+ UnicodeHighSurrogates,\r
+ UnicodeHighPrivateUseSurrogates,\r
+ UnicodeLowSurrogates,\r
+ UnicodePrivateUse,\r
+ UnicodeCJKCompatibilityIdeographs,\r
+ UnicodeAlphabeticPresentationForms,\r
+ UnicodeArabicPresentationFormsA, // *\r
+ UnicodeCombiningHalfMarks,\r
+ UnicodeCJKCompatibilityForms,\r
+ UnicodeSmallFormVariants,\r
+ UnicodeArabicPresentationFormsB, // *\r
+ UnicodeSpecials,\r
+ UnicodeHalfwidthandFullwidthForms,\r
+ \r
+ UnicodeOldItalic,\r
+ UnicodeGothic,\r
+ UnicodeDeseret,\r
+ UnicodeByzantineMusicalSymbols,\r
+ UnicodeMusicalSymbols,\r
+ UnicodeMathematicalAlphanumericSymbols,\r
+ UnicodeCJKUnifiedIdeographsExtensionB,\r
+ UnicodeCJKCompatibilityIdeographsSupplement,\r
+ UnicodeTags\r
+ }\r
+\r
+ // Helpers for resolving Category names and for testing whether a character\r
+ // belongs to a Category.\r
+ class CategoryUtils {\r
+     // Maps a category or block name to its Category member by looking up\r
+     // "Unicode" + name; a leading "Is" (the block-range prefix) is removed\r
+     // first. Returns Category.None when no member of that name exists.\r
+     public static Category CategoryFromName (string name) {\r
+         // Test the prefix without Substring (0, 2): that call throws an\r
+         // ArgumentOutOfRangeException for one-letter names such as "L", which\r
+         // the catch below would swallow and wrongly report as None.\r
+         if (name.Length >= 2 && name[0] == 'I' && name[1] == 's')\r
+             name = name.Substring (2);\r
+\r
+         try {\r
+             return (Category)Enum.Parse (typeof (Category), "Unicode" + name);\r
+         }\r
+         catch (ArgumentException) {\r
+             return Category.None;\r
+         }\r
+     }\r
+\r
+     // Returns true when c belongs to cat. Category.None, the block ranges\r
+     // that lie above U+FFFF and unknown values never match.\r
+     public static bool IsCategory (Category cat, char c) {\r
+         switch (cat) {\r
+         case Category.None:\r
+             return false;\r
+\r
+         // canonical classes\r
+\r
+         case Category.Any:\r
+             return c != '\n';\r
+\r
+         case Category.AnySingleline:\r
+             return true;\r
+\r
+         case Category.Word:\r
+             return\r
+                 Char.IsLetterOrDigit (c) ||\r
+                 IsCategory (UnicodeCategory.ConnectorPunctuation, c);\r
+\r
+         case Category.Digit:\r
+             return Char.IsDigit (c);\r
+\r
+         case Category.WhiteSpace:\r
+             return Char.IsWhiteSpace (c);\r
+\r
+         // ECMA categories\r
+\r
+         case Category.EcmaAny:\r
+             return c != '\n';\r
+\r
+         case Category.EcmaAnySingleline:\r
+             return true;\r
+\r
+         case Category.EcmaWord:\r
+             return\r
+                 InRange (c, 'a', 'z') ||\r
+                 InRange (c, 'A', 'Z') ||\r
+                 InRange (c, '0', '9') ||\r
+                 '_' == c;\r
+\r
+         case Category.EcmaDigit:\r
+             // upper bound is the character '9', not the number 9\r
+             return InRange (c, '0', '9');\r
+\r
+         case Category.EcmaWhiteSpace:\r
+             return\r
+                 c == ' ' ||\r
+                 c == '\f' ||\r
+                 c == '\n' ||\r
+                 c == '\r' ||\r
+                 c == '\t' ||\r
+                 c == '\v';\r
+\r
+         // Unicode categories...\r
+\r
+         // letter\r
+\r
+         case Category.UnicodeLu: return IsCategory (UnicodeCategory.UppercaseLetter, c);\r
+         case Category.UnicodeLl: return IsCategory (UnicodeCategory.LowercaseLetter, c);\r
+         case Category.UnicodeLt: return IsCategory (UnicodeCategory.TitlecaseLetter, c);\r
+         case Category.UnicodeLm: return IsCategory (UnicodeCategory.ModifierLetter, c);\r
+         case Category.UnicodeLo: return IsCategory (UnicodeCategory.OtherLetter, c);\r
+\r
+         // mark\r
+\r
+         case Category.UnicodeMn: return IsCategory (UnicodeCategory.NonSpacingMark, c);\r
+         case Category.UnicodeMe: return IsCategory (UnicodeCategory.EnclosingMark, c);\r
+         case Category.UnicodeMc: return IsCategory (UnicodeCategory.SpacingCombiningMark, c);\r
+\r
+         // number\r
+\r
+         case Category.UnicodeNd: return IsCategory (UnicodeCategory.DecimalDigitNumber, c);\r
+         case Category.UnicodeNl: return IsCategory (UnicodeCategory.LetterNumber, c);\r
+         case Category.UnicodeNo: return IsCategory (UnicodeCategory.OtherNumber, c);\r
+\r
+         // separator\r
+\r
+         case Category.UnicodeZs: return IsCategory (UnicodeCategory.SpaceSeparator, c);\r
+         case Category.UnicodeZl: return IsCategory (UnicodeCategory.LineSeparator, c);\r
+         case Category.UnicodeZp: return IsCategory (UnicodeCategory.ParagraphSeparator, c);\r
+\r
+         // punctuation\r
+\r
+         case Category.UnicodePd: return IsCategory (UnicodeCategory.DashPunctuation, c);\r
+         case Category.UnicodePs: return IsCategory (UnicodeCategory.OpenPunctuation, c);\r
+         case Category.UnicodePi: return IsCategory (UnicodeCategory.InitialQuotePunctuation, c);\r
+         case Category.UnicodePe: return IsCategory (UnicodeCategory.ClosePunctuation, c);\r
+         case Category.UnicodePf: return IsCategory (UnicodeCategory.FinalQuotePunctuation, c);\r
+         case Category.UnicodePc: return IsCategory (UnicodeCategory.ConnectorPunctuation, c);\r
+         case Category.UnicodePo: return IsCategory (UnicodeCategory.OtherPunctuation, c);\r
+\r
+         // symbol\r
+\r
+         case Category.UnicodeSm: return IsCategory (UnicodeCategory.MathSymbol, c);\r
+         case Category.UnicodeSc: return IsCategory (UnicodeCategory.CurrencySymbol, c);\r
+         case Category.UnicodeSk: return IsCategory (UnicodeCategory.ModifierSymbol, c);\r
+         case Category.UnicodeSo: return IsCategory (UnicodeCategory.OtherSymbol, c);\r
+\r
+         // other\r
+\r
+         case Category.UnicodeCc: return IsCategory (UnicodeCategory.Control, c);\r
+         case Category.UnicodeCf: return IsCategory (UnicodeCategory.Format, c);\r
+         case Category.UnicodeCo: return IsCategory (UnicodeCategory.PrivateUse, c);\r
+         case Category.UnicodeCs: return IsCategory (UnicodeCategory.Surrogate, c);\r
+         case Category.UnicodeCn: return IsCategory (UnicodeCategory.OtherNotAssigned, c);\r
+\r
+         // major classes: the union of their subcategories\r
+\r
+         case Category.UnicodeL: // letter\r
+             return\r
+                 IsCategory (UnicodeCategory.UppercaseLetter, c) ||\r
+                 IsCategory (UnicodeCategory.LowercaseLetter, c) ||\r
+                 IsCategory (UnicodeCategory.TitlecaseLetter, c) ||\r
+                 IsCategory (UnicodeCategory.ModifierLetter, c) ||\r
+                 IsCategory (UnicodeCategory.OtherLetter, c);\r
+\r
+         case Category.UnicodeM: // mark\r
+             return\r
+                 IsCategory (UnicodeCategory.NonSpacingMark, c) ||\r
+                 IsCategory (UnicodeCategory.EnclosingMark, c) ||\r
+                 IsCategory (UnicodeCategory.SpacingCombiningMark, c);\r
+\r
+         case Category.UnicodeN: // number\r
+             return\r
+                 IsCategory (UnicodeCategory.DecimalDigitNumber, c) ||\r
+                 IsCategory (UnicodeCategory.LetterNumber, c) ||\r
+                 IsCategory (UnicodeCategory.OtherNumber, c);\r
+\r
+         case Category.UnicodeZ: // separator\r
+             return\r
+                 IsCategory (UnicodeCategory.SpaceSeparator, c) ||\r
+                 IsCategory (UnicodeCategory.LineSeparator, c) ||\r
+                 IsCategory (UnicodeCategory.ParagraphSeparator, c);\r
+\r
+         case Category.UnicodeP: // punctuation\r
+             return\r
+                 IsCategory (UnicodeCategory.DashPunctuation, c) ||\r
+                 IsCategory (UnicodeCategory.OpenPunctuation, c) ||\r
+                 IsCategory (UnicodeCategory.InitialQuotePunctuation, c) ||\r
+                 IsCategory (UnicodeCategory.ClosePunctuation, c) ||\r
+                 IsCategory (UnicodeCategory.FinalQuotePunctuation, c) ||\r
+                 IsCategory (UnicodeCategory.ConnectorPunctuation, c) ||\r
+                 IsCategory (UnicodeCategory.OtherPunctuation, c);\r
+\r
+         case Category.UnicodeS: // symbol\r
+             return\r
+                 IsCategory (UnicodeCategory.MathSymbol, c) ||\r
+                 IsCategory (UnicodeCategory.CurrencySymbol, c) ||\r
+                 IsCategory (UnicodeCategory.ModifierSymbol, c) ||\r
+                 IsCategory (UnicodeCategory.OtherSymbol, c);\r
+\r
+         case Category.UnicodeC: // other\r
+             return\r
+                 IsCategory (UnicodeCategory.Control, c) ||\r
+                 IsCategory (UnicodeCategory.Format, c) ||\r
+                 IsCategory (UnicodeCategory.PrivateUse, c) ||\r
+                 IsCategory (UnicodeCategory.Surrogate, c) ||\r
+                 IsCategory (UnicodeCategory.OtherNotAssigned, c);\r
+\r
+         // Unicode block ranges...\r
+\r
+         case Category.UnicodeBasicLatin: return InRange (c, '\u0000', '\u007F');\r
+         case Category.UnicodeLatin1Supplement: return InRange (c, '\u0080', '\u00FF');\r
+         case Category.UnicodeLatinExtendedA: return InRange (c, '\u0100', '\u017F');\r
+         case Category.UnicodeLatinExtendedB: return InRange (c, '\u0180', '\u024F');\r
+         case Category.UnicodeIPAExtensions: return InRange (c, '\u0250', '\u02AF');\r
+         case Category.UnicodeSpacingModifierLetters: return InRange (c, '\u02B0', '\u02FF');\r
+         case Category.UnicodeCombiningDiacriticalMarks: return InRange (c, '\u0300', '\u036F');\r
+         case Category.UnicodeGreek: return InRange (c, '\u0370', '\u03FF');\r
+         case Category.UnicodeCyrillic: return InRange (c, '\u0400', '\u04FF');\r
+         case Category.UnicodeArmenian: return InRange (c, '\u0530', '\u058F');\r
+         case Category.UnicodeHebrew: return InRange (c, '\u0590', '\u05FF');\r
+         case Category.UnicodeArabic: return InRange (c, '\u0600', '\u06FF');\r
+         case Category.UnicodeSyriac: return InRange (c, '\u0700', '\u074F');\r
+         case Category.UnicodeThaana: return InRange (c, '\u0780', '\u07BF');\r
+         case Category.UnicodeDevanagari: return InRange (c, '\u0900', '\u097F');\r
+         case Category.UnicodeBengali: return InRange (c, '\u0980', '\u09FF');\r
+         case Category.UnicodeGurmukhi: return InRange (c, '\u0A00', '\u0A7F');\r
+         case Category.UnicodeGujarati: return InRange (c, '\u0A80', '\u0AFF');\r
+         case Category.UnicodeOriya: return InRange (c, '\u0B00', '\u0B7F');\r
+         case Category.UnicodeTamil: return InRange (c, '\u0B80', '\u0BFF');\r
+         case Category.UnicodeTelugu: return InRange (c, '\u0C00', '\u0C7F');\r
+         case Category.UnicodeKannada: return InRange (c, '\u0C80', '\u0CFF');\r
+         case Category.UnicodeMalayalam: return InRange (c, '\u0D00', '\u0D7F');\r
+         case Category.UnicodeSinhala: return InRange (c, '\u0D80', '\u0DFF');\r
+         case Category.UnicodeThai: return InRange (c, '\u0E00', '\u0E7F');\r
+         case Category.UnicodeLao: return InRange (c, '\u0E80', '\u0EFF');\r
+         case Category.UnicodeTibetan: return InRange (c, '\u0F00', '\u0FFF');\r
+         case Category.UnicodeMyanmar: return InRange (c, '\u1000', '\u109F');\r
+         case Category.UnicodeGeorgian: return InRange (c, '\u10A0', '\u10FF');\r
+         case Category.UnicodeHangulJamo: return InRange (c, '\u1100', '\u11FF');\r
+         case Category.UnicodeEthiopic: return InRange (c, '\u1200', '\u137F');\r
+         case Category.UnicodeCherokee: return InRange (c, '\u13A0', '\u13FF');\r
+         case Category.UnicodeUnifiedCanadianAboriginalSyllabics: return InRange (c, '\u1400', '\u167F');\r
+         case Category.UnicodeOgham: return InRange (c, '\u1680', '\u169F');\r
+         case Category.UnicodeRunic: return InRange (c, '\u16A0', '\u16FF');\r
+         case Category.UnicodeKhmer: return InRange (c, '\u1780', '\u17FF');\r
+         case Category.UnicodeMongolian: return InRange (c, '\u1800', '\u18AF');\r
+         case Category.UnicodeLatinExtendedAdditional: return InRange (c, '\u1E00', '\u1EFF');\r
+         case Category.UnicodeGreekExtended: return InRange (c, '\u1F00', '\u1FFF');\r
+         case Category.UnicodeGeneralPunctuation: return InRange (c, '\u2000', '\u206F');\r
+         case Category.UnicodeSuperscriptsandSubscripts: return InRange (c, '\u2070', '\u209F');\r
+         case Category.UnicodeCurrencySymbols: return InRange (c, '\u20A0', '\u20CF');\r
+         case Category.UnicodeCombiningMarksforSymbols: return InRange (c, '\u20D0', '\u20FF');\r
+         case Category.UnicodeLetterlikeSymbols: return InRange (c, '\u2100', '\u214F');\r
+         case Category.UnicodeNumberForms: return InRange (c, '\u2150', '\u218F');\r
+         case Category.UnicodeArrows: return InRange (c, '\u2190', '\u21FF');\r
+         case Category.UnicodeMathematicalOperators: return InRange (c, '\u2200', '\u22FF');\r
+         case Category.UnicodeMiscellaneousTechnical: return InRange (c, '\u2300', '\u23FF');\r
+         case Category.UnicodeControlPictures: return InRange (c, '\u2400', '\u243F');\r
+         case Category.UnicodeOpticalCharacterRecognition: return InRange (c, '\u2440', '\u245F');\r
+         case Category.UnicodeEnclosedAlphanumerics: return InRange (c, '\u2460', '\u24FF');\r
+         case Category.UnicodeBoxDrawing: return InRange (c, '\u2500', '\u257F');\r
+         case Category.UnicodeBlockElements: return InRange (c, '\u2580', '\u259F');\r
+         case Category.UnicodeGeometricShapes: return InRange (c, '\u25A0', '\u25FF');\r
+         case Category.UnicodeMiscellaneousSymbols: return InRange (c, '\u2600', '\u26FF');\r
+         case Category.UnicodeDingbats: return InRange (c, '\u2700', '\u27BF');\r
+         case Category.UnicodeBraillePatterns: return InRange (c, '\u2800', '\u28FF');\r
+         case Category.UnicodeCJKRadicalsSupplement: return InRange (c, '\u2E80', '\u2EFF');\r
+         case Category.UnicodeKangxiRadicals: return InRange (c, '\u2F00', '\u2FDF');\r
+         case Category.UnicodeIdeographicDescriptionCharacters: return InRange (c, '\u2FF0', '\u2FFF');\r
+         case Category.UnicodeCJKSymbolsandPunctuation: return InRange (c, '\u3000', '\u303F');\r
+         case Category.UnicodeHiragana: return InRange (c, '\u3040', '\u309F');\r
+         case Category.UnicodeKatakana: return InRange (c, '\u30A0', '\u30FF');\r
+         case Category.UnicodeBopomofo: return InRange (c, '\u3100', '\u312F');\r
+         case Category.UnicodeHangulCompatibilityJamo: return InRange (c, '\u3130', '\u318F');\r
+         case Category.UnicodeKanbun: return InRange (c, '\u3190', '\u319F');\r
+         case Category.UnicodeBopomofoExtended: return InRange (c, '\u31A0', '\u31BF');\r
+         case Category.UnicodeEnclosedCJKLettersandMonths: return InRange (c, '\u3200', '\u32FF');\r
+         case Category.UnicodeCJKCompatibility: return InRange (c, '\u3300', '\u33FF');\r
+         case Category.UnicodeCJKUnifiedIdeographsExtensionA: return InRange (c, '\u3400', '\u4DB5');\r
+         case Category.UnicodeCJKUnifiedIdeographs: return InRange (c, '\u4E00', '\u9FFF');\r
+         case Category.UnicodeYiSyllables: return InRange (c, '\uA000', '\uA48F');\r
+         case Category.UnicodeYiRadicals: return InRange (c, '\uA490', '\uA4CF');\r
+         case Category.UnicodeHangulSyllables: return InRange (c, '\uAC00', '\uD7A3');\r
+         case Category.UnicodeHighSurrogates: return InRange (c, '\uD800', '\uDB7F');\r
+         case Category.UnicodeHighPrivateUseSurrogates: return InRange (c, '\uDB80', '\uDBFF');\r
+         case Category.UnicodeLowSurrogates: return InRange (c, '\uDC00', '\uDFFF');\r
+         case Category.UnicodePrivateUse: return InRange (c, '\uE000', '\uF8FF');\r
+         case Category.UnicodeCJKCompatibilityIdeographs: return InRange (c, '\uF900', '\uFAFF');\r
+         case Category.UnicodeAlphabeticPresentationForms: return InRange (c, '\uFB00', '\uFB4F');\r
+         case Category.UnicodeArabicPresentationFormsA: return InRange (c, '\uFB50', '\uFDFF');\r
+         case Category.UnicodeCombiningHalfMarks: return InRange (c, '\uFE20', '\uFE2F');\r
+         case Category.UnicodeCJKCompatibilityForms: return InRange (c, '\uFE30', '\uFE4F');\r
+         case Category.UnicodeSmallFormVariants: return InRange (c, '\uFE50', '\uFE6F');\r
+         case Category.UnicodeArabicPresentationFormsB: return InRange (c, '\uFE70', '\uFEFE');\r
+         case Category.UnicodeHalfwidthandFullwidthForms: return InRange (c, '\uFF00', '\uFFEF');\r
+\r
+         case Category.UnicodeSpecials:\r
+             return c == '\uFEFF' || InRange (c, '\uFFF0', '\uFFFD');\r
+\r
+         // these block ranges begin above 0x10000, so no single char is in them\r
+\r
+         case Category.UnicodeOldItalic:\r
+         case Category.UnicodeGothic:\r
+         case Category.UnicodeDeseret:\r
+         case Category.UnicodeByzantineMusicalSymbols:\r
+         case Category.UnicodeMusicalSymbols:\r
+         case Category.UnicodeMathematicalAlphanumericSymbols:\r
+         case Category.UnicodeCJKUnifiedIdeographsExtensionB:\r
+         case Category.UnicodeCJKCompatibilityIdeographsSupplement:\r
+         case Category.UnicodeTags:\r
+             return false;\r
+\r
+         default:\r
+             return false;\r
+         }\r
+     }\r
+\r
+     // True when c has exactly the Unicode general category uc.\r
+     private static bool IsCategory (UnicodeCategory uc, char c) {\r
+         return Char.GetUnicodeCategory (c) == uc;\r
+     }\r
+\r
+     // True when lo <= c <= hi (both bounds inclusive).\r
+     private static bool InRange (char c, char lo, char hi) {\r
+         return lo <= c && c <= hi;\r
+     }\r
+ }\r
+}\r
--- /dev/null
+//\r
+// assembly: System\r
+// namespace: System.Text.RegularExpressions\r
+// file: collections.cs\r
+//\r
+// author: Dan Lewis (dlewis@gmx.co.uk)\r
+// (c) 2002\r
+\r
+using System;\r
+using System.Collections;\r
+\r
+namespace System.Text.RegularExpressions {\r
+ // ArrayList-backed base class shared by the capture, group and match\r
+ // collections. Elements can only be added from inside the assembly.\r
+ public abstract class RegexCollectionBase : ICollection, IEnumerable {\r
+     public int Count {\r
+         get { return list.Count; }\r
+     }\r
+\r
+     public bool IsReadOnly {\r
+         get { return true; } // FIXME\r
+     }\r
+\r
+     public bool IsSynchronized {\r
+         get { return false; } // FIXME\r
+     }\r
+\r
+     public object SyncRoot {\r
+         get { return list; } // FIXME\r
+     }\r
+\r
+     // Copies the elements into array, starting at index. Copying stops\r
+     // quietly once the end of array is reached, so an array that is too\r
+     // small receives only the leading elements.\r
+     public void CopyTo (Array array, int index) {\r
+         foreach (Object o in list) {\r
+             // '>=': index == array.Length is already one past the last slot\r
+             if (index >= array.Length)\r
+                 break;\r
+\r
+             array.SetValue (o, index ++);\r
+         }\r
+     }\r
+\r
+     public IEnumerator GetEnumerator () {\r
+         return new Enumerator (list);\r
+     }\r
+\r
+     // internal methods\r
+\r
+     internal RegexCollectionBase () {\r
+         list = new ArrayList ();\r
+     }\r
+\r
+     internal void Add (Object o) {\r
+         list.Add (o);\r
+     }\r
+\r
+     // IEnumerator implementation\r
+\r
+     // ptr is -1 before the first element and list.Count once the end has\r
+     // been passed; it never moves outside that range.\r
+     private class Enumerator : IEnumerator {\r
+         public Enumerator (IList list) {\r
+             this.list = list;\r
+             Reset ();\r
+         }\r
+\r
+         // Throws InvalidOperationException when positioned before the first\r
+         // or after the last element.\r
+         public object Current {\r
+             get {\r
+                 if (ptr < 0 || ptr >= list.Count)\r
+                     throw new InvalidOperationException ();\r
+\r
+                 return list[ptr];\r
+             }\r
+         }\r
+\r
+         // Advances to the next element; keeps returning false (instead of\r
+         // throwing) when called again after the end has been reached.\r
+         public bool MoveNext () {\r
+             if (ptr < list.Count)\r
+                 ++ ptr;\r
+\r
+             return ptr < list.Count;\r
+         }\r
+\r
+         public void Reset () {\r
+             ptr = -1;\r
+         }\r
+\r
+         private IList list;\r
+         private int ptr;\r
+     }\r
+\r
+     // protected fields\r
+\r
+     protected ArrayList list;\r
+ }\r
+\r
+ // Collection of the defined captures reachable from a capture through its Previous links.\r
+ public class CaptureCollection : RegexCollectionBase, ICollection, IEnumerable {\r
+     public Capture this[int i] {\r
+         get {\r
+             object item = list[i];\r
+             return (Capture)item;\r
+         }\r
+     }\r
+\r
+     // Walks the Previous chain starting at 'cap', keeps every defined capture,\r
+     // then reverses the list so the capture passed in ends up last.\r
+     internal CaptureCollection (Capture cap) {\r
+         for (Capture cur = cap; cur != null; cur = cur.Previous) {\r
+             if (cur.IsDefined)\r
+                 Add (cur);\r
+         }\r
+\r
+         list.Reverse ();\r
+     }\r
+ }\r
+\r
+ // Collection whose items are read back as Group objects.\r
+ public class GroupCollection : RegexCollectionBase, ICollection, IEnumerable {\r
+     internal GroupCollection () {\r
+     }\r
+\r
+     public Group this[int i] {\r
+         get {\r
+             object item = list[i];\r
+             return (Group)item;\r
+         }\r
+     }\r
+ }\r
+\r
+ // Collection whose items are read back as Match objects.\r
+ public class MatchCollection : RegexCollectionBase, ICollection, IEnumerable {\r
+     internal MatchCollection () {\r
+     }\r
+\r
+     public Match this[int i] {\r
+         get {\r
+             object item = list[i];\r
+             return (Match)item;\r
+         }\r
+     }\r
+ }\r
+}\r
--- /dev/null
+//\r
+// assembly: System\r
+// namespace: System.Text.RegularExpressions\r
+// file: compiler.cs\r
+//\r
+// author: Dan Lewis (dlewis@gmx.co.uk)\r
+// (c) 2002\r
+\r
+using System;\r
+using System.Collections;\r
+\r
+namespace System.Text.RegularExpressions {\r
+ abstract class LinkRef {\r
+ // empty: declares no members; ICompiler.NewLink returns these handles and the link-taking Emit* methods and ResolveLink accept them back\r
+ }\r
+ \r
+ interface ICompiler {\r
+ void Reset ();\r
+ IMachineFactory GetMachineFactory ();\r
+\r
+ // instruction emission: in PatternCompiler each Emit* call appends one instruction (opcode word plus operands) to the program\r
+\r
+ void EmitFalse ();\r
+ void EmitTrue ();\r
+\r
+ // character matching (negate = succeed on mismatch, ignore = case insensitive, reverse = right-to-left)\r
+\r
+ void EmitCharacter (char c, bool negate, bool ignore, bool reverse);\r
+ void EmitCategory (Category cat, bool negate, bool reverse);\r
+ void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse);\r
+ void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse);\r
+\r
+ // other operators (gid is used by the Interpreter as an index into its capture array)\r
+\r
+ void EmitString (string str, bool ignore, bool reverse);\r
+ void EmitPosition (Position pos);\r
+ void EmitOpen (int gid);\r
+ void EmitClose (int gid);\r
+ void EmitBalance (int gid, int balance);\r
+ void EmitReference (int gid, bool ignore, bool reverse);\r
+\r
+ // constructs: a LinkRef argument stands for a target address; PatternCompiler emits a placeholder word that ResolveLink patches later\r
+\r
+ void EmitIfDefined (int gid, LinkRef tail);\r
+ void EmitSub (LinkRef tail);\r
+ void EmitTest (LinkRef yes, LinkRef tail);\r
+ void EmitBranch (LinkRef next);\r
+ void EmitJump (LinkRef target);\r
+ void EmitRepeat (int min, int max, bool lazy, LinkRef until);\r
+ void EmitUntil (LinkRef repeat);\r
+ void EmitIn (LinkRef tail);\r
+ void EmitInfo (int count, int min, int max);\r
+ void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail);\r
+ void EmitAnchor (int offset, LinkRef tail);\r
+\r
+ LinkRef NewLink ();\r
+ void ResolveLink (LinkRef link);\r
+ }\r
+\r
+ // Machine factory that creates an Interpreter over one compiled program image.\r
+ class InterpreterFactory : IMachineFactory {\r
+     private ushort[] pattern;\r
+\r
+     public InterpreterFactory (ushort[] pattern) {\r
+         this.pattern = pattern;\r
+     }\r
+\r
+     // Every call returns a new Interpreter; all of them share the same program array.\r
+     public IMachine NewInstance () {\r
+         IMachine machine = new Interpreter (this.pattern);\r
+         return machine;\r
+     }\r
+ }\r
+\r
+ // ICompiler implementation that builds a flat program of 16-bit words.\r
+ // An instruction word holds the OpCode in its low byte and the OpFlags in its high byte;\r
+ // operands follow as further words.\r
+ class PatternCompiler : ICompiler {\r
+     private ArrayList pgm;\r
+\r
+     public PatternCompiler () {\r
+         pgm = new ArrayList ();\r
+     }\r
+\r
+     // Packs an opcode and its flags into one instruction word.\r
+     public static ushort EncodeOp (OpCode op, OpFlags flags) {\r
+         int flag_bits = (int)flags & 0xff00;\r
+         return (ushort)((int)op | flag_bits);\r
+     }\r
+\r
+     // Splits an instruction word back into opcode (low byte) and flags (high byte).\r
+     public static void DecodeOp (ushort word, out OpCode op, out OpFlags flags) {\r
+         flags = (OpFlags)(word & 0xff00);\r
+         op = (OpCode)(word & 0x00ff);\r
+     }\r
+\r
+     // ICompiler implementation\r
+\r
+     public void Reset () {\r
+         pgm.Clear ();\r
+     }\r
+\r
+     // Snapshots the words emitted so far into an array and wraps it in a factory.\r
+     public IMachineFactory GetMachineFactory () {\r
+         int size = pgm.Count;\r
+         ushort[] image = new ushort[size];\r
+         pgm.CopyTo (image);\r
+\r
+         return new InterpreterFactory (image);\r
+     }\r
+\r
+     public void EmitFalse () {\r
+         Emit (OpCode.False);\r
+     }\r
+\r
+     public void EmitTrue () {\r
+         Emit (OpCode.True);\r
+     }\r
+\r
+     // Character: opcode word, then the character (lower-cased when 'ignore' is set).\r
+     public void EmitCharacter (char c, bool negate, bool ignore, bool reverse) {\r
+         Emit (OpCode.Character, MakeFlags (negate, ignore, reverse, false));\r
+\r
+         char operand = ignore ? Char.ToLower (c) : c;\r
+         Emit ((ushort)operand);\r
+     }\r
+\r
+     public void EmitCategory (Category cat, bool negate, bool reverse) {\r
+         OpFlags flags = MakeFlags (negate, false, reverse, false);\r
+         Emit (OpCode.Category, flags);\r
+         Emit ((ushort)cat);\r
+     }\r
+\r
+     public void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse) {\r
+         OpFlags flags = MakeFlags (negate, ignore, reverse, false);\r
+         Emit (OpCode.Range, flags);\r
+         Emit ((ushort)lo);\r
+         Emit ((ushort)hi);\r
+     }\r
+\r
+     // Set: opcode word, base character, number of bitmap words, then the bitmap\r
+     // packed 16 bits per word with the lowest bit first.\r
+     public void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse) {\r
+         Emit (OpCode.Set, MakeFlags (negate, ignore, reverse, false));\r
+         Emit ((ushort)lo);\r
+\r
+         int words = (set.Length + 0xf) >> 4;\r
+         Emit ((ushort)words);\r
+\r
+         int bit = 0;\r
+         for (int w = 0; w < words; ++ w) {\r
+             ushort word = 0;\r
+             for (int i = 0; i < 16 && bit < set.Length; ++ i, ++ bit) {\r
+                 if (set[bit])\r
+                     word |= (ushort)(1 << i);\r
+             }\r
+\r
+             Emit (word);\r
+         }\r
+     }\r
+\r
+     // String: opcode word, length, then the characters (lower-cased when 'ignore' is set).\r
+     public void EmitString (string str, bool ignore, bool reverse) {\r
+         Emit (OpCode.String, MakeFlags (false, ignore, reverse, false));\r
+\r
+         // the length is taken from the string as passed in\r
+         int len = str.Length;\r
+         Emit ((ushort)len);\r
+\r
+         string chars = ignore ? str.ToLower () : str;\r
+         for (int i = 0; i < len; ++ i)\r
+             Emit ((ushort)chars[i]);\r
+     }\r
+\r
+     public void EmitPosition (Position pos) {\r
+         Emit (OpCode.Position);\r
+         Emit ((ushort)pos);\r
+     }\r
+\r
+     public void EmitOpen (int gid) {\r
+         Emit (OpCode.Open);\r
+         Emit ((ushort)gid);\r
+     }\r
+\r
+     public void EmitClose (int gid) {\r
+         Emit (OpCode.Close);\r
+         Emit ((ushort)gid);\r
+     }\r
+\r
+     public void EmitBalance (int gid, int balance) {\r
+         Emit (OpCode.Balance);\r
+         Emit ((ushort)gid);\r
+         Emit ((ushort)balance);\r
+     }\r
+\r
+     public void EmitReference (int gid, bool ignore, bool reverse) {\r
+         OpFlags flags = MakeFlags (false, ignore, reverse, false);\r
+         Emit (OpCode.Reference, flags);\r
+         Emit ((ushort)gid);\r
+     }\r
+\r
+     public void EmitIfDefined (int gid, LinkRef tail) {\r
+         EmitWithLink (OpCode.IfDefined, (OpFlags)0, tail);\r
+         Emit ((ushort)gid);\r
+     }\r
+\r
+     public void EmitSub (LinkRef tail) {\r
+         EmitWithLink (OpCode.Sub, (OpFlags)0, tail);\r
+     }\r
+\r
+     // Test carries two link operands: 'yes' first, then 'tail'.\r
+     public void EmitTest (LinkRef yes, LinkRef tail) {\r
+         BeginLink (yes);\r
+         BeginLink (tail);\r
+         Emit (OpCode.Test);\r
+         EmitLink (yes);\r
+         EmitLink (tail);\r
+     }\r
+\r
+     public void EmitBranch (LinkRef next) {\r
+         EmitWithLink (OpCode.Branch, (OpFlags)0, next);\r
+     }\r
+\r
+     public void EmitJump (LinkRef target) {\r
+         EmitWithLink (OpCode.Jump, (OpFlags)0, target);\r
+     }\r
+\r
+     public void EmitRepeat (int min, int max, bool lazy, LinkRef until) {\r
+         EmitWithLink (OpCode.Repeat, MakeFlags (false, false, false, lazy), until);\r
+         Emit ((ushort)min);\r
+         Emit ((ushort)max);\r
+     }\r
+\r
+     // Patches the pending links of 'repeat' to this address, then emits Until.\r
+     public void EmitUntil (LinkRef repeat) {\r
+         ResolveLink (repeat);\r
+         Emit (OpCode.Until);\r
+     }\r
+\r
+     public void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail) {\r
+         EmitWithLink (OpCode.FastRepeat, MakeFlags (false, false, false, lazy), tail);\r
+         Emit ((ushort)min);\r
+         Emit ((ushort)max);\r
+     }\r
+\r
+     public void EmitIn (LinkRef tail) {\r
+         EmitWithLink (OpCode.In, (OpFlags)0, tail);\r
+     }\r
+\r
+     public void EmitAnchor (int offset, LinkRef tail) {\r
+         EmitWithLink (OpCode.Anchor, (OpFlags)0, tail);\r
+         Emit ((ushort)offset);\r
+     }\r
+\r
+     public void EmitInfo (int count, int min, int max) {\r
+         Emit (OpCode.Info);\r
+         Emit ((ushort)count);\r
+         Emit ((ushort)min);\r
+         Emit ((ushort)max);\r
+     }\r
+\r
+     public LinkRef NewLink () {\r
+         return new PatternLinkStack ();\r
+     }\r
+\r
+     // Pops every pending link record and overwrites its placeholder word with the\r
+     // distance from the record's base address to the current address.\r
+     public void ResolveLink (LinkRef lref) {\r
+         PatternLinkStack links = (PatternLinkStack)lref;\r
+\r
+         while (links.Pop ()) {\r
+             int slot = links.OffsetAddress;\r
+             pgm[slot] = (ushort)links.GetOffset (CurrentAddress);\r
+         }\r
+     }\r
+\r
+     // private members\r
+\r
+     private static OpFlags MakeFlags (bool negate, bool ignore, bool reverse, bool lazy) {\r
+         OpFlags flags = OpFlags.None;\r
+         flags |= negate ? OpFlags.Negate : OpFlags.None;\r
+         flags |= ignore ? OpFlags.IgnoreCase : OpFlags.None;\r
+         flags |= reverse ? OpFlags.RightToLeft : OpFlags.None;\r
+         flags |= lazy ? OpFlags.Lazy : OpFlags.None;\r
+\r
+         return flags;\r
+     }\r
+\r
+     // Emits an instruction whose first operand is a link: records the base address,\r
+     // writes the opcode word, then writes the link placeholder.\r
+     private void EmitWithLink (OpCode op, OpFlags flags, LinkRef link) {\r
+         BeginLink (link);\r
+         Emit (op, flags);\r
+         EmitLink (link);\r
+     }\r
+\r
+     private void Emit (OpCode op) {\r
+         Emit (op, (OpFlags)0);\r
+     }\r
+\r
+     private void Emit (OpCode op, OpFlags flags) {\r
+         ushort word = EncodeOp (op, flags);\r
+         Emit (word);\r
+     }\r
+\r
+     private void Emit (ushort word) {\r
+         pgm.Add (word);\r
+     }\r
+\r
+     // Address the next emitted word will get.\r
+     private int CurrentAddress {\r
+         get { return pgm.Count; }\r
+     }\r
+\r
+     // Remembers the current address as the base that link offsets are measured from.\r
+     private void BeginLink (LinkRef lref) {\r
+         PatternLinkStack links = (PatternLinkStack)lref;\r
+         links.BaseAddress = CurrentAddress;\r
+     }\r
+\r
+     // Reserves one word for the link offset and saves the record for ResolveLink.\r
+     private void EmitLink (LinkRef lref) {\r
+         PatternLinkStack links = (PatternLinkStack)lref;\r
+         links.OffsetAddress = CurrentAddress;\r
+         Emit ((ushort)0); // placeholder\r
+         links.Push ();\r
+     }\r
+\r
+     // Link stack whose records pair a base address with the address of a placeholder word.\r
+     private class PatternLinkStack : LinkStack {\r
+         private struct Link {\r
+             public int base_addr;\r
+             public int offset_addr;\r
+         }\r
+\r
+         Link link;\r
+\r
+         public PatternLinkStack () {\r
+         }\r
+\r
+         public int BaseAddress {\r
+             set { link.base_addr = value; }\r
+         }\r
+\r
+         public int OffsetAddress {\r
+             get { return link.offset_addr; }\r
+             set { link.offset_addr = value; }\r
+         }\r
+\r
+         public int GetOffset (int target_addr) {\r
+             return target_addr - link.base_addr;\r
+         }\r
+\r
+         // LinkStack implementation\r
+\r
+         protected override object GetCurrent () { return link; }\r
+         protected override void SetCurrent (object l) { link = (Link)l; }\r
+     }\r
+ }\r
+\r
+ // LinkRef that keeps a stack of saved records; subclasses decide what a record holds.\r
+ abstract class LinkStack : LinkRef {\r
+     private Stack stack;\r
+\r
+     public LinkStack () {\r
+         stack = new Stack ();\r
+     }\r
+\r
+     // Saves the subclass's current record on the stack.\r
+     public void Push () {\r
+         object current = GetCurrent ();\r
+         stack.Push (current);\r
+     }\r
+\r
+     // Makes the most recently saved record current again.\r
+     // Returns false when no saved record is left.\r
+     public bool Pop () {\r
+         if (stack.Count == 0)\r
+             return false;\r
+\r
+         SetCurrent (stack.Pop ());\r
+         return true;\r
+     }\r
+\r
+     protected abstract object GetCurrent ();\r
+     protected abstract void SetCurrent (object l);\r
+ }\r
+}\r
--- /dev/null
+//\r
+// assembly: System\r
+// namespace: System.Text.RegularExpressions\r
+// file: debug.cs\r
+//\r
+// author: Dan Lewis (dlewis@gmx.co.uk)\r
+// (c) 2002\r
+\r
+using System;\r
+using System.Collections;\r
+\r
+namespace System.Text.RegularExpressions {\r
+\r
+ // Debugging aid that writes a compiled pattern program to the console, one instruction per line.\r
+ class Disassembler {\r
+     // Disassembles the whole image starting at address 0.\r
+     public static void DisassemblePattern (ushort[] image) {\r
+         DisassembleBlock (image, 0, 0);\r
+     }\r
+\r
+     // Prints instructions from 'pc' to the end of 'image'. Each line shows the address,\r
+     // an indent of two spaces per 'depth', and the formatted instruction.\r
+     public static void DisassembleBlock (ushort[] image, int pc, int depth) {\r
+         OpCode op;\r
+         OpFlags flags;\r
+\r
+         for (;;) {\r
+             if (pc >= image.Length)\r
+                 return;\r
+\r
+             PatternCompiler.DecodeOp (image[pc], out op, out flags);\r
+             Console.Write (FormatAddress (pc) + ": "); // address\r
+             Console.Write (new string (' ', depth * 2)); // indent\r
+             Console.Write (DisassembleOp (image, pc)); // instruction\r
+             Console.WriteLine ();\r
+\r
+             // instruction length in words, opcode word included\r
+             int skip;\r
+             switch (op) {\r
+             case OpCode.False: case OpCode.True: case OpCode.Until:\r
+                 skip = 1;\r
+                 break;\r
+\r
+             case OpCode.Character: case OpCode.Category: case OpCode.Position:\r
+             case OpCode.Open: case OpCode.Close: case OpCode.Reference:\r
+             case OpCode.Sub: case OpCode.Branch: case OpCode.Jump: case OpCode.In:\r
+                 skip = 2;\r
+                 break;\r
+\r
+             case OpCode.Balance: case OpCode.IfDefined: case OpCode.Range:\r
+             case OpCode.Test: case OpCode.Anchor:\r
+                 skip = 3;\r
+                 break;\r
+\r
+             case OpCode.Repeat: case OpCode.FastRepeat: case OpCode.Info:\r
+                 skip = 4;\r
+                 break;\r
+\r
+             // variable length: the operand holds the number of payload words\r
+             case OpCode.String: skip = image[pc + 1] + 2; break;\r
+             case OpCode.Set: skip = image[pc + 2] + 3; break;\r
+\r
+             default:\r
+                 skip = 1;\r
+                 break;\r
+             }\r
+\r
+             pc += skip;\r
+         }\r
+     }\r
+\r
+     // Formats the single instruction at 'pc': opcode name, flags in brackets when any\r
+     // are set, then the operands. Link operands are shown as absolute addresses.\r
+     public static string DisassembleOp (ushort[] image, int pc) {\r
+         OpCode op;\r
+         OpFlags flags;\r
+\r
+         PatternCompiler.DecodeOp (image[pc], out op, out flags);\r
+         string str = op.ToString ();\r
+         if (flags != 0)\r
+             str += "[" + flags.ToString ("f") + "]";\r
+\r
+         switch (op) {\r
+         case OpCode.False: case OpCode.True: case OpCode.Until:\r
+         default:\r
+             break;\r
+\r
+         case OpCode.Info:\r
+             str += " " + image[pc + 1];\r
+             str += " (" + image[pc + 2] + ", " + image[pc + 3] + ")";\r
+             break;\r
+\r
+         case OpCode.Character:\r
+             str += " '" + FormatChar ((char)image[pc + 1]) + "'";\r
+             break;\r
+\r
+         case OpCode.Category:\r
+             str += " /" + (Category)image[pc + 1];\r
+             break;\r
+\r
+         case OpCode.Range:\r
+             str += " '" + FormatChar ((char)image[pc + 1]) + "', ";\r
+             str += " '" + FormatChar ((char)image[pc + 2]) + "'";\r
+             break;\r
+\r
+         case OpCode.Set:\r
+             str += " " + FormatSet (image, pc + 1);\r
+             break;\r
+\r
+         case OpCode.String:\r
+             str += " '" + ReadString (image, pc + 1) + "'";\r
+             break;\r
+\r
+         case OpCode.Position:\r
+             str += " /" + (Position)image[pc + 1];\r
+             break;\r
+\r
+         case OpCode.Open: case OpCode.Close: case OpCode.Reference:\r
+             str += " " + image[pc + 1];\r
+             break;\r
+\r
+         case OpCode.Balance:\r
+             str += " " + image[pc + 1] + " " + image[pc + 2];\r
+             break;\r
+\r
+         case OpCode.IfDefined: case OpCode.Anchor:\r
+             str += " :" + FormatAddress (pc + image[pc + 1]);\r
+             str += " " + image[pc + 2];\r
+             break;\r
+\r
+         case OpCode.Sub: case OpCode.Branch: case OpCode.Jump:\r
+         case OpCode.In:\r
+             str += " :" + FormatAddress (pc + image[pc + 1]);\r
+             break;\r
+\r
+         case OpCode.Test:\r
+             str += " :" + FormatAddress (pc + image[pc + 1]);\r
+             str += ", :" + FormatAddress (pc + image[pc + 2]);\r
+             break;\r
+\r
+         case OpCode.Repeat: case OpCode.FastRepeat:\r
+             str += " :" + FormatAddress (pc + image[pc + 1]);\r
+             str += " (" + image[pc + 2] + ", ";\r
+             if (image[pc + 3] == 0xffff)\r
+                 str += "Inf";\r
+             else\r
+                 str += image[pc + 3];\r
+             str += ")";\r
+             break;\r
+\r
+         }\r
+\r
+         return str;\r
+     }\r
+\r
+     // private static members\r
+\r
+     // Reads a length-prefixed string: image[pc] is the length, the characters follow.\r
+     private static string ReadString (ushort[] image, int pc) {\r
+         int len = image[pc];\r
+         char[] chars = new char[len];\r
+\r
+         for (int i = 0; i < len; ++ i)\r
+             chars[i] = (char)image[pc + i + 1];\r
+\r
+         return new string (chars);\r
+     }\r
+\r
+     private static string FormatAddress (int pc) {\r
+         return pc.ToString ("x4");\r
+     }\r
+\r
+     // Formats a Set operand block (base character, word count, bitmap words) as a\r
+     // bracketed list of characters and character ranges.\r
+     private static string FormatSet (ushort[] image, int pc) {\r
+         int lo = image[pc ++];\r
+         int hi = (image[pc ++] << 4) - 1;\r
+\r
+         string str = "[";\r
+\r
+         bool hot = false;\r
+         char a = (char)0, b;\r
+\r
+         // scan one position past the last bit and treat it as clear, so a run that\r
+         // reaches (or starts on) the last bit is closed with the correct end character\r
+         for (int i = 0; i <= hi + 1; ++ i) {\r
+             bool m = i <= hi && (image[pc + (i >> 4)] & (1 << (i & 0xf))) != 0;\r
+\r
+             if (m && !hot) { // start of range\r
+                 a = (char)(lo + i);\r
+                 hot = true;\r
+             }\r
+             else if (!m && hot) { // end of range: the previous bit was the last one set\r
+                 b = (char)(lo + i - 1);\r
+\r
+                 str += FormatChar (a);\r
+                 if (b != a)\r
+                     str += "-" + FormatChar (b);\r
+\r
+                 hot = false;\r
+             }\r
+         }\r
+\r
+         str += "]";\r
+         return str;\r
+     }\r
+\r
+     // Formats one character: '-' and ']' are backslash-escaped, letters, digits and\r
+     // symbols are shown as-is, control characters as '^' plus ('@' + c), and\r
+     // everything else as a \uXXXX escape.\r
+     private static string FormatChar (char c) {\r
+         if (c == '-' || c == ']')\r
+             return "\\" + c;\r
+\r
+         if (Char.IsLetterOrDigit (c) || Char.IsSymbol (c))\r
+             return c.ToString ();\r
+\r
+         if (Char.IsControl (c)) {\r
+             return "^" + (char)('@' + c);\r
+         }\r
+\r
+         return "\\u" + ((int)c).ToString ("x4");\r
+     }\r
+ }\r
+}\r
--- /dev/null
+//\r
+// assembly: System\r
+// namespace: System.Text.RegularExpressions\r
+// file: interpreter.cs\r
+//\r
+// author: Dan Lewis (dlewis@gmx.co.uk)\r
+// (c) 2002\r
+\r
+using System;\r
+using System.Collections;\r
+using System.Globalization;\r
+\r
+namespace System.Text.RegularExpressions {\r
+\r
+ class Interpreter : IMachine {\r
+ // Binds the interpreter to a compiled program and reads the Info block at its start.\r
+ public Interpreter (ushort[] program) {\r
+     this.program = program;\r
+     this.qs = null;\r
+     this.checkpoints = new Stack ();\r
+\r
+     // process info block: word 0 must be the Info opcode\r
+\r
+     OpCode first = (OpCode)program[0];\r
+     if (first != OpCode.Info)\r
+         throw NewInterpretException ("Can't find info block.");\r
+\r
+     this.group_count = program[1] + 1;\r
+     this.match_min = program[2];\r
+     this.match_max = program[3];\r
+\r
+     // setup: instructions begin right after the four-word info block\r
+\r
+     this.program_start = 4;\r
+     this.captures = new Capture[group_count];\r
+ }\r
+\r
+ // IMachine implementation\r
+\r
+ // Runs the program over 'text' with 'start' as scan position and 'end' as text end.\r
+ // Returns a new Match built from the capture state on success, Match.Empty otherwise.\r
+ public Match Scan (Regex regex, string text, int start, int end) {\r
+     this.text = text;\r
+     this.text_end = end;\r
+     this.scan_ptr = start;\r
+\r
+     bool matched = Eval (Mode.Match, ref scan_ptr, program_start);\r
+     if (!matched)\r
+         return Match.Empty;\r
+\r
+     return new Match (regex, this, end, captures);\r
+ }\r
+\r
+ // private methods\r
+\r
+ // Clears all match state: fresh captures for every group, no checkpoints,\r
+ // no active repeat contexts.\r
+ private void Reset () {\r
+     int gid = 0;\r
+     while (gid < group_count) {\r
+         captures[gid] = new Capture (text);\r
+         ++ gid;\r
+     }\r
+\r
+     checkpoints.Clear ();\r
+     checkpoint = 0;\r
+     repeat = null;\r
+     fast = null;\r
+ }\r
+\r
+ // Executes the program starting at 'pc' with the text pointer at 'ref_ptr'.\r
+ //\r
+ // In Mode.Match the result says whether the expression matched; on success\r
+ // 'ref_ptr' receives the final text pointer. In Mode.Count the expression at\r
+ // fast.Expression is re-run after each pass, incrementing fast.Count, until the\r
+ // fast repeat context reports that enough iterations were made (see Pass/Fail below).\r
+ private bool Eval (Mode mode, ref int ref_ptr, int pc) {\r
+     int ptr = ref_ptr;\r
+ Begin:\r
+     for (;;) {\r
+         // low byte of the instruction word is the opcode, high byte the flags\r
+         ushort word = program[pc];\r
+         OpCode op = (OpCode)(word & 0x00ff);\r
+         OpFlags flags = (OpFlags)(word & 0xff00);\r
+\r
+         switch (op) {\r
+         case OpCode.Anchor: {\r
+             int skip = program[pc + 1];\r
+\r
+             int anch_offset = program[pc + 2];\r
+             int anch_ptr = ptr + anch_offset;\r
+             int anch_end = text_end - match_min + anch_offset; // maximum anchor position\r
+\r
+             // the general case for an anchoring expression is at the bottom, however we\r
+             // do some checks for the common cases before to save processing time. the current\r
+             // optimizer only outputs three types of anchoring expressions: fixed position,\r
+             // fixed substring, and no anchor.\r
+\r
+             OpCode anch_op = (OpCode)(program[pc + 3] & 0x00ff);\r
+             if (anch_op == OpCode.Position && skip == 6) { // position anchor\r
+                 // Anchor\r
+                 // Position\r
+                 // True\r
+\r
+                 switch ((Position)program[pc + 4]) {\r
+                 case Position.StartOfString:\r
+                     if (anch_ptr == 0) {\r
+                         ptr = 0;\r
+                         if (TryMatch (ref ptr, pc + skip))\r
+                             goto Pass;\r
+                     }\r
+                     break;\r
+\r
+                 case Position.StartOfLine:\r
+                     if (anch_ptr == 0) {\r
+                         ptr = 0;\r
+                         if (TryMatch (ref ptr, pc + skip))\r
+                             goto Pass;\r
+\r
+                         ++ anch_ptr;\r
+                     }\r
+\r
+                     while (anch_ptr <= anch_end) {\r
+                         if (text[anch_ptr - 1] == '\n') {\r
+                             ptr = anch_ptr - anch_offset;\r
+                             if (TryMatch (ref ptr, pc + skip))\r
+                                 goto Pass;\r
+                         }\r
+\r
+                         ++ anch_ptr;\r
+                     }\r
+                     break;\r
+\r
+                 case Position.StartOfScan:\r
+                     if (anch_ptr == scan_ptr) {\r
+                         ptr = scan_ptr - anch_offset;\r
+                         if (TryMatch (ref ptr, pc + skip))\r
+                             goto Pass;\r
+                     }\r
+                     break;\r
+\r
+                 default:\r
+                     // FIXME\r
+                     break;\r
+                 }\r
+             }\r
+             else if (qs != null ||\r
+                 (anch_op == OpCode.String && skip == 6 + program[pc + 4])) { // substring anchor\r
+                 // Anchor\r
+                 // String\r
+                 // True\r
+\r
+                 if (qs == null) {\r
+                     bool ignore = ((OpFlags)program[pc + 3] & OpFlags.IgnoreCase) != 0;\r
+                     string substring = GetString (pc + 3);\r
+\r
+                     qs = new QuickSearch (substring, ignore);\r
+                 }\r
+\r
+                 while (anch_ptr <= anch_end) {\r
+                     anch_ptr = qs.Search (text, anch_ptr, anch_end);\r
+                     if (anch_ptr < 0)\r
+                         break;\r
+\r
+                     ptr = anch_ptr - anch_offset;\r
+                     if (TryMatch (ref ptr, pc + skip))\r
+                         goto Pass;\r
+\r
+                     ++ anch_ptr;\r
+                 }\r
+             }\r
+             else if (anch_op == OpCode.True) { // no anchor\r
+                 // Anchor\r
+                 // True\r
+\r
+                 while (anch_ptr <= anch_end) {\r
+                     ptr = anch_ptr;\r
+                     if (TryMatch (ref ptr, pc + skip))\r
+                         goto Pass;\r
+\r
+                     ++ anch_ptr;\r
+                 }\r
+             }\r
+             else { // general case\r
+                 // Anchor\r
+                 // <expr>\r
+                 // True\r
+\r
+                 while (anch_ptr <= anch_end) {\r
+                     ptr = anch_ptr;\r
+                     if (Eval (Mode.Match, ref ptr, pc + 3)) {\r
+                         // anchor expression passed: try real expression at the correct offset\r
+\r
+                         ptr = anch_ptr - anch_offset;\r
+                         if (TryMatch (ref ptr, pc + skip))\r
+                             goto Pass;\r
+                     }\r
+\r
+                     ++ anch_ptr;\r
+                 }\r
+             }\r
+\r
+             goto Fail;\r
+         }\r
+\r
+         case OpCode.False: {\r
+             goto Fail;\r
+         }\r
+\r
+         case OpCode.True: {\r
+             goto Pass;\r
+         }\r
+\r
+         case OpCode.Position: {\r
+             if (!IsPosition ((Position)program[pc + 1], ptr))\r
+                 goto Fail;\r
+             pc += 2;\r
+             break;\r
+         }\r
+\r
+         case OpCode.String: {\r
+             bool reverse = (flags & OpFlags.RightToLeft) != 0;\r
+             bool ignore = (flags & OpFlags.IgnoreCase) != 0;\r
+             int len = program[pc + 1];\r
+\r
+             if (reverse) {\r
+                 ptr -= len;\r
+                 if (ptr < 0)\r
+                     goto Fail;\r
+             }\r
+             else if (ptr + len > text_end)\r
+                 goto Fail;\r
+\r
+             pc += 2;\r
+             for (int i = 0; i < len; ++ i) {\r
+                 char c = text[ptr + i];\r
+                 if (ignore)\r
+                     c = Char.ToLower (c);\r
+\r
+                 if (c != (char)program[pc ++])\r
+                     goto Fail;\r
+             }\r
+\r
+             if (!reverse)\r
+                 ptr += len;\r
+             break;\r
+         }\r
+\r
+         case OpCode.Reference: {\r
+             bool reverse = (flags & OpFlags.RightToLeft) != 0;\r
+             bool ignore = (flags & OpFlags.IgnoreCase) != 0;\r
+             Capture cap = captures[program[pc + 1]].GetLastDefined ();\r
+             if (cap == null)\r
+                 goto Fail;\r
+\r
+             int str = cap.Index;\r
+             int len = cap.Length;\r
+\r
+             if (reverse) {\r
+                 ptr -= len;\r
+                 if (ptr < 0)\r
+                     goto Fail;\r
+             }\r
+             else if (ptr + len > text_end)\r
+                 goto Fail;\r
+\r
+             pc += 2;\r
+             for (int i = 0; i < len; ++ i) {\r
+                 if (ignore) {\r
+                     if (Char.ToLower (text[ptr + i]) != Char.ToLower (text[str + i]))\r
+                         goto Fail;\r
+                 }\r
+                 else {\r
+                     if (text[ptr + i] != text[str + i])\r
+                         goto Fail;\r
+                 }\r
+             }\r
+\r
+             if (!reverse)\r
+                 ptr += len;\r
+             break;\r
+         }\r
+\r
+         case OpCode.Character: case OpCode.Category:\r
+         case OpCode.Range: case OpCode.Set: {\r
+             if (!EvalChar (mode, ref ptr, ref pc, false))\r
+                 goto Fail;\r
+             break;\r
+         }\r
+\r
+         case OpCode.In: {\r
+             int target = pc + program[pc + 1];\r
+             pc += 2;\r
+             if (!EvalChar (mode, ref ptr, ref pc, true))\r
+                 goto Fail;\r
+\r
+             pc = target;\r
+             break;\r
+         }\r
+\r
+         case OpCode.Open: {\r
+             Open (program[pc + 1], ptr);\r
+             pc += 2;\r
+             break;\r
+         }\r
+\r
+         case OpCode.Close: {\r
+             Close (program[pc + 1], ptr);\r
+             pc += 2;\r
+             break;\r
+         }\r
+\r
+         case OpCode.Balance: {\r
+             Balance (program[pc + 1], program[pc + 2], ptr);\r
+             // step over opcode + two operands; without this the same Balance\r
+             // instruction would be executed again on the next loop iteration\r
+             pc += 3;\r
+             break;\r
+         }\r
+\r
+         case OpCode.IfDefined: {\r
+             Capture cap = captures[program[pc + 2]];\r
+             if (cap.GetLastDefined () == null)\r
+                 pc += program[pc + 1];\r
+             else\r
+                 pc += 3;\r
+             break;\r
+         }\r
+\r
+         case OpCode.Sub: {\r
+             if (!Eval (Mode.Match, ref ptr, pc + 2))\r
+                 goto Fail;\r
+\r
+             pc += program[pc + 1];\r
+             break;\r
+         }\r
+\r
+         case OpCode.Test: {\r
+             // the test runs on a copy of the text pointer, so 'ptr' itself does not move\r
+             int cp = Checkpoint ();\r
+             int test_ptr = ptr;\r
+             if (Eval (Mode.Match, ref test_ptr, pc + 3))\r
+                 pc += program[pc + 1];\r
+             else {\r
+                 Backtrack (cp);\r
+                 pc += program[pc + 2];\r
+             }\r
+             break;\r
+         }\r
+\r
+         case OpCode.Branch: {\r
+             // try each alternative in turn; the chain of branches ends at a False op\r
+             OpCode branch_op;\r
+             do {\r
+                 int cp = Checkpoint ();\r
+                 if (Eval (mode, ref ptr, pc + 2))\r
+                     goto Pass;\r
+\r
+                 Backtrack (cp);\r
+\r
+                 pc += program[pc + 1];\r
+                 branch_op = (OpCode)(program[pc] & 0xff);\r
+             } while (branch_op != OpCode.False);\r
+\r
+             goto Fail;\r
+         }\r
+\r
+         case OpCode.Jump: {\r
+             pc += program[pc + 1];\r
+             break;\r
+         }\r
+\r
+         case OpCode.Repeat: {\r
+             this.repeat = new RepeatContext (\r
+                 this.repeat, // previous context\r
+                 program[pc + 2], // minimum\r
+                 program[pc + 3], // maximum\r
+                 (flags & OpFlags.Lazy) != 0, // lazy\r
+                 pc + 4 // subexpression\r
+             );\r
+\r
+             if (Eval (Mode.Match, ref ptr, pc + program[pc + 1]))\r
+                 goto Pass;\r
+             else {\r
+                 this.repeat = this.repeat.Previous;\r
+                 goto Fail;\r
+             }\r
+         }\r
+\r
+         case OpCode.Until: {\r
+             RepeatContext current = this.repeat;\r
+             int start = current.Start;\r
+\r
+             if (!current.IsMinimum) {\r
+                 ++ current.Count;\r
+                 current.Start = ptr;\r
+                 if (Eval (Mode.Match, ref ptr, repeat.Expression))\r
+                     goto Pass;\r
+\r
+                 current.Start = start;\r
+                 -- current.Count;\r
+                 goto Fail;\r
+             }\r
+\r
+             if (ptr == current.Start) {\r
+                 // degenerate match ... match tail or fail\r
+\r
+                 this.repeat = current.Previous;\r
+                 if (Eval (Mode.Match, ref ptr, pc + 1))\r
+                     goto Pass;\r
+\r
+                 goto Fail;\r
+             }\r
+\r
+             if (current.IsLazy) {\r
+                 // match tail first ...\r
+\r
+                 this.repeat = current.Previous;\r
+                 int cp = Checkpoint ();\r
+                 if (Eval (Mode.Match, ref ptr, pc + 1))\r
+                     goto Pass;\r
+\r
+                 Backtrack (cp);\r
+\r
+                 // ... then match more\r
+\r
+                 this.repeat = current;\r
+                 if (!current.IsMaximum) {\r
+                     ++ current.Count;\r
+                     current.Start = ptr;\r
+                     if (Eval (Mode.Match, ref ptr, current.Expression))\r
+                         goto Pass;\r
+\r
+                     current.Start = start;\r
+                     -- current.Count;\r
+                     goto Fail;\r
+                 }\r
+\r
+                 return false;\r
+             }\r
+             else {\r
+                 // match more first ...\r
+\r
+                 if (!current.IsMaximum) {\r
+                     int cp = Checkpoint ();\r
+                     ++ current.Count;\r
+                     current.Start = ptr;\r
+                     if (Eval (Mode.Match, ref ptr, current.Expression))\r
+                         goto Pass;\r
+\r
+                     current.Start = start;\r
+                     -- current.Count;\r
+                     Backtrack (cp);\r
+                 }\r
+\r
+                 // ... then match tail\r
+\r
+                 this.repeat = current.Previous;\r
+                 if (Eval (Mode.Match, ref ptr, pc + 1))\r
+                     goto Pass;\r
+\r
+                 this.repeat = current;\r
+                 goto Fail;\r
+             }\r
+         }\r
+\r
+         case OpCode.FastRepeat: {\r
+             this.fast = new RepeatContext (\r
+                 fast,\r
+                 program[pc + 2], // minimum\r
+                 program[pc + 3], // maximum\r
+                 (flags & OpFlags.Lazy) != 0, // lazy\r
+                 pc + 4 // subexpression\r
+             );\r
+             fast.Start = ptr;\r
+\r
+             int cp = Checkpoint ();\r
+\r
+             pc += program[pc + 1]; // tail expression\r
+             ushort tail_word = program[pc];\r
+\r
+             int c1, c2; // first character of tail operator\r
+             int coff; // 0 or -1 depending on direction\r
+\r
+             OpCode tail_op = (OpCode)(tail_word & 0xff);\r
+             if (tail_op == OpCode.Character || tail_op == OpCode.String) {\r
+                 OpFlags tail_flags = (OpFlags)(tail_word & 0xff00);\r
+\r
+                 if (tail_op == OpCode.String)\r
+                     c1 = program[pc + 2]; // first char of string\r
+                 else\r
+                     c1 = program[pc + 1]; // character\r
+\r
+                 if ((tail_flags & OpFlags.IgnoreCase) != 0)\r
+                     c2 = Char.ToUpper ((char)c1); // ignore case\r
+                 else\r
+                     c2 = c1;\r
+\r
+                 if ((tail_flags & OpFlags.RightToLeft) != 0)\r
+                     coff = -1; // reverse\r
+                 else\r
+                     coff = 0;\r
+             }\r
+             else {\r
+                 // c1 < 0 disables the first-character check below\r
+                 c1 = c2 = -1;\r
+                 coff = 0;\r
+             }\r
+\r
+             if (fast.IsLazy) {\r
+                 if (!fast.IsMinimum && !Eval (Mode.Count, ref ptr, fast.Expression)) {\r
+                     //Console.WriteLine ("lazy fast: failed mininum.");\r
+                     fast = fast.Previous;\r
+                     goto Fail;\r
+                 }\r
+\r
+                 while (true) {\r
+                     int p = ptr + coff;\r
+                     if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&\r
+                         Eval (Mode.Match, ref ptr, pc))\r
+                         break;\r
+\r
+                     if (fast.IsMaximum) {\r
+                         //Console.WriteLine ("lazy fast: failed with maximum.");\r
+                         fast = fast.Previous;\r
+                         goto Fail;\r
+                     }\r
+\r
+                     Backtrack (cp);\r
+                     if (!Eval (Mode.Count, ref ptr, fast.Expression)) {\r
+                         //Console.WriteLine ("lazy fast: no more.");\r
+                         fast = fast.Previous;\r
+                         goto Fail;\r
+                     }\r
+                 }\r
+                 fast = fast.Previous;\r
+                 goto Pass;\r
+             }\r
+             else {\r
+                 if (!Eval (Mode.Count, ref ptr, fast.Expression)) {\r
+                     fast = fast.Previous;\r
+                     goto Fail;\r
+                 }\r
+\r
+                 // average text width consumed per iteration, used to step back one iteration\r
+                 int width;\r
+                 if (fast.Count > 0)\r
+                     width = (ptr - fast.Start) / fast.Count;\r
+                 else\r
+                     width = 0;\r
+\r
+                 while (true) {\r
+                     int p = ptr + coff;\r
+                     if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&\r
+                         Eval (Mode.Match, ref ptr, pc))\r
+                         break;\r
+\r
+                     -- fast.Count;\r
+                     if (!fast.IsMinimum) {\r
+                         fast = fast.Previous;\r
+                         goto Fail;\r
+                     }\r
+\r
+                     ptr -= width;\r
+                     Backtrack (cp);\r
+                 }\r
+                 fast = fast.Previous;\r
+                 goto Pass;\r
+             }\r
+         }\r
+\r
+         case OpCode.Info: {\r
+             throw NewInterpretException ("Info block found in pattern.");\r
+         }\r
+         }\r
+     }\r
+ Pass:\r
+     ref_ptr = ptr;\r
+\r
+     switch (mode) {\r
+     case Mode.Match:\r
+         return true;\r
+\r
+     case Mode.Count: {\r
+         // one more iteration counted; stop at the maximum, or at the minimum when lazy\r
+         ++ fast.Count;\r
+         if (fast.IsMaximum || (fast.IsLazy && fast.IsMinimum))\r
+             return true;\r
+\r
+         pc = fast.Expression;\r
+         goto Begin;\r
+     }\r
+     }\r
+\r
+ Fail:\r
+     switch (mode) {\r
+     case Mode.Match:\r
+         return false;\r
+\r
+     case Mode.Count: {\r
+         // a greedy count that already reached its minimum still succeeds\r
+         if (!fast.IsLazy && fast.IsMinimum)\r
+             return true;\r
+\r
+         ref_ptr = fast.Start;\r
+         return false;\r
+     }\r
+     }\r
+\r
+     return false;\r
+ }\r
+\r
+ // Evaluates one character test at 'pc' (multi == false) or a run of tests belonging\r
+ // to an In construct (multi == true) against a single text character. The character\r
+ // is consumed once, in the direction given by the first op's flags. 'pc' is advanced\r
+ // past every op that was examined.\r
+ private bool EvalChar (Mode mode, ref int ptr, ref int pc, bool multi) {\r
+     bool have_char = false;\r
+     char ch = '\0';\r
+     bool negate;\r
+     do {\r
+         ushort word = program[pc ++];\r
+         OpCode op = (OpCode)(word & 0x00ff);\r
+         OpFlags flags = (OpFlags)(word & 0xff00);\r
+\r
+         // consume character: the direction of an In construct is\r
+         // determined by the direction of its first op\r
+\r
+         if (!have_char) {\r
+             bool reverse = (flags & OpFlags.RightToLeft) != 0;\r
+             if (reverse) {\r
+                 if (ptr <= 0)\r
+                     return false;\r
+\r
+                 ch = text[-- ptr];\r
+             }\r
+             else {\r
+                 if (ptr >= text_end)\r
+                     return false;\r
+\r
+                 ch = text[ptr ++];\r
+             }\r
+\r
+             if ((flags & OpFlags.IgnoreCase) != 0)\r
+                 ch = Char.ToLower (ch);\r
+\r
+             have_char = true;\r
+         }\r
+\r
+         // negate flag of the op being executed\r
+\r
+         negate = (flags & OpFlags.Negate) != 0;\r
+\r
+         // execute op\r
+\r
+         switch (op) {\r
+         case OpCode.True:\r
+             return true;\r
+\r
+         case OpCode.False:\r
+             return false;\r
+\r
+         case OpCode.Character: {\r
+             char wanted = (char)program[pc ++];\r
+             if (ch == wanted)\r
+                 return !negate;\r
+             break;\r
+         }\r
+\r
+         case OpCode.Category: {\r
+             Category cat = (Category)program[pc ++];\r
+             if (CategoryUtils.IsCategory (cat, ch))\r
+                 return !negate;\r
+             break;\r
+         }\r
+\r
+         case OpCode.Range: {\r
+             int lo = (char)program[pc ++];\r
+             int hi = (char)program[pc ++];\r
+             if (lo <= ch && ch <= hi)\r
+                 return !negate;\r
+             break;\r
+         }\r
+\r
+         case OpCode.Set: {\r
+             int lo = (char)program[pc ++];\r
+             int len = (char)program[pc ++];\r
+             int bits = pc;\r
+             pc += len;\r
+\r
+             // bit index of the character inside the bitmap, 16 bits per word\r
+             int i = (int)ch - lo;\r
+             if (i < 0 || i >= len << 4)\r
+                 break;\r
+\r
+             int mask = 1 << (i & 0xf);\r
+             if ((program[bits + (i >> 4)] & mask) != 0)\r
+                 return !negate;\r
+             break;\r
+         }\r
+         }\r
+     } while (multi);\r
+\r
+     // single test that did not match: succeed only when it was negated\r
+     return negate;\r
+ }\r
+\r
+ // Resets the match state and attempts a match at 'ref_ptr' with group 0 opened there.\r
+ // On success group 0 is closed at the end position and 'ref_ptr' is updated to it.\r
+ private bool TryMatch (ref int ref_ptr, int pc) {\r
+     Reset ();\r
+\r
+     int ptr = ref_ptr;\r
+     captures[0].Open (ptr);\r
+     if (!Eval (Mode.Match, ref ptr, pc))\r
+         return false;\r
+\r
+     captures[0].Close (ptr);\r
+     ref_ptr = ptr;\r
+     return true;\r
+ }\r
+ \r
+ // Tests the zero-width assertion 'pos' at text position 'ptr'.\r
+ // Position values without a case here (such as Any) yield false.\r
+ private bool IsPosition (Position pos, int ptr) {\r
+     switch (pos) {\r
+     case Position.Start:\r
+     case Position.StartOfString:\r
+         return ptr == 0;\r
+\r
+     case Position.StartOfLine:\r
+         if (ptr == 0)\r
+             return true;\r
+         return text[ptr - 1] == '\n';\r
+\r
+     case Position.StartOfScan:\r
+         return ptr == scan_ptr;\r
+\r
+     case Position.End:\r
+         // at the end, or just before a newline that is the last character\r
+         if (ptr == text_end)\r
+             return true;\r
+         return ptr == text_end - 1 && text[ptr] == '\n';\r
+\r
+     case Position.EndOfLine:\r
+         if (ptr == text_end)\r
+             return true;\r
+         return text[ptr] == '\n';\r
+\r
+     case Position.EndOfString:\r
+         return ptr == text_end;\r
+\r
+     case Position.Boundary:\r
+         if (text_end == 0)\r
+             return false;\r
+\r
+         // at either edge only the one neighbouring character decides\r
+         if (ptr == 0)\r
+             return IsWordChar (text[ptr]);\r
+         if (ptr == text_end)\r
+             return IsWordChar (text[ptr - 1]);\r
+\r
+         return IsWordChar (text[ptr]) != IsWordChar (text[ptr - 1]);\r
+\r
+     case Position.NonBoundary:\r
+         if (text_end == 0)\r
+             return false;\r
+\r
+         if (ptr == 0)\r
+             return !IsWordChar (text[ptr]);\r
+         if (ptr == text_end)\r
+             return !IsWordChar (text[ptr - 1]);\r
+\r
+         return IsWordChar (text[ptr]) == IsWordChar (text[ptr - 1]);\r
+\r
+     default:\r
+         return false;\r
+     }\r
+ }\r
+\r
+ // True when 'c' belongs to Category.Word according to CategoryUtils.\r
+ private bool IsWordChar (char c) {\r
+     Category word = Category.Word;\r
+     return CategoryUtils.IsCategory (word, c);\r
+ }\r
+\r
+ // Reads the operand of the String instruction at 'pc': program[pc + 1] is the\r
+ // length and the characters start at program[pc + 2].\r
+ private string GetString (int pc) {\r
+     int len = program[pc + 1];\r
+     char[] cs = new char[len];\r
+\r
+     int first = pc + 2;\r
+     for (int i = 0; i < len; ++ i)\r
+         cs[i] = (char)program[first + i];\r
+\r
+     return new string (cs);\r
+ }\r
+\r
+ // capture management\r
+\r
+ // Opens group 'gid' at 'ptr'. A new Capture linked to the existing one is created\r
+ // first when the existing capture is already defined or was made under an earlier\r
+ // checkpoint than the current one.\r
+ private void Open (int gid, int ptr) {\r
+     Capture cap = captures[gid];\r
+\r
+     bool needs_new = cap.IsDefined || cap.Checkpoint < checkpoint;\r
+     if (needs_new) {\r
+         cap = new Capture (cap, checkpoint);\r
+         captures[gid] = cap;\r
+     }\r
+\r
+     cap.Open (ptr);\r
+ }\r
+\r
+ // Closes the current capture of group 'gid' at 'ptr'.\r
+ private void Close (int gid, int ptr) {\r
+     Capture cap = captures[gid];\r
+     cap.Close (ptr);\r
+ }\r
+\r
+ // Balancing group: removes the latest capture of 'balance_gid' from its chain.\r
+ // When 'gid' is positive, group 'gid' additionally captures the text between the\r
+ // end of that removed capture and 'ptr'.\r
+ private void Balance (int gid, int balance_gid, int ptr) {\r
+     Capture balance = captures[balance_gid];\r
+     if (!balance.IsDefined)\r
+         throw NewInterpretException ("Invalid state - balancing group not closed.");\r
+\r
+     if (gid > 0) {\r
+         int balance_end = balance.Index + balance.Length;\r
+         Open (gid, balance_end);\r
+         Close (gid, ptr);\r
+     }\r
+\r
+     captures[balance_gid] = balance.Previous;\r
+ }\r
+\r
+ // Saves the current captures array on the checkpoint stack and continues on a\r
+ // shallow copy of it. Returns the new stack depth, which identifies the checkpoint.\r
+ private int Checkpoint () {\r
+     Capture[] saved = captures;\r
+     checkpoints.Push (saved);\r
+     captures = (Capture[])saved.Clone ();\r
+\r
+     checkpoint = checkpoints.Count;\r
+     return checkpoint;\r
+ }\r
+\r
+ // Returns to checkpoint 'cp': drops every checkpoint made after it and makes the\r
+ // captures array saved at 'cp' current again.\r
+ private void Backtrack (int cp) {\r
+     if (checkpoints.Count < cp)\r
+         throw NewInterpretException ("Can't backtrack forwards");\r
+\r
+     for (int extra = checkpoints.Count - cp; extra > 0; -- extra)\r
+         checkpoints.Pop ();\r
+\r
+     captures = (Capture[])checkpoints.Peek ();\r
+     checkpoint = cp;\r
+\r
+     // TODO optimize this\r
+ }\r
+\r
+		// Builds (does not throw) the exception used to report interpreter errors.\r
+		private Exception NewInterpretException (string msg) {\r
+			Exception error = new ApplicationException (msg);\r
+			return error;\r
+		}\r
+\r
+ // interpreter attributes\r
+\r
+ // Immutable-looking inputs of a run: the compiled program and the text it\r
+ // is applied to. NOTE(review): where these are assigned is outside this\r
+ // part of the file - confirm before relying on their lifetime.\r
+ private ushort[] program; // regex program\r
+ private int program_start; // first instruction after info block\r
+ private string text; // input text\r
+ private int text_end; // end of input text (last character + 1)\r
+ private int group_count; // number of capturing groups\r
+ private int match_min, match_max; // match width information\r
+ private QuickSearch qs; // fast substring matcher\r
+\r
+ // match state\r
+ \r
+ private int scan_ptr; // start of scan\r
+\r
+ // Indexed by group id (see Open/Close/Balance). Checkpoint () pushes this\r
+ // array on `checkpoints' and continues with a clone of it.\r
+ private Capture[] captures; // current captures\r
+\r
+ // Set to checkpoints.Count by Checkpoint () and to the requested\r
+ // checkpoint number by Backtrack ().\r
+ private int checkpoint; // last checkpoint\r
+ private Stack checkpoints; // checkpointed captures\r
+ \r
+ private RepeatContext repeat; // current repeat context\r
+ private RepeatContext fast; // fast repeat context\r
+\r
+ // private classes\r
+\r
+		// Book-keeping for one active repetition: its bounds, laziness, the\r
+		// program counter of the repeated expression, the enclosing context,\r
+		// and the mutable iteration count / start position.\r
+		private class RepeatContext {\r
+			private RepeatContext previous;\r
+			private int min, max;\r
+			private bool lazy;\r
+			private int expr_pc;\r
+\r
+			private int start;\r
+			private int count;\r
+\r
+			// A fresh context starts with no iterations and an unset (-1) start.\r
+			public RepeatContext (RepeatContext previous, int min, int max, bool lazy, int expr_pc) {\r
+				this.count = 0;\r
+				this.start = -1;\r
+\r
+				this.expr_pc = expr_pc;\r
+				this.lazy = lazy;\r
+				this.max = max;\r
+				this.min = min;\r
+				this.previous = previous;\r
+			}\r
+\r
+			// Enclosing repeat context, as passed to the constructor.\r
+			public RepeatContext Previous {\r
+				get { return previous; }\r
+			}\r
+\r
+			// Program counter of the repeated expression.\r
+			public int Expression {\r
+				get { return expr_pc; }\r
+			}\r
+\r
+			public bool IsLazy {\r
+				get { return lazy; }\r
+			}\r
+\r
+			// True once count has reached the minimum bound.\r
+			public bool IsMinimum {\r
+				get { return count >= min; }\r
+			}\r
+\r
+			// True once count has reached the maximum bound.\r
+			public bool IsMaximum {\r
+				get { return count >= max; }\r
+			}\r
+\r
+			public int Start {\r
+				get { return start; }\r
+				set { start = value; }\r
+			}\r
+\r
+			public int Count {\r
+				get { return count; }\r
+				set { count = value; }\r
+			}\r
+		}\r
+\r
+		// Interpreter modes. The numeric values are spelled out; they are the\r
+		// same ones the compiler assigns implicitly.\r
+		private enum Mode {\r
+			Search = 0,\r
+			Match = 1,\r
+			Count = 2\r
+		}\r
+ }\r
+}\r
--- /dev/null
+//\r
+// assembly: System\r
+// namespace: System.Text.RegularExpressions\r
+// file: interval.cs\r
+//\r
+// author: Dan Lewis (dlewis@gmx.co.uk)\r
+// (c) 2002\r
+\r
+using System;\r
+using System.Collections;\r
+\r
+namespace System.Text.RegularExpressions {\r
+\r
+	// Closed integer interval [low, high]. An interval whose low is greater\r
+	// than its high is empty. The `contiguous' flag is carried along as a tag;\r
+	// the two-argument constructor and Empty set it to true.\r
+	struct Interval : IComparable {\r
+		public int low;\r
+		public int high;\r
+		public bool contiguous;\r
+\r
+		// The canonical empty interval: low = 0, high = -1.\r
+		public static Interval Empty {\r
+			get {\r
+				Interval i;\r
+				i.low = 0;\r
+				i.high = i.low - 1;\r
+				i.contiguous = true;\r
+\r
+				return i;\r
+			}\r
+		}\r
+\r
+		// The interval covering every Int32 value.\r
+		public static Interval Entire {\r
+			get { return new Interval (Int32.MinValue, Int32.MaxValue); }\r
+		}\r
+\r
+		// Creates a contiguous interval; the bounds are swapped if given in\r
+		// descending order, so this constructor never yields an empty interval.\r
+		public Interval (int low, int high) {\r
+			if (low > high) {\r
+				int t = low;\r
+				low = high;\r
+				high = t;\r
+			}\r
+\r
+			this.low = low;\r
+			this.high = high;\r
+			this.contiguous = true;\r
+		}\r
+\r
+		public bool IsDiscontiguous {\r
+			get { return !contiguous; }\r
+		}\r
+\r
+		public bool IsSingleton {\r
+			get { return contiguous && low == high; }\r
+		}\r
+\r
+		public bool IsRange {\r
+			get { return !IsSingleton && !IsEmpty; }\r
+		}\r
+\r
+		public bool IsEmpty {\r
+			get { return low > high; }\r
+		}\r
+\r
+		// Number of integers covered; 0 for an empty interval.\r
+		public int Size {\r
+			get {\r
+				if (IsEmpty)\r
+					return 0;\r
+\r
+				return high - low + 1;\r
+			}\r
+		}\r
+\r
+		// True when the two intervals share no value. An empty interval is\r
+		// disjoint from everything.\r
+		public bool IsDisjoint (Interval i) {\r
+			if (IsEmpty || i.IsEmpty)\r
+				return true;\r
+\r
+			return !(low <= i.high && i.low <= high);\r
+		}\r
+\r
+		// True when the intervals touch without overlapping (one ends exactly\r
+		// one below where the other starts).\r
+		public bool IsAdjacent (Interval i) {\r
+			if (IsEmpty || i.IsEmpty)\r
+				return false;\r
+\r
+			return low == i.high + 1 || high == i.low - 1;\r
+		}\r
+\r
+		// True when i lies entirely inside this interval. A non-empty interval\r
+		// contains the empty one; an empty interval contains nothing.\r
+		public bool Contains (Interval i) {\r
+			if (!IsEmpty && i.IsEmpty)\r
+				return true;\r
+			if (IsEmpty)\r
+				return false;\r
+\r
+			return low <= i.low && i.high <= high;\r
+		}\r
+\r
+		public bool Contains (int i) {\r
+			return low <= i && i <= high;\r
+		}\r
+\r
+		// Grows this interval to the smallest interval covering both itself\r
+		// and i. Merging an empty interval is a no-op.\r
+		public void Merge (Interval i) {\r
+			if (i.IsEmpty)\r
+				return;\r
+			if (IsEmpty) {\r
+				this.low = i.low;\r
+				this.high = i.high;\r
+				return;\r
+			}\r
+\r
+			if (i.low < low)\r
+				low = i.low;\r
+			if (i.high > high)\r
+				high = i.high;\r
+		}\r
+\r
+		// Shrinks this interval to the values it shares with i; becomes the\r
+		// empty interval (0, -1) when there are none.\r
+		public void Intersect (Interval i) {\r
+			if (IsDisjoint (i)) {\r
+				low = 0;\r
+				high = low - 1;\r
+				return;\r
+			}\r
+\r
+			if (i.low > low)\r
+				low = i.low;\r
+			// The upper bound must be clamped DOWN to the smaller high; the\r
+			// previous `i.high > high' test widened the interval instead.\r
+			if (i.high < high)\r
+				high = i.high;\r
+		}\r
+\r
+		// Orders intervals by their lower bound. Explicit comparisons are used\r
+		// rather than subtraction, which overflows for bounds such as\r
+		// Int32.MinValue (see Entire) and would report the wrong sign.\r
+		public int CompareTo (object o) {\r
+			int other = ((Interval)o).low;\r
+			if (low < other)\r
+				return -1;\r
+			if (low > other)\r
+				return 1;\r
+\r
+			return 0;\r
+		}\r
+\r
+		// Overrides Object.ToString so boxed intervals and string concatenation\r
+		// produce the same text as a direct call.\r
+		public override string ToString () {\r
+			if (IsEmpty)\r
+				return "(EMPTY)";\r
+			else if (!contiguous)\r
+				return "{" + low + ", " + high + "}";\r
+			else if (IsSingleton)\r
+				return "(" + low + ")";\r
+			else\r
+				return "(" + low + ", " + high + ")";\r
+		}\r
+	}\r
+\r
+	// A list of Interval values with helpers to sort them, merge overlapping\r
+	// or adjacent members, and derive a cheaper "meta" collection in which\r
+	// runs of members are replaced by a single covering set.\r
+	class IntervalCollection : ICollection, IEnumerable {\r
+		public IntervalCollection () {\r
+			intervals = new ArrayList ();\r
+		}\r
+\r
+		public Interval this[int i] {\r
+			get { return (Interval)intervals[i]; }\r
+			set { intervals[i] = value; }\r
+		}\r
+\r
+		public void Add (Interval i) {\r
+			intervals.Add (i);\r
+		}\r
+\r
+		public void Clear () {\r
+			intervals.Clear ();\r
+		}\r
+\r
+		// Sorts the members using Interval.CompareTo (i.e. by lower bound).\r
+		public void Sort () {\r
+			intervals.Sort ();\r
+		}\r
+\r
+		// Sorts the members, then folds every member that overlaps or is\r
+		// adjacent to its predecessor into that predecessor.\r
+		public void Normalize () {\r
+			intervals.Sort ();\r
+\r
+			int j = 0;\r
+			while (j < intervals.Count - 1) {\r
+				Interval a = (Interval)intervals[j];\r
+				Interval b = (Interval)intervals[j + 1];\r
+\r
+				if (!a.IsDisjoint (b) || a.IsAdjacent (b)) {\r
+					// Interval is a struct: merge into the local copy and\r
+					// store it back. j is not advanced so the merged member\r
+					// is compared with its new neighbour.\r
+					a.Merge (b);\r
+					intervals[j] = a;\r
+					intervals.RemoveAt (j + 1);\r
+				}\r
+				else\r
+					++ j;\r
+			}\r
+\r
+		}\r
+\r
+		// Cost function used by GetMetaCollection to weigh an interval.\r
+		public delegate double CostDelegate (Interval i);\r
+\r
+		// Normalizes this collection and returns a new, sorted collection in\r
+		// which runs of members have been replaced by a single discontiguous\r
+		// covering interval wherever cost_del rates that as cheaper.\r
+		public IntervalCollection GetMetaCollection (CostDelegate cost_del) {\r
+			IntervalCollection meta = new IntervalCollection ();\r
+\r
+			Normalize ();\r
+			Optimize (0, Count - 1, meta, cost_del);\r
+			meta.intervals.Sort ();\r
+\r
+			return meta;\r
+		}\r
+\r
+		// Considers every run [i..j] within [begin..end]. A run qualifies when\r
+		// the cost of one covering set is lower than the summed cost of its\r
+		// members; among qualifying runs the one with the largest summed\r
+		// member cost is chosen, added to meta as a discontiguous interval,\r
+		// and the ranges to its left and right are processed recursively.\r
+		// When no run qualifies the members are copied to meta unchanged.\r
+		private void Optimize (int begin, int end, IntervalCollection meta, CostDelegate cost_del) {\r
+			Interval set;\r
+			set.contiguous = false;\r
+\r
+			int best_set_begin = -1;\r
+			int best_set_end = -1;\r
+			double best_set_cost = 0;\r
+\r
+			for (int i = begin; i <= end; ++ i) {\r
+				set.low = this[i].low;\r
+\r
+				double cost = 0.0;\r
+				for (int j = i; j <= end; ++ j) {\r
+					set.high = this[j].high;\r
+					cost += cost_del (this[j]);\r
+\r
+					double set_cost = cost_del (set);\r
+					if (set_cost < cost && cost > best_set_cost) {\r
+						best_set_begin = i;\r
+						best_set_end = j;\r
+						best_set_cost = cost;\r
+					}\r
+				}\r
+			}\r
+\r
+			if (best_set_begin < 0) {\r
+				// didn't find an optimal set: add original members\r
+\r
+				for (int i = begin; i <= end; ++ i)\r
+					meta.Add (this[i]);\r
+			}\r
+			else {\r
+				// found set: add it ...\r
+\r
+				set.low = this[best_set_begin].low;\r
+				set.high = this[best_set_end].high;\r
+\r
+				meta.Add (set);\r
+\r
+				// ... and optimize to the left and right\r
+\r
+				if (best_set_begin > begin)\r
+					Optimize (begin, best_set_begin - 1, meta, cost_del);\r
+				if (best_set_end < end)\r
+					Optimize (best_set_end + 1, end, meta, cost_del);\r
+			}\r
+		}\r
+\r
+		// ICollection implementation\r
+\r
+		public int Count {\r
+			get { return intervals.Count; }\r
+		}\r
+\r
+		public bool IsSynchronized {\r
+			get { return false; }\r
+		}\r
+\r
+		public object SyncRoot {\r
+			get { return intervals; }\r
+		}\r
+\r
+		// Copies members into array starting at index and stops quietly when\r
+		// the array is full. The guard uses >= : index == array.Length is\r
+		// already past the last slot and SetValue would throw there.\r
+		public void CopyTo (Array array, int index) {\r
+			foreach (Interval i in intervals) {\r
+				if (index >= array.Length)\r
+					break;\r
+\r
+				array.SetValue (i, index ++);\r
+			}\r
+		}\r
+\r
+		// IEnumerator implementation\r
+\r
+		public IEnumerator GetEnumerator () {\r
+			return new Enumerator (intervals);\r
+		}\r
+\r
+		// Index-based enumerator over the underlying list; ptr is -1 before\r
+		// the first element and list.Count once the end has been passed.\r
+		private class Enumerator : IEnumerator {\r
+			public Enumerator (IList list) {\r
+				this.list = list;\r
+				Reset ();\r
+			}\r
+\r
+			// Throws InvalidOperationException both before the first MoveNext\r
+			// and after the end, instead of letting the list indexer fail on\r
+			// ptr == -1 with an unrelated exception.\r
+			public object Current {\r
+				get {\r
+					if (ptr < 0 || ptr >= list.Count)\r
+						throw new InvalidOperationException ();\r
+\r
+					return list[ptr];\r
+				}\r
+			}\r
+\r
+			// Once the end is reached, keeps returning false without moving\r
+			// ptr any further (previously a second call past the end threw).\r
+			public bool MoveNext () {\r
+				if (ptr >= list.Count)\r
+					return false;\r
+\r
+				return ++ ptr < list.Count;\r
+			}\r
+\r
+			public void Reset () {\r
+				ptr = -1;\r
+			}\r
+\r
+			private IList list;\r
+			private int ptr;\r
+		}\r
+\r
+		// private fields\r
+\r
+		private ArrayList intervals;\r
+	}\r
+}\r
--- /dev/null
+//\r
+// assembly: System\r
+// namespace: System.Text.RegularExpressions\r
+// file: match.cs\r
+//\r
+// author: Dan Lewis (dlewis@gmx.co.uk)\r
+// (c) 2002\r
+\r
+using System;\r
+\r
+namespace System.Text.RegularExpressions {\r
+\r
+	// One captured span of the input text. A capture is "defined" once both\r
+	// its start and end have been set (Open and Close); start may lie after\r
+	// end, in which case Index/Length describe the span in forward order.\r
+	// Captures form a chain through Previous.\r
+	public class Capture {\r
+		// private state\r
+\r
+		private Capture previous;\r
+		private string text;\r
+		private int checkpoint;\r
+		private int start, end;\r
+\r
+		// Position of the first character of the span, or 0 while undefined.\r
+		public int Index {\r
+			get {\r
+				if (!IsDefined)\r
+					return 0;	// capture not completed\r
+\r
+				// normal capture: start; reverse capture: end\r
+				return start <= end ? start : end;\r
+			}\r
+		}\r
+\r
+		// Number of characters in the span, or 0 while undefined.\r
+		public int Length {\r
+			get {\r
+				if (!IsDefined)\r
+					return 0;\r
+\r
+				return start <= end ? end - start : start - end;\r
+			}\r
+		}\r
+\r
+		// The captured substring, or "" while undefined.\r
+		public string Value {\r
+			get {\r
+				if (!IsDefined)\r
+					return "";\r
+\r
+				return text.Substring (Index, Length);\r
+			}\r
+		}\r
+\r
+		public override string ToString () {\r
+			return Value;\r
+		}\r
+\r
+		// internal members\r
+\r
+		// Shared initializer behind the internal constructors.\r
+		private Capture (Capture previous, string text, int checkpoint, int start, int end) {\r
+			this.previous = previous;\r
+			this.text = text;\r
+			this.checkpoint = checkpoint;\r
+\r
+			this.start = start;\r
+			this.end = end;\r
+		}\r
+\r
+		// empty capture\r
+		internal Capture () : this (null, null, 0, -1, -1) {\r
+		}\r
+\r
+		// copy constructor\r
+		internal Capture (Capture cap)\r
+			: this (cap.previous, cap.text, cap.checkpoint, cap.start, cap.end) {\r
+		}\r
+\r
+		// first capture\r
+		internal Capture (string text) : this (null, text, 0, -1, -1) {\r
+		}\r
+\r
+		// New undefined capture chained to previous, sharing its text.\r
+		internal Capture (Capture previous, int checkpoint)\r
+			: this (previous, previous.text, checkpoint, -1, -1) {\r
+		}\r
+\r
+		internal Capture Previous {\r
+			get { return previous; }\r
+		}\r
+\r
+		internal string Text {\r
+			get { return text; }\r
+		}\r
+\r
+		internal int Checkpoint {\r
+			get { return checkpoint; }\r
+		}\r
+\r
+		internal bool IsDefined {\r
+			get { return start >= 0 && end >= 0; }\r
+		}\r
+\r
+		// Walks back from this capture along Previous and returns the first\r
+		// defined capture found, or null when there is none.\r
+		internal Capture GetLastDefined () {\r
+			for (Capture cap = this; cap != null; cap = cap.Previous) {\r
+				if (cap.IsDefined)\r
+					return cap;\r
+			}\r
+\r
+			return null;\r
+		}\r
+\r
+		internal void Open (int ptr) {\r
+			start = ptr;\r
+		}\r
+\r
+		internal void Close (int ptr) {\r
+			end = ptr;\r
+		}\r
+	}\r
+\r
+	// A capturing group: the copy of its most recent capture (base class)\r
+	// plus the collection built from that capture.\r
+	public class Group : Capture {\r
+		private CaptureCollection captures;\r
+\r
+		public static Group Synchronized (Group inner) {\r
+			// is this enough?\r
+			return inner;\r
+		}\r
+\r
+		// Null for a group made with the parameterless constructor, which\r
+		// does not build a collection.\r
+		public CaptureCollection Captures {\r
+			get { return captures; }\r
+		}\r
+\r
+		// True when this capture or one of its predecessors is defined.\r
+		public bool Success {\r
+			get {\r
+				Capture defined = GetLastDefined ();\r
+				return defined != null;\r
+			}\r
+		}\r
+\r
+		// internal\r
+\r
+		internal Group () : base () {\r
+		}\r
+\r
+		internal Group (Capture last) : base (last) {\r
+			// TODO make construction of captures lazy\r
+\r
+			captures = new CaptureCollection (last);\r
+		}\r
+	}\r
+\r
+	// Result of one match attempt: group 0 (this object) plus one Group per\r
+	// remaining entry of the capture array, and what is needed to resume\r
+	// scanning for the next match.\r
+	public class Match : Group {\r
+		// private\r
+\r
+		private static Match empty = new Match ();\r
+\r
+		private Regex regex;\r
+		private IMachine machine;\r
+		private int text_length;\r
+		private GroupCollection groups;\r
+\r
+		// The shared instance built by the parameterless constructor.\r
+		public static Match Empty {\r
+			get { return empty; }\r
+		}\r
+\r
+		public static Match Synchronized (Match inner) {\r
+			// FIXME need to sync on machine access\r
+			return inner;\r
+		}\r
+\r
+		public GroupCollection Groups {\r
+			get { return groups; }\r
+		}\r
+\r
+		// Scans for the next match, starting where this one ends (or where it\r
+		// begins for right-to-left matching). Empty stays Empty.\r
+		public Match NextMatch () {\r
+			if (this == Empty)\r
+				return Empty;\r
+\r
+			bool rtl = regex.RightToLeft;\r
+			int length = Length;\r
+			int scan_ptr = rtl ? Index : Index + length;\r
+\r
+			// next match after an empty match: make sure scan ptr makes progress\r
+\r
+			if (length == 0) {\r
+				if (rtl)\r
+					scan_ptr -= 1;\r
+				else\r
+					scan_ptr += 1;\r
+			}\r
+\r
+			return machine.Scan (regex, Text, scan_ptr, text_length);\r
+		}\r
+\r
+		public virtual string Result (string replacement) {\r
+			return ReplacementEvaluator.Evaluate (replacement, this);\r
+		}\r
+\r
+		// internal\r
+\r
+		// Builds the empty match: no regex, no machine, itself as only group.\r
+		internal Match () : base () {\r
+			regex = null;\r
+			machine = null;\r
+			text_length = 0;\r
+\r
+			groups = new GroupCollection ();\r
+			groups.Add (this);\r
+		}\r
+\r
+		// captures[0] becomes this match; every further entry becomes a Group.\r
+		internal Match (Regex regex, IMachine machine, int text_length, Capture[] captures) : base (captures[0]) {\r
+			this.regex = regex;\r
+			this.machine = machine;\r
+			this.text_length = text_length;\r
+\r
+			groups = new GroupCollection ();\r
+			groups.Add (this);\r
+\r
+			int gid = 1;\r
+			while (gid < captures.Length) {\r
+				groups.Add (new Group (captures[gid]));\r
+				++ gid;\r
+			}\r
+		}\r
+\r
+		internal Regex Regex {\r
+			get { return regex; }\r
+		}\r
+	}\r
+}\r
--- /dev/null
+TODO:
+
+* Need to go through everything and square it with RightToLeft matching.
+ The support for this was built into an early version, and lots of things built
+ afterwards are not savvy about bi-directional matching. Things that spring to
+ mind: Regex match methods should start at 0 or text.Length depending on
+ direction. Do split and replace need changes? Match should be aware of its
+ direction (already applied some of this to NextMatch logic). The interpreter
+ needs to check left and right bounds. Anchoring and substring discovery need
+ to be reworked. RTL matches are going to have anchors on the right - ie $, \Z
+ and \z. This should be added to the anchor logic. QuickSearch needs to work in
+ reverse. There may be other stuff.... work through the code.
+
+* Add ECMAScript support to the parser. For example, [.\w\s\d] should map to
+ ECMA categories instead of the canonical ones. Backreference/octal
+ disambiguation also behaves differently. Find out what the runtime
+ behavioural difference is for cyclic backreferences, e.g. (?(1)abc\1) - this
+ is only briefly mentioned in the spec. I couldn't find much on this in the
+ ECMAScript specification either.
+
+* Check that the octal disambiguation for canonical syntax works as specified.
+
+* Add a check in QuickSearch for single character substrings. This is likely to
+ be a common case. There's no need to go through a shift table. Also, have a
+ look at just computing a relevant subset of the shift table and using an
+ (offset, size) pair to help test inclusion. Characters not in the table get
+ the default len + 1 shift.
+
+* Improve the perl test suite. Run under MS runtime to generate checksums for
+ each trial. Checksums should incorporate: all captures (index, length) for all
+ groups; names of explicit capturing groups, and the numbers they map to. Any
+ other state? RegexTrial.Execute() will then compare result and checksum.
--- /dev/null
+//\r
+// assembly: System\r
+// namespace: System.Text.RegularExpressions\r
+// file: parser.cs\r
+//\r
+// author: Dan Lewis (dlewis@gmx.co.uk)\r
+// (c) 2002\r
+\r
+using System;\r
+using System.Collections;\r
+using System.Globalization;\r
+\r
+namespace System.Text.RegularExpressions.Syntax {\r
+\r
+ class Parser {\r
+		// Parses a base-10 number of one or more digits at str[ptr]; see\r
+		// ParseNumber for the return value and how ptr is updated.\r
+		public static int ParseDecimal (string str, ref int ptr) {\r
+			const int radix = 10;\r
+			const int min_digits = 1;\r
+\r
+			return ParseNumber (str, ref ptr, radix, min_digits, Int32.MaxValue);\r
+		}\r
+\r
+		// Parses a base-8 number of one to three digits at str[ptr]; see\r
+		// ParseNumber for the return value and how ptr is updated.\r
+		public static int ParseOctal (string str, ref int ptr) {\r
+			const int radix = 8;\r
+			const int min_digits = 1;\r
+			const int max_digits = 3;\r
+\r
+			return ParseNumber (str, ref ptr, radix, min_digits, max_digits);\r
+		}\r
+\r
+		// Parses a base-16 number of exactly `digits' digits at str[ptr]; see\r
+		// ParseNumber for the return value and how ptr is updated.\r
+		public static int ParseHex (string str, ref int ptr, int digits) {\r
+			const int radix = 16;\r
+\r
+			return ParseNumber (str, ref ptr, radix, digits, digits);\r
+		}\r
+\r
+		// Reads up to max digits of base b starting at str[ptr]. A max below\r
+		// min means "no upper limit". Returns the value and advances ptr past\r
+		// the digits read; returns -1 and leaves ptr untouched when fewer than\r
+		// min digits were found.\r
+		public static int ParseNumber (string str, ref int ptr, int b, int min, int max) {\r
+			int limit = max < min ? Int32.MaxValue : max;\r
+\r
+			int pos = ptr;\r
+			int value = 0;\r
+			int count = 0;\r
+\r
+			for (; count < limit && pos < str.Length; ++ pos, ++ count) {\r
+				int digit = ParseDigit (str[pos], b, count);\r
+				if (digit < 0)\r
+					break;	// not a digit: leave pos on it\r
+\r
+				value = value * b + digit;\r
+			}\r
+\r
+			if (count < min)\r
+				return -1;\r
+\r
+			ptr = pos;\r
+			return value;\r
+		}\r
+\r
+		// Reads a group name at str[ptr]. A name starting with a digit is read\r
+		// as a decimal number and returned in canonical form (null unless it is\r
+		// greater than zero). Otherwise the longest run of name characters is\r
+		// returned, or null when that run is empty. Running off the end of str\r
+		// raises IndexOutOfRangeException.\r
+		public static string ParseName (string str, ref int ptr) {\r
+			if (Char.IsDigit (str[ptr])) {\r
+				int number = ParseNumber (str, ref ptr, 10, 1, 0);\r
+				return number > 0 ? number.ToString () : null;\r
+			}\r
+\r
+			int begin = ptr;\r
+			while (IsNameChar (str[ptr]))\r
+				++ ptr;\r
+\r
+			int length = ptr - begin;\r
+			return length > 0 ? str.Substring (begin, length) : null;\r
+		}\r
+\r
+		// Returns str with the characters \ * + ? | { [ ( ) ^ $ . # and space\r
+		// prefixed by a backslash, and tab, newline, carriage return and form\r
+		// feed replaced by \t, \n, \r and \f. All other characters are copied.\r
+		// A StringBuilder is used so the cost stays linear in str.Length\r
+		// (repeated string concatenation copied the result on every step).\r
+		public static string Escape (string str) {\r
+			System.Text.StringBuilder result = new System.Text.StringBuilder (str.Length);\r
+\r
+			for (int i = 0; i < str.Length; ++ i) {\r
+				char c = str[i];\r
+				switch (c) {\r
+				case '\\': case '*': case '+': case '?': case '|':\r
+				case '{': case '[': case '(': case ')': case '^':\r
+				case '$': case '.': case '#': case ' ':\r
+					result.Append ('\\').Append (c);\r
+					break;\r
+\r
+				case '\t': result.Append ("\\t"); break;\r
+				case '\n': result.Append ("\\n"); break;\r
+				case '\r': result.Append ("\\r"); break;\r
+				case '\f': result.Append ("\\f"); break;\r
+\r
+				default: result.Append (c); break;\r
+				}\r
+			}\r
+\r
+			return result.ToString ();\r
+		}\r
+\r
+		// Runs str through ParseString on a fresh Parser and returns the result.\r
+		public static string Unescape (string str) {\r
+			Parser parser = new Parser ();\r
+			return parser.ParseString (str);\r
+		}\r
+\r
+ // public instance\r
+\r
+		// Creates a parser with an empty list of capturing groups (caps) and\r
+		// an empty table of unresolved references (refs).\r
+		public Parser () {\r
+			refs = new Hashtable ();\r
+			caps = new ArrayList ();\r
+		}\r
+\r
+		// Parses pattern into an expression tree. Parser state is reset first,\r
+		// so one Parser can be reused. The parsing code indexes the pattern\r
+		// without bounds checks; an IndexOutOfRangeException raised that way is\r
+		// reported as an "Unexpected end of pattern." parse error.\r
+		public RegularExpression ParseRegularExpression (string pattern, RegexOptions options) {\r
+			// reset state\r
+\r
+			this.pattern = pattern;\r
+			ptr = 0;\r
+			num_groups = 0;\r
+\r
+			caps.Clear ();\r
+			refs.Clear ();\r
+\r
+			RegularExpression root;\r
+			try {\r
+				root = new RegularExpression ();\r
+				ParseGroup (root, options, null);\r
+				ResolveReferences ();\r
+\r
+				root.GroupCount = num_groups;\r
+			}\r
+			catch (IndexOutOfRangeException) {\r
+				throw NewParseException ("Unexpected end of pattern.");\r
+			}\r
+\r
+			return root;\r
+		}\r
+\r
+		// Returns a new table mapping the name of every named capturing group\r
+		// to its number. Unnamed groups are left out.\r
+		public IDictionary GetMapping () {\r
+			Hashtable name_to_number = new Hashtable ();\r
+\r
+			for (int i = 0; i < caps.Count; ++ i) {\r
+				CapturingGroup cap = (CapturingGroup)caps[i];\r
+				if (cap.Name == null)\r
+					continue;\r
+\r
+				name_to_number.Add (cap.Name, cap.Number);\r
+			}\r
+\r
+			return name_to_number;\r
+		}\r
+\r
+ // private methods\r
+\r
+		// Parses the contents of one group, starting at ptr, and appends the\r
+		// result to `group'. Parsing stops at the matching ')' or, for the\r
+		// top-level RegularExpression, at the end of the pattern.\r
+		//\r
+		//   group     - receives the parsed expression(s)\r
+		//   options   - options in effect; inline option groups such as (?i)\r
+		//               change them for the rest of this group\r
+		//   assertion - when not null the group is the body of a conditional:\r
+		//               '|' separates its true and false parts instead of\r
+		//               forming an alternation, and the assertion itself is\r
+		//               what gets appended to `group'\r
+		//\r
+		// Consecutive literal characters are collected in `literal' and turned\r
+		// into one Literal expression whenever something else has to be added.\r
+		private void ParseGroup (Group group, RegexOptions options, Assertion assertion) {\r
+			bool is_top_level = group is RegularExpression;\r
+\r
+			Alternation alternation = null;\r
+			string literal = null;\r
+\r
+			Group current = new Group ();\r
+			Expression expr = null;\r
+			bool closed = false;\r
+\r
+			while (true) {\r
+				ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
+				if (ptr >= pattern.Length)\r
+					break;\r
+\r
+				// (1) Parse for Expressions\r
+\r
+				char ch = pattern[ptr ++];\r
+\r
+				switch (ch) {\r
+				case '^': {\r
+					Position pos =\r
+						IsMultiline (options) ? Position.StartOfLine : Position.Start;\r
+					expr = new PositionAssertion (pos);\r
+					break;\r
+				}\r
+\r
+				case '$': {\r
+					Position pos =\r
+						IsMultiline (options) ? Position.EndOfLine : Position.End;\r
+					expr = new PositionAssertion (pos);\r
+					break;\r
+				}\r
+\r
+				case '.': {\r
+					Category cat =\r
+						IsSingleline (options) ? Category.AnySingleline : Category.Any;\r
+					expr = new CharacterClass (cat, false);\r
+					break;\r
+				}\r
+\r
+				case '\\': {\r
+					// A character escape yields a literal character; otherwise\r
+					// try the special escapes, and failing that take the next\r
+					// character literally.\r
+					int c = ParseEscape ();\r
+					if (c >= 0)\r
+						ch = (char)c;\r
+					else {\r
+						expr = ParseSpecial (options);\r
+\r
+						if (expr == null)\r
+							ch = pattern[ptr ++];	// default escape\r
+					}\r
+					break;\r
+				}\r
+\r
+				case '[': {\r
+					expr = ParseCharacterClass (options);\r
+					break;\r
+				}\r
+\r
+				case '(': {\r
+					bool ignore = IsIgnoreCase (options);\r
+					expr = ParseGroupingConstruct (ref options);\r
+					if (expr == null) {\r
+						// The construct produced no expression (inline options\r
+						// or a comment). If it switched case sensitivity, the\r
+						// literal collected so far was written under the OLD\r
+						// setting and must be flushed with that setting, not\r
+						// with the one that applies from here on.\r
+						if (literal != null && IsIgnoreCase (options) != ignore) {\r
+							current.AppendExpression (new Literal (literal, ignore));\r
+							literal = null;\r
+						}\r
+\r
+						continue;\r
+					}\r
+					break;\r
+				}\r
+\r
+				case ')': {\r
+					closed = true;\r
+					goto EndOfGroup;\r
+				}\r
+\r
+				case '|': {\r
+					if (literal != null) {\r
+						current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));\r
+						literal = null;\r
+					}\r
+\r
+					if (assertion != null) {\r
+						if (assertion.TrueExpression == null)\r
+							assertion.TrueExpression = current;\r
+						else if (assertion.FalseExpression == null)\r
+							assertion.FalseExpression = current;\r
+						else\r
+							throw NewParseException ("Too many | in (?()|).");\r
+					}\r
+					else {\r
+						if (alternation == null)\r
+							alternation = new Alternation ();\r
+\r
+						alternation.AddAlternative (current);\r
+					}\r
+\r
+					current = new Group ();\r
+					continue;\r
+				}\r
+\r
+				case '*': case '+': case '?': case '{': {\r
+					// a quantifier with nothing in front of it\r
+					throw NewParseException ("Bad quantifier.");\r
+				}\r
+\r
+				default:\r
+					break;	// literal character\r
+				}\r
+\r
+				ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
+\r
+				// (2) Check for Repetitions\r
+\r
+				if (ptr < pattern.Length) {\r
+					char k = pattern[ptr];\r
+\r
+					if (k == '?' || k == '*' || k == '+' || k == '{') {\r
+						++ ptr;\r
+\r
+						int min = 0, max = 0;\r
+						bool lazy = false;\r
+\r
+						// 0xffff stands for "no upper bound"\r
+						switch (k) {\r
+						case '?': min = 0; max = 1; break;\r
+						case '*': min = 0; max = 0xffff; break;\r
+						case '+': min = 1; max = 0xffff; break;\r
+						case '{': ParseRepetitionBounds (out min, out max, options); break;\r
+						}\r
+\r
+						ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
+						if (ptr < pattern.Length && pattern[ptr] == '?') {\r
+							++ ptr;\r
+							lazy = true;\r
+						}\r
+\r
+						Repetition repetition = new Repetition (min, max, lazy);\r
+\r
+						// A quantifier after a plain character applies to that\r
+						// single character only, not to the pending literal.\r
+						if (expr == null)\r
+							repetition.Expression = new Literal (ch.ToString (), IsIgnoreCase (options));\r
+						else\r
+							repetition.Expression = expr;\r
+\r
+						expr = repetition;\r
+					}\r
+				}\r
+\r
+				// (3) Append Expression and/or Literal\r
+\r
+				if (expr == null) {\r
+					if (literal == null)\r
+						literal = "";\r
+					literal += ch;\r
+				}\r
+				else {\r
+					if (literal != null) {\r
+						current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));\r
+						literal = null;\r
+					}\r
+\r
+					current.AppendExpression (expr);\r
+					expr = null;\r
+				}\r
+\r
+				if (is_top_level && ptr >= pattern.Length)\r
+					goto EndOfGroup;\r
+			}\r
+\r
+		EndOfGroup:\r
+			if (is_top_level && closed)\r
+				throw NewParseException ("Too many )'s.");\r
+			if (!is_top_level && !closed)\r
+				throw NewParseException ("Not enough )'s.");\r
+\r
+\r
+			// clean up literals and alternations\r
+\r
+			if (literal != null)\r
+				current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));\r
+\r
+			if (assertion != null) {\r
+				if (assertion.TrueExpression == null)\r
+					assertion.TrueExpression = current;\r
+				else\r
+					assertion.FalseExpression = current;\r
+\r
+				group.AppendExpression (assertion);\r
+			}\r
+			else if (alternation != null) {\r
+				alternation.AddAlternative (current);\r
+				group.AppendExpression (alternation);\r
+			}\r
+			else\r
+				group.AppendExpression (current);\r
+		}\r
+\r
+		// Parses what follows a '(' (ptr is just past it) and returns the\r
+		// resulting expression with ptr just past the closing ')'.\r
+		//\r
+		// Returns null for the two constructs that produce no expression:\r
+		// an inline option change "(?imnsx-imnsx)", which is written back to\r
+		// the caller through `options', and a comment "(?#...)".\r
+		//\r
+		// Capturing, named and balancing groups are appended to caps; names\r
+		// that must be resolved later (balancing targets, capture tests) are\r
+		// recorded in refs.\r
+		private Expression ParseGroupingConstruct (ref RegexOptions options) {\r
+			if (pattern[ptr] != '?') {\r
+				// plain "(...)": capturing unless ExplicitCapture is set\r
+				Group group;\r
+\r
+				if (IsExplicitCapture (options))\r
+					group = new Group ();\r
+				else {\r
+					group = new CapturingGroup ();\r
+					caps.Add (group);\r
+				}\r
+\r
+				ParseGroup (group, options, null);\r
+				return group;\r
+			}\r
+			else\r
+				++ ptr;\r
+\r
+			switch (pattern[ptr]) {\r
+			case ':': {	// non-capturing group\r
+				++ ptr;\r
+				Group group = new Group ();\r
+				ParseGroup (group, options, null);\r
+\r
+				return group;\r
+			}\r
+\r
+			case '>': {	// non-backtracking group\r
+				++ ptr;\r
+				Group group = new NonBacktrackingGroup ();\r
+				ParseGroup (group, options, null);\r
+\r
+				return group;\r
+			}\r
+\r
+			case 'i': case 'm': case 'n':\r
+			case 's': case 'x': case '-': {	// options\r
+				// options to switch on, optionally followed by '-' and the\r
+				// options to switch off\r
+				RegexOptions o = options;\r
+				ParseOptions (ref o, false);\r
+				if (pattern[ptr] == '-') {\r
+					++ ptr;\r
+					ParseOptions (ref o, true);\r
+				}\r
+\r
+				if (pattern[ptr] == ':') {	// pass options to child group\r
+					++ ptr;\r
+					Group group = new Group ();\r
+					ParseGroup (group, o, null);\r
+					return group;\r
+				}\r
+				else if (pattern[ptr] == ')') {	// change options of enclosing group\r
+					++ ptr;\r
+					options = o;\r
+					return null;\r
+				}\r
+				else\r
+					throw NewParseException ("Bad options");\r
+			}\r
+\r
+			case '<': case '=': case '!': {	// lookahead/lookbehind\r
+				ExpressionAssertion asn = new ExpressionAssertion ();\r
+				if (!ParseAssertionType (asn))\r
+					goto case '\'';	// it's a (?<name> ) construct\r
+\r
+				Group test = new Group ();\r
+				ParseGroup (test, options, null);\r
+\r
+				asn.TestExpression = test;\r
+				return asn;\r
+			}\r
+\r
+			case '\'': {	// named/balancing group\r
+				char delim;\r
+				if (pattern[ptr] == '<')\r
+					delim = '>';\r
+				else\r
+					delim = '\'';\r
+\r
+				++ ptr;\r
+				string name = ParseName ();\r
+\r
+				if (pattern[ptr] == delim) {\r
+					// capturing group\r
+\r
+					if (name == null)\r
+						throw NewParseException ("Bad group name.");\r
+\r
+					++ ptr;\r
+					CapturingGroup cap = new CapturingGroup ();\r
+					cap.Name = name;\r
+					caps.Add (cap);\r
+					ParseGroup (cap, options, null);\r
+\r
+					return cap;\r
+				}\r
+				else if (pattern[ptr] == '-') {\r
+					// balancing group\r
+\r
+					++ ptr;\r
+					string balance_name = ParseName ();\r
+					if (balance_name == null || pattern[ptr] != delim)\r
+						throw NewParseException ("Bad balancing group name.");\r
+\r
+					++ ptr;\r
+					BalancingGroup bal = new BalancingGroup ();\r
+					bal.Name = name;\r
+					caps.Add (bal);\r
+					refs.Add (bal, balance_name);\r
+\r
+					// Like every other group, the balancing group owns the\r
+					// sub-pattern up to its ')'. Without this call the body\r
+					// and the closing parenthesis were left for the enclosing\r
+					// group, which then ended early (or reported "Too many\r
+					// )'s." at top level).\r
+					ParseGroup (bal, options, null);\r
+\r
+					return bal;\r
+				}\r
+				else\r
+					throw NewParseException ("Bad group name.");\r
+			}\r
+\r
+			case '(': {	// expression/capture test\r
+				Assertion asn;\r
+\r
+				++ ptr;\r
+				int p = ptr;\r
+				string name = ParseName ();\r
+				if (name == null || pattern[ptr] != ')') {	// expression test\r
+					// FIXME MS implementation doesn't seem to\r
+					// implement this version of (?(x) ...)\r
+\r
+					ptr = p;\r
+					ExpressionAssertion expr_asn = new ExpressionAssertion ();\r
+\r
+					if (pattern[ptr] == '?') {\r
+						++ ptr;\r
+						if (!ParseAssertionType (expr_asn))\r
+							throw NewParseException ("Bad conditional.");\r
+					}\r
+					else {\r
+						expr_asn.Negate = false;\r
+						expr_asn.Reverse = false;\r
+					}\r
+\r
+					Group test = new Group ();\r
+					ParseGroup (test, options, null);\r
+					expr_asn.TestExpression = test;\r
+					asn = expr_asn;\r
+				}\r
+				else {	// capture test\r
+					++ ptr;\r
+					asn = new CaptureAssertion ();\r
+					refs.Add (asn, name);\r
+				}\r
+\r
+				// the yes|no part is parsed with the assertion attached\r
+				Group group = new Group ();\r
+				ParseGroup (group, options, asn);\r
+				return group;\r
+			}\r
+\r
+			case '#': {	// comment\r
+				++ ptr;\r
+				while (pattern[ptr ++] != ')') {\r
+					if (ptr >= pattern.Length)\r
+						throw NewParseException ("Unterminated (?#...) comment.");\r
+				}\r
+				return null;\r
+			}\r
+\r
+			default:	// error\r
+				throw NewParseException ("Bad grouping construct.");\r
+			}\r
+		}\r
+\r
+		// Recognizes the assertion markers "=", "!", "<=" and "<!" at ptr.\r
+		// On success sets assertion.Negate ('!' forms) and assertion.Reverse\r
+		// ('<' forms), moves ptr past the marker and returns true. Otherwise\r
+		// returns false with ptr and the assertion untouched.\r
+		private bool ParseAssertionType (ExpressionAssertion assertion) {\r
+			bool reverse = pattern[ptr] == '<';\r
+			int offset = reverse ? 1 : 0;\r
+\r
+			bool negate;\r
+			char kind = pattern[ptr + offset];\r
+			if (kind == '=')\r
+				negate = false;\r
+			else if (kind == '!')\r
+				negate = true;\r
+			else\r
+				return false;\r
+\r
+			assertion.Negate = negate;\r
+			assertion.Reverse = reverse;\r
+			ptr += offset + 1;\r
+\r
+			return true;\r
+		}\r
+\r
+		// Consumes a run of option letters (i, m, n, s, x) at ptr, clearing\r
+		// the corresponding flag in `options' when negate is true and setting\r
+		// it otherwise. Stops at the first character that is not an option\r
+		// letter, leaving ptr on it.\r
+		private void ParseOptions (ref RegexOptions options, bool negate) {\r
+			for (;;) {\r
+				RegexOptions flag;\r
+\r
+				switch (pattern[ptr]) {\r
+				case 'i': flag = RegexOptions.IgnoreCase; break;\r
+				case 'm': flag = RegexOptions.Multiline; break;\r
+				case 'n': flag = RegexOptions.ExplicitCapture; break;\r
+				case 's': flag = RegexOptions.Singleline; break;\r
+				case 'x': flag = RegexOptions.IgnorePatternWhitespace; break;\r
+\r
+				default:\r
+					return;\r
+				}\r
+\r
+				if (negate)\r
+					options &= ~flag;\r
+				else\r
+					options |= flag;\r
+\r
+				++ ptr;\r
+			}\r
+		}\r
+\r
+		// Parses a bracketed set; ptr is just past the opening '['. Handles a\r
+		// leading '^' (negated set), a ']' in first position (literal), ranges\r
+		// "x-y", character escapes and the category escapes \d \w \s \p{..}\r
+		// and their upper-case complements. Returns the CharacterClass with\r
+		// ptr just past the closing ']'.\r
+		private Expression ParseCharacterClass (RegexOptions options) {\r
+			bool negate;\r
+			if (pattern[ptr] == '^') {\r
+				negate = true;\r
+				++ ptr;\r
+			}\r
+			else\r
+				negate = false;\r
+\r
+			CharacterClass cls = new CharacterClass (negate, IsIgnoreCase (options));\r
+\r
+			if (pattern[ptr] == ']') {\r
+				cls.AddCharacter (']');\r
+				++ ptr;\r
+			}\r
+\r
+			int c = -1;\r
+			int last = -1;		// previous single character, -1 if none\r
+			bool range = false;	// a '-' has been seen and awaits its end\r
+			bool closed = false;\r
+			while (ptr < pattern.Length) {\r
+				c = pattern[ptr ++];\r
+\r
+				if (c == ']') {\r
+					closed = true;\r
+					break;\r
+				}\r
+\r
+				if (c == '-') {\r
+					range = true;\r
+					continue;\r
+				}\r
+\r
+				if (c == '\\') {\r
+					c = ParseEscape ();\r
+					if (c < 0) {\r
+						// didn't recognize escape\r
+\r
+						c = pattern[ptr ++];\r
+						switch (c) {\r
+						case 'b': c = '\b'; break;\r
+\r
+						// The second argument is the negate flag: false for\r
+						// the lower-case escapes, true for the upper-case\r
+						// complements. \p used to be added negated, which\r
+						// made it behave exactly like \P.\r
+						case 'd': cls.AddCategory (Category.Digit, false); last = -1; continue;\r
+						case 'w': cls.AddCategory (Category.Word, false); last = -1; continue;\r
+						case 's': cls.AddCategory (Category.WhiteSpace, false); last = -1; continue;\r
+						case 'p': cls.AddCategory (ParseUnicodeCategory (), false); last = -1; continue;\r
+						case 'D': cls.AddCategory (Category.Digit, true); last = -1; continue;\r
+						case 'W': cls.AddCategory (Category.Word, true); last = -1; continue;\r
+						case 'S': cls.AddCategory (Category.WhiteSpace, true); last = -1; continue;\r
+						case 'P': cls.AddCategory (ParseUnicodeCategory (), true); last = -1; continue;\r
+\r
+						default: break;	// add escaped character\r
+						}\r
+					}\r
+				}\r
+\r
+				if (range) {\r
+					if (c < last)\r
+						throw NewParseException ("[x-y] range in reverse order.");\r
+\r
+					if (last >= 0)\r
+						cls.AddRange ((char)last, (char)c);\r
+					else {\r
+						// '-' with nothing usable before it: both literal\r
+						cls.AddCharacter ((char)c);\r
+						cls.AddCharacter ('-');\r
+					}\r
+\r
+					range = false;\r
+					last = -1;\r
+				}\r
+				else {\r
+					cls.AddCharacter ((char)c);\r
+					last = c;\r
+				}\r
+			}\r
+\r
+			if (!closed)\r
+				throw NewParseException ("Unterminated [] set.");\r
+\r
+			// a trailing '-' just before ']' is a literal\r
+			if (range)\r
+				cls.AddCharacter ('-');\r
+\r
+			return cls;\r
+		}\r
+\r
+		// Parses the inside of a "{x}", "{x,}" or "{x,y}" quantifier; ptr is\r
+		// just past the '{' and ends just past the '}'.\r
+		//\r
+		//   min - receives x\r
+		//   max - receives y, x for the "{x}" form, or 0xffff (no upper\r
+		//         bound) when y is omitted\r
+		//\r
+		// Throws a parse exception for malformed input, for values of 65535\r
+		// or more, and for x > y.\r
+		private void ParseRepetitionBounds (out int min, out int max, RegexOptions options) {\r
+			int n, m;\r
+\r
+			/* check syntax */\r
+\r
+			ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
+			n = ParseNumber (10, 1, 0);\r
+			if (n < 0)\r
+				throw NewParseException ("Illegal {x,y} - bad value of x.");\r
+\r
+			ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
+			switch (pattern[ptr ++]) {\r
+			case '}':\r
+				m = n;\r
+				break;\r
+			case ',':\r
+				ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
+				m = ParseNumber (10, 1, 0);	// negative when y is omitted\r
+				ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
+				if (pattern[ptr ++] != '}')\r
+					throw NewParseException ("Illegal {x,y} - bad value of y.");\r
+				break;\r
+			default:\r
+				throw NewParseException ("Illegal {x,y}");\r
+			}\r
+\r
+			/* check bounds and ordering */\r
+\r
+			if (n >= 0xffff || m >= 0xffff)\r
+				throw NewParseException ("Illegal {x, y} - maximum of 65535.");\r
+			if (m >= 0 && m < n)\r
+				throw NewParseException ("Illegal {x, y} with x > y.");\r
+\r
+			/* assign min and max */\r
+\r
+			// Only a missing y (m < 0) means "unbounded". An explicit zero,\r
+			// as in "{0}" or "{0,0}", is a real upper bound; testing m > 0\r
+			// turned those into "{0,}".\r
+			min = n;\r
+			if (m >= 0)\r
+				max = m;\r
+			else\r
+				max = 0xffff;\r
+		}\r
+\r
+		// Parses the "{name}" part of a \p or \P escape at ptr and returns the\r
+		// category that CategoryUtils.CategoryFromName gives for the name.\r
+		// Throws a parse exception when a brace or the name is missing, or\r
+		// when the name maps to Category.None.\r
+		private Category ParseUnicodeCategory () {\r
+			const string incomplete = "Incomplete \\p{X} character escape.";\r
+\r
+			char open = pattern[ptr ++];\r
+			if (open != '{')\r
+				throw NewParseException (incomplete);\r
+\r
+			string name = ParseName (pattern, ref ptr);\r
+			if (name == null)\r
+				throw NewParseException (incomplete);\r
+\r
+			Category category = CategoryUtils.CategoryFromName (name);\r
+			if (category == Category.None)\r
+				throw NewParseException ("Unknown property '" + name + "'.");\r
+\r
+			char close = pattern[ptr ++];\r
+			if (close != '}')\r
+				throw NewParseException (incomplete);\r
+\r
+			return category;\r
+		}\r
+\r
+		// Parses the escapes that stand for an expression rather than a single\r
+		// character; ptr is just past the backslash. Covers the category\r
+		// escapes (\d \w \s \p{..} and their upper-case complements), the\r
+		// position escapes (\A \Z \z \G \b \B), numbered backreferences\r
+		// (\1 ...) and named backreferences (\k<name> or \k'name').\r
+		//\r
+		// Returns the expression, or null with ptr restored when the escape is\r
+		// none of these. Backreferences are recorded in refs.\r
+		private Expression ParseSpecial (RegexOptions options) {\r
+			int p = ptr;\r
+			Expression expr = null;\r
+\r
+			switch (pattern[ptr ++]) {\r
+\r
+			// categories\r
+			//\r
+			// The second argument is the negate flag: false for the lower-case\r
+			// escapes, true for the upper-case complements. \p and \P had\r
+			// their flags swapped.\r
+\r
+			case 'd': expr = new CharacterClass (Category.Digit, false); break;\r
+			case 'w': expr = new CharacterClass (Category.Word, false); break;\r
+			case 's': expr = new CharacterClass (Category.WhiteSpace, false); break;\r
+			case 'D': expr = new CharacterClass (Category.Digit, true); break;\r
+			case 'W': expr = new CharacterClass (Category.Word, true); break;\r
+			case 'S': expr = new CharacterClass (Category.WhiteSpace, true); break;\r
+			case 'p': expr = new CharacterClass (ParseUnicodeCategory (), false); break;\r
+			case 'P': expr = new CharacterClass (ParseUnicodeCategory (), true); break;\r
+\r
+			// positions\r
+\r
+			case 'A': expr = new PositionAssertion (Position.StartOfString); break;\r
+			case 'Z': expr = new PositionAssertion (Position.End); break;\r
+			case 'z': expr = new PositionAssertion (Position.EndOfString); break;\r
+			case 'G': expr = new PositionAssertion (Position.StartOfScan); break;\r
+			case 'b': expr = new PositionAssertion (Position.Boundary); break;\r
+			case 'B': expr = new PositionAssertion (Position.NonBoundary); break;\r
+\r
+			// references\r
+\r
+			case '1': case '2': case '3': case '4': case '5':\r
+			case '6': case '7': case '8': case '9': {\r
+				// step back so the first digit is part of the number\r
+				ptr --;\r
+				int n = ParseNumber (10, 1, 0);\r
+				if (n < 0) {\r
+					ptr = p;\r
+					return null;\r
+				}\r
+\r
+				// FIXME test if number is within number of assigned groups\r
+				// this may present a problem for right-to-left matching\r
+\r
+				Reference reference = new Reference (IsIgnoreCase (options));\r
+				refs.Add (reference, n.ToString ());\r
+				expr = reference;\r
+				break;\r
+			}\r
+\r
+			case 'k': {\r
+				char delim = pattern[ptr ++];\r
+				if (delim == '<')\r
+					delim = '>';\r
+				else if (delim != '\'')\r
+					throw NewParseException ("Malformed \\k<...> named backreference.");\r
+\r
+				string name = ParseName ();\r
+				if (name == null || pattern[ptr] != delim)\r
+					throw NewParseException ("Malformed \\k<...> named backreference.");\r
+\r
+				++ ptr;\r
+				Reference reference = new Reference (IsIgnoreCase (options));\r
+				refs.Add (reference, name);\r
+				expr = reference;\r
+				break;\r
+			}\r
+\r
+			default:\r
+				expr = null;\r
+				break;\r
+			}\r
+\r
+			// nothing recognized: leave the escape for the caller\r
+			if (expr == null)\r
+				ptr = p;\r
+\r
+			return expr;\r
+		}\r
+\r
+ // Parses the character(s) after a backslash as a single-character escape and\r
+ // returns the code of the character it denotes.\r
+ //\r
+ // Returns -1, with ptr restored to its value on entry, when the escape is not\r
+ // recognized. Throws a parse exception for \x / \u with too few hex digits and\r
+ // for \c followed by something that is not a control-character letter.\r
+ private int ParseEscape () {\r
+     int p = ptr;\r
+     int c;\r
+\r
+     switch (pattern[ptr ++]) {\r
+\r
+     // standard escapes (except \b)\r
+\r
+     case 'a': return '\u0007';\r
+     case 't': return '\u0009';\r
+     case 'r': return '\u000d';\r
+     case 'v': return '\u000b';\r
+     case 'f': return '\u000c';\r
+     case 'n': return '\u000a';\r
+     case 'e': return '\u001b';\r
+     case '\\': return '\\';\r
+\r
+     // character codes\r
+\r
+     case '0': return ParseOctal (pattern, ref ptr);\r
+\r
+     case 'x':\r
+         c = ParseHex (pattern, ref ptr, 2);\r
+         if (c < 0)\r
+             throw NewParseException ("Insufficient hex digits");\r
+\r
+         return c;\r
+\r
+     case 'u':\r
+         c = ParseHex (pattern, ref ptr, 4);\r
+         if (c < 0)\r
+             throw NewParseException ("Insufficient hex digits");\r
+\r
+         return c;\r
+\r
+     // control characters\r
+\r
+     case 'c':\r
+         // read the letter AFTER the 'c' (ptr), not the 'c' itself (p)\r
+         c = pattern[ptr ++];\r
+         if (c >= 'a' && c <= 'z')\r
+             c -= 'a' - 'A'; // \ca is the same control character as \cA\r
+\r
+         // '@'..'_' map onto 0x00..0x1f, so \cA is 0x01 and \cZ is 0x1a\r
+         if (c >= '@' && c <= '_')\r
+             return c - '@';\r
+         else\r
+             throw NewParseException ("Unrecognized control character.");\r
+\r
+     // unknown escape\r
+\r
+     default:\r
+         ptr = p;\r
+         return -1;\r
+     }\r
+ }\r
+\r
+ // Instance convenience over the static Parser.ParseName: reads a name from\r
+ // this parser's pattern at ptr, advancing ptr, and returns what it produced.\r
+ private string ParseName () {\r
+     string name = Parser.ParseName (pattern, ref ptr);\r
+     return name;\r
+ }\r
+\r
+ // Tells whether c may appear in a name: connector punctuation (such as '_')\r
+ // is accepted, modifier letters are rejected, and anything else is accepted\r
+ // exactly when it is a letter or a digit.\r
+ private static bool IsNameChar (char c) {\r
+     switch (Char.GetUnicodeCategory (c)) {\r
+     case UnicodeCategory.ModifierLetter:\r
+         return false;\r
+     case UnicodeCategory.ConnectorPunctuation:\r
+         return true;\r
+     default:\r
+         return Char.IsLetterOrDigit (c);\r
+     }\r
+ }\r
+ \r
+ // Instance convenience over the static Parser.ParseNumber: reads a number in\r
+ // base b from this parser's pattern at ptr (advancing ptr), forwarding the\r
+ // min and max arguments unchanged.\r
+ private int ParseNumber (int b, int min, int max) {\r
+     int number = Parser.ParseNumber (pattern, ref ptr, b, min, max);\r
+     return number;\r
+ }\r
+\r
+ // Instance convenience over the static Parser.ParseDecimal, applied to this\r
+ // parser's pattern at ptr (ptr is passed by reference and may be advanced).\r
+ private int ParseDecimal () {\r
+     int number = Parser.ParseDecimal (pattern, ref ptr);\r
+     return number;\r
+ }\r
+\r
+ // Returns the numeric value of digit c in base b, or -1 when c is not a valid\r
+ // digit of that base. Only bases 8, 10 and 16 are supported; any other base\r
+ // yields -1. The n argument is not used.\r
+ private static int ParseDigit (char c, int b, int n) {\r
+     if (b != 8 && b != 10 && b != 16)\r
+         return -1;\r
+\r
+     int value;\r
+     if (c >= '0' && c <= '9')\r
+         value = c - '0';\r
+     else if (c >= 'a' && c <= 'f')\r
+         value = 10 + c - 'a';\r
+     else if (c >= 'A' && c <= 'F')\r
+         value = 10 + c - 'A';\r
+     else\r
+         return -1;\r
+\r
+     // a digit is valid only when its value is below the base\r
+     return value < b ? value : -1;\r
+ }\r
+\r
+ // Advances ptr past anything that is not part of the expression proper:\r
+ //   - inline comments "(?#...)", always;\r
+ //   - "#" to end of line, and runs of white space, only when ignore is true.\r
+ // Stops at the first character that is none of these, or at the end of the\r
+ // pattern. Throws a parse exception when a "(?#" comment is never closed.\r
+ private void ConsumeWhitespace (bool ignore) {\r
+     while (ptr < pattern.Length) {\r
+         if (pattern[ptr] == '(') {\r
+             if (ptr + 3 >= pattern.Length)\r
+                 return;\r
+\r
+             if (pattern[ptr + 1] != '?' || pattern[ptr + 2] != '#')\r
+                 return;\r
+\r
+             // skip the comment body; bounded, so an unterminated comment is\r
+             // reported as a parse error instead of indexing past the end\r
+             ptr += 3;\r
+             while (ptr < pattern.Length && pattern[ptr] != ')')\r
+                 ++ ptr;\r
+\r
+             if (ptr >= pattern.Length)\r
+                 throw NewParseException ("Unterminated (?#...) comment.");\r
+\r
+             ++ ptr; // consume ')'\r
+         }\r
+         else if (ignore && pattern[ptr] == '#') {\r
+             while (ptr < pattern.Length && pattern[ptr ++] != '\n')\r
+                 /* ignore */ ;\r
+         }\r
+         else if (ignore && Char.IsWhiteSpace (pattern[ptr])) {\r
+             while (ptr < pattern.Length && Char.IsWhiteSpace (pattern[ptr]))\r
+                 ++ ptr;\r
+         }\r
+         else\r
+             return;\r
+     }\r
+ }\r
+\r
+ // Returns a copy of pattern with backslash escapes replaced by the characters\r
+ // they denote. Resets this parser's pattern and ptr as a side effect.\r
+ //\r
+ // An escape that ParseEscape does not recognize (it returns -1 and rewinds\r
+ // ptr) stands for the escaped character itself, e.g. "\." yields ".".\r
+ // Throws a parse exception when the string ends with a lone backslash.\r
+ private string ParseString (string pattern) {\r
+     this.pattern = pattern;\r
+     this.ptr = 0;\r
+\r
+     System.Text.StringBuilder result = new System.Text.StringBuilder (pattern.Length);\r
+     while (ptr < pattern.Length) {\r
+         int c = pattern[ptr ++];\r
+         if (c == '\\') {\r
+             if (ptr >= pattern.Length)\r
+                 throw NewParseException ("Illegal \\ at end of pattern.");\r
+\r
+             c = ParseEscape ();\r
+             if (c < 0)\r
+                 c = pattern[ptr ++]; // unknown escape: keep the literal character\r
+         }\r
+\r
+         result.Append ((char)c);\r
+     }\r
+\r
+     return result.ToString ();\r
+ }\r
+\r
+ // Assigns group numbers and binds every recorded reference to its group.\r
+ //\r
+ // Unnamed groups are numbered first, from 1, in the order they appear in caps;\r
+ // named groups then continue the numbering, with groups that share a name\r
+ // sharing one number. num_groups is incremented once per distinct number.\r
+ // Finally each entry of refs (expression -> group number or name as a string)\r
+ // is pointed at its group; an unknown group raises a parse exception.\r
+ private void ResolveReferences () {\r
+     Hashtable groupsByKey = new Hashtable ();\r
+     int nextNumber = 1;\r
+\r
+     // pass 1: unnamed groups, keyed by their decimal number\r
+\r
+     foreach (CapturingGroup cap in caps) {\r
+         if (cap.Name != null)\r
+             continue;\r
+\r
+         groupsByKey.Add (nextNumber.ToString (), cap);\r
+         cap.Number = nextNumber ++;\r
+         ++ num_groups;\r
+     }\r
+\r
+     // pass 2: named groups, keyed by name\r
+\r
+     foreach (CapturingGroup cap in caps) {\r
+         if (cap.Name == null)\r
+             continue;\r
+\r
+         if (groupsByKey.Contains (cap.Name)) {\r
+             // key already taken: reuse the number of the first holder\r
+             CapturingGroup first = (CapturingGroup)groupsByKey[cap.Name];\r
+             cap.Number = first.Number;\r
+             continue;\r
+         }\r
+\r
+         groupsByKey.Add (cap.Name, cap);\r
+         cap.Number = nextNumber ++;\r
+         ++ num_groups;\r
+     }\r
+\r
+     // pass 3: bind references\r
+\r
+     foreach (Expression expr in refs.Keys) {\r
+         string key = (string)refs[expr];\r
+         if (!groupsByKey.Contains (key)) {\r
+             string kind = Char.IsDigit (key[0]) ? "number " : "name ";\r
+             throw NewParseException ("Reference to undefined group " + kind + key);\r
+         }\r
+\r
+         CapturingGroup target = (CapturingGroup)groupsByKey[key];\r
+\r
+         Reference backref = expr as Reference;\r
+         if (backref != null) {\r
+             backref.CapturingGroup = target;\r
+             continue;\r
+         }\r
+\r
+         CaptureAssertion assertion = expr as CaptureAssertion;\r
+         if (assertion != null) {\r
+             assertion.CapturingGroup = target;\r
+             continue;\r
+         }\r
+\r
+         BalancingGroup balancing = expr as BalancingGroup;\r
+         if (balancing != null)\r
+             balancing.Balance = target;\r
+     }\r
+ }\r
+\r
+ // flag helper functions\r
+\r
+ // True when the IgnoreCase bit is set in options.\r
+ private static bool IsIgnoreCase (RegexOptions options) {\r
+     RegexOptions masked = options & RegexOptions.IgnoreCase;\r
+     return masked != RegexOptions.None;\r
+ }\r
+\r
+ // True when the Multiline bit is set in options.\r
+ private static bool IsMultiline (RegexOptions options) {\r
+     RegexOptions masked = options & RegexOptions.Multiline;\r
+     return masked != RegexOptions.None;\r
+ }\r
+\r
+ // True when the ExplicitCapture bit is set in options.\r
+ private static bool IsExplicitCapture (RegexOptions options) {\r
+     RegexOptions masked = options & RegexOptions.ExplicitCapture;\r
+     return masked != RegexOptions.None;\r
+ }\r
+ \r
+ // True when the Singleline bit is set in options.\r
+ private static bool IsSingleline (RegexOptions options) {\r
+     RegexOptions masked = options & RegexOptions.Singleline;\r
+     return masked != RegexOptions.None;\r
+ }\r
+\r
+ // True when the IgnorePatternWhitespace bit is set in options.\r
+ private static bool IsIgnorePatternWhitespace (RegexOptions options) {\r
+     RegexOptions masked = options & RegexOptions.IgnorePatternWhitespace;\r
+     return masked != RegexOptions.None;\r
+ }\r
+\r
+ // True when the RightToLeft bit is set in options.\r
+ private static bool IsRightToLeft (RegexOptions options) {\r
+     RegexOptions masked = options & RegexOptions.RightToLeft;\r
+     return masked != RegexOptions.None;\r
+ }\r
+\r
+ // exception creation\r
+\r
+ // Builds (does not throw) the exception used for pattern syntax errors. The\r
+ // message is prefixed with the pattern being parsed, and the pattern is also\r
+ // passed as the second ArgumentException constructor argument.\r
+ private ArgumentException NewParseException (string msg) {\r
+     string full = String.Concat ("parsing \"", pattern, "\" - ", msg);\r
+     return new ArgumentException (full, pattern);\r
+ }\r
+\r
+ private string pattern;\r
+ private int ptr;\r
+\r
+ private ArrayList caps;\r
+ private Hashtable refs;\r
+ private int num_groups;\r
+ }\r
+}\r
--- /dev/null
+//\r
+// assembly: System\r
+// namespace: System.Text.RegularExpressions\r
+// file: quicksearch.cs\r
+//\r
+// author: Dan Lewis (dlewis@gmx.co.uk)\r
+// (c) 2002\r
+\r
+using System;\r
+\r
+namespace System.Text.RegularExpressions {\r
+\r
+ // TODO use simple test for single character strings\r
+\r
+ // Fast substring search using a simplified Boyer-Moore: after a failed\r
+ // comparison the window is advanced by an amount looked up for the text\r
+ // character just past the window.\r
+ class QuickSearch {\r
+     // Prepares a search for str. When ignore is true the needle is lower-cased\r
+     // once here, and text characters are lower-cased during comparison.\r
+     public QuickSearch (string str, bool ignore) {\r
+         this.str = str;\r
+         this.len = str.Length;\r
+         this.ignore = ignore;\r
+\r
+         Setup ();\r
+     }\r
+\r
+     // The needle (lower-cased when IgnoreCase is true).\r
+     public string String {\r
+         get { return str; }\r
+     }\r
+\r
+     // Length of the needle.\r
+     public int Length {\r
+         get { return len; }\r
+     }\r
+\r
+     public bool IgnoreCase {\r
+         get { return ignore; }\r
+     }\r
+\r
+     // Returns the lowest index in [start, end] at which the needle occurs in\r
+     // text, or -1 when there is none. end is the last candidate START index and\r
+     // is clamped so that the needle always fits inside text.\r
+     public int Search (string text, int start, int end) {\r
+         if (end > text.Length - len)\r
+             end = text.Length - len;\r
+\r
+         // an empty needle matches at the first candidate position; without\r
+         // this guard the comparison loops below would index str[-1]\r
+         if (len == 0)\r
+             return start <= end ? start : -1;\r
+\r
+         int ptr = start;\r
+         if (!ignore) {\r
+             while (ptr <= end) {\r
+                 // compare right to left\r
+                 int i = len - 1;\r
+                 while (str[i] == text[ptr + i]) {\r
+                     if (-- i < 0)\r
+                         return ptr;\r
+                 }\r
+\r
+                 // ptr < end guarantees text[ptr + len] exists\r
+                 if (ptr < end)\r
+                     ptr += shift[text[ptr + len]];\r
+                 else\r
+                     break;\r
+             }\r
+         }\r
+         else {\r
+             // ignore case: same as above, but we convert text\r
+             // to lower case before doing the string compare\r
+\r
+             while (ptr <= end) {\r
+                 int i = len - 1;\r
+                 while (str[i] == Char.ToLower (text[ptr + i])) {\r
+                     if (-- i < 0)\r
+                         return ptr;\r
+                 }\r
+\r
+                 if (ptr < end)\r
+                     ptr += shift[text[ptr + len]];\r
+                 else\r
+                     break;\r
+             }\r
+         }\r
+\r
+         return -1;\r
+     }\r
+\r
+     // private\r
+\r
+     // Lower-cases the needle when ignoring case and builds the shift table.\r
+     private void Setup () {\r
+         if (ignore)\r
+             str = str.ToLower ();\r
+\r
+         // The table is indexed directly by char value, so it needs one entry\r
+         // for every possible char (64k entries); a smaller table throws\r
+         // IndexOutOfRangeException for characters beyond its size.\r
+         // is it worth compressing this by only storing shifts within\r
+         // a (lo, hi) character range? for most substrings this would\r
+         // be around 50 bytes...\r
+\r
+         int size = Char.MaxValue + 1;\r
+         shift = new int[size];\r
+         for (int i = 0; i < size; ++ i)\r
+             shift[i] = len + 1; // character not in needle: skip past it\r
+\r
+         for (int i = 0; i < len; ++ i) {\r
+             char c = str[i];\r
+\r
+             // later occurrences overwrite earlier ones (smaller shift wins)\r
+             shift[c] = len - i;\r
+             if (ignore)\r
+                 shift[Char.ToUpper (c)] = len - i;\r
+         }\r
+     }\r
+\r
+     private string str;\r
+     private int len;\r
+     private bool ignore;\r
+\r
+     private int[] shift;\r
+ }\r
+}\r
--- /dev/null
+//\r
+// assembly: System\r
+// namespace: System.Text.RegularExpressions\r
+// file: regex.cs\r
+//\r
+// author: Dan Lewis (dlewis@gmx.co.uk)\r
+// (c) 2002\r
+\r
+using System;\r
+using System.Text;\r
+using System.Collections;\r
+using System.Reflection;\r
+using System.Reflection.Emit;\r
+using System.Runtime.Serialization;\r
+\r
+using RegularExpression = System.Text.RegularExpressions.Syntax.RegularExpression;\r
+using Parser = System.Text.RegularExpressions.Syntax.Parser;\r
+\r
+namespace System.Text.RegularExpressions {\r
+ \r
+ // Callback that receives a match and returns a string for it; the Replace\r
+ // methods of Regex insert the returned string in place of the matched text.\r
+ public delegate string MatchEvaluator (Match match);\r
+\r
+ // Option bits accepted by the Regex constructor and static helpers. Each\r
+ // member is a distinct bit so values can be combined with '|'. Note that\r
+ // 0x080 is not assigned.\r
+ [Flags]\r
+ public enum RegexOptions {\r
+ None = 0x000,\r
+ IgnoreCase = 0x001,\r
+ Multiline = 0x002,\r
+ ExplicitCapture = 0x004,\r
+ Compiled = 0x008, // the Regex constructor currently rejects this as not implemented\r
+ Singleline = 0x010,\r
+ IgnorePatternWhitespace = 0x020,\r
+ RightToLeft = 0x040,\r
+ ECMAScript = 0x100\r
+ }\r
+ \r
+ // A compiled regular expression. Construction parses the pattern, compiles it\r
+ // to a machine factory (shared through a static cache keyed by pattern and\r
+ // options) and records the group name/number mapping. Matching creates a new\r
+ // machine from the factory for every scan.\r
+ public class Regex : ISerializable {\r
+     public static void CompileToAssembly\r
+         (RegexCompilationInfo[] regexes, AssemblyName aname)\r
+     {\r
+         throw new Exception ("Not implemented.");\r
+     }\r
+\r
+     public static void CompileToAssembly\r
+         (RegexCompilationInfo[] regexes, AssemblyName aname,\r
+          CustomAttributeBuilder[] attribs)\r
+     {\r
+         throw new Exception ("Not implemented.");\r
+     }\r
+\r
+     public static void CompileToAssembly\r
+         (RegexCompilationInfo[] regexes, AssemblyName aname,\r
+          CustomAttributeBuilder[] attribs, string resourceFile)\r
+     {\r
+         throw new Exception ("Not implemented.");\r
+     }\r
+\r
+     public static string Escape (string str) {\r
+         return Parser.Escape (str);\r
+     }\r
+\r
+     public static string Unescape (string str) {\r
+         return Parser.Unescape (str);\r
+     }\r
+\r
+     // Static conveniences: build a Regex for pattern and apply it to input.\r
+\r
+     public static bool IsMatch (string input, string pattern) {\r
+         Regex re = new Regex (pattern, RegexOptions.None);\r
+         return re.IsMatch (input);\r
+     }\r
+\r
+     public static Match Match (string input, string pattern) {\r
+         return Regex.Match (input, pattern, RegexOptions.None);\r
+     }\r
+\r
+     public static Match Match (string input, string pattern, RegexOptions options) {\r
+         Regex re = new Regex (pattern, options);\r
+         return re.Match (input);\r
+     }\r
+\r
+     public static string Replace\r
+         (string input, string pattern, MatchEvaluator evaluator)\r
+     {\r
+         return Regex.Replace (input, pattern, evaluator, RegexOptions.None);\r
+     }\r
+\r
+     public static string Replace\r
+         (string input, string pattern, MatchEvaluator evaluator,\r
+          RegexOptions options)\r
+     {\r
+         Regex re = new Regex (pattern, options);\r
+         return re.Replace (input, evaluator);\r
+     }\r
+\r
+     public static string Replace\r
+         (string input, string pattern, string replacement)\r
+     {\r
+         return Regex.Replace (input, pattern, replacement, RegexOptions.None);\r
+     }\r
+\r
+     public static string Replace\r
+         (string input, string pattern, string replacement,\r
+          RegexOptions options)\r
+     {\r
+         Regex re = new Regex (pattern, options);\r
+         // substitute the replacement text, not the pattern itself\r
+         return re.Replace (input, replacement);\r
+     }\r
+\r
+     public static string[] Split (string input, string pattern) {\r
+         return Regex.Split (input, pattern, RegexOptions.None);\r
+     }\r
+\r
+     public static string[] Split (string input, string pattern, RegexOptions options) {\r
+         // the expression is built from pattern; input is only the text to split\r
+         Regex re = new Regex (pattern, options);\r
+         return re.Split (input);\r
+     }\r
+\r
+     // private\r
+\r
+     private static FactoryCache cache = new FactoryCache (200); // TODO put some meaningful number here\r
+\r
+     // constructors\r
+\r
+     protected Regex () {\r
+         // XXX what's this constructor for?\r
+     }\r
+\r
+     public Regex (string pattern) : this (pattern, RegexOptions.None) {\r
+     }\r
+\r
+     // Parses pattern and obtains a machine factory for it, compiling one only\r
+     // when the cache has none for this (pattern, options) pair.\r
+     // RegexOptions.Compiled is not implemented and throws.\r
+     public Regex (string pattern, RegexOptions options) {\r
+         this.pattern = pattern;\r
+         this.options = options;\r
+\r
+         // parse and install group mapping\r
+         //\r
+         // This is done on every construction, not just on a cache miss: the\r
+         // cache holds only the machine factory, so skipping the parse on a\r
+         // hit would leave mapping null and group_count zero, breaking\r
+         // GetGroupNames, GroupNumberFromName and friends.\r
+\r
+         Parser psr = new Parser ();\r
+         RegularExpression re = psr.ParseRegularExpression (pattern, options);\r
+         this.group_count = re.GroupCount;\r
+         this.mapping = psr.GetMapping ();\r
+\r
+         this.factory = cache.Lookup (pattern, options);\r
+\r
+         if (this.factory == null) {\r
+             // compile\r
+\r
+             ICompiler cmp;\r
+             if ((options & RegexOptions.Compiled) != 0)\r
+                 throw new Exception ("Not implemented.");\r
+                 //cmp = new CILCompiler ();\r
+             else\r
+                 cmp = new PatternCompiler ();\r
+\r
+             re.Compile (cmp, RightToLeft);\r
+\r
+             // install machine factory and add to pattern cache\r
+\r
+             this.factory = cmp.GetMachineFactory ();\r
+             cache.Add (pattern, options, this.factory);\r
+         }\r
+     }\r
+\r
+     // public instance properties\r
+\r
+     public RegexOptions Options {\r
+         get { return options; }\r
+     }\r
+\r
+     public bool RightToLeft {\r
+         get { return (options & RegexOptions.RightToLeft) != 0; }\r
+     }\r
+\r
+     // public instance methods\r
+\r
+     // Keys of the group mapping, in the mapping's enumeration order.\r
+     public string[] GetGroupNames () {\r
+         string[] names = new string[mapping.Count];\r
+         mapping.Keys.CopyTo (names, 0);\r
+\r
+         return names;\r
+     }\r
+\r
+     // Values of the group mapping, in the mapping's enumeration order.\r
+     public int[] GetGroupNumbers () {\r
+         int[] numbers = new int[mapping.Count];\r
+         mapping.Values.CopyTo (numbers, 0);\r
+\r
+         return numbers;\r
+     }\r
+\r
+     // Returns the name mapped to group number i, or "" when there is none.\r
+     public string GroupNameFromNumber (int i) {\r
+         if (i >= group_count)\r
+             return "";\r
+\r
+         foreach (string name in mapping.Keys) {\r
+             if ((int)mapping[name] == i)\r
+                 return name;\r
+         }\r
+\r
+         return "";\r
+     }\r
+\r
+     // Returns the number mapped to name, or -1 when the name is unknown.\r
+     public int GroupNumberFromName (string name) {\r
+         if (mapping.Contains (name))\r
+             return (int)mapping[name];\r
+\r
+         return -1;\r
+     }\r
+\r
+     // match methods\r
+\r
+     public bool IsMatch (string input) {\r
+         return IsMatch (input, 0);\r
+     }\r
+\r
+     public bool IsMatch (string input, int startat) {\r
+         return Match (input, startat).Success;\r
+     }\r
+\r
+     public Match Match (string input) {\r
+         return Match (input, 0);\r
+     }\r
+\r
+     public Match Match (string input, int startat) {\r
+         return CreateMachine ().Scan (this, input, startat, input.Length);\r
+     }\r
+\r
+     public Match Match (string input, int startat, int length) {\r
+         return CreateMachine ().Scan (this, input, startat, startat + length);\r
+     }\r
+\r
+     public MatchCollection Matches (string input) {\r
+         return Matches (input, 0);\r
+     }\r
+\r
+     // Collects successive matches, starting the search at startat.\r
+     public MatchCollection Matches (string input, int startat) {\r
+         MatchCollection ms = new MatchCollection ();\r
+         Match m = Match (input, startat);\r
+         while (m.Success) {\r
+             ms.Add (m);\r
+             m = m.NextMatch ();\r
+         }\r
+\r
+         return ms;\r
+     }\r
+\r
+     // replace methods\r
+\r
+     public string Replace (string input, MatchEvaluator evaluator) {\r
+         return Replace (input, evaluator, Int32.MaxValue, 0);\r
+     }\r
+\r
+     public string Replace (string input, MatchEvaluator evaluator, int count) {\r
+         return Replace (input, evaluator, count, 0);\r
+     }\r
+\r
+     // Returns input with up to count matches, searched for from startat,\r
+     // replaced by what evaluator returns for them. A negative count means no\r
+     // limit. Text before startat is copied to the result unchanged.\r
+     public string Replace (string input, MatchEvaluator evaluator, int count, int startat)\r
+     {\r
+         StringBuilder result = new StringBuilder ();\r
+\r
+         // copy from the very beginning: starting at startat would silently\r
+         // drop the part of the input that precedes the search start\r
+         int ptr = 0;\r
+         bool unlimited = count < 0;\r
+\r
+         Match m = Match (input, startat);\r
+         while (m.Success && (unlimited || count -- > 0)) {\r
+             result.Append (input.Substring (ptr, m.Index - ptr));\r
+             result.Append (evaluator (m));\r
+\r
+             ptr = m.Index + m.Length;\r
+             m = m.NextMatch ();\r
+         }\r
+         result.Append (input.Substring (ptr));\r
+\r
+         return result.ToString ();\r
+     }\r
+\r
+     public string Replace (string input, string replacement) {\r
+         return Replace (input, replacement, Int32.MaxValue, 0);\r
+     }\r
+\r
+     public string Replace (string input, string replacement, int count) {\r
+         return Replace (input, replacement, count, 0);\r
+     }\r
+\r
+     // The replacement pattern is compiled once and evaluated for every match.\r
+     public string Replace (string input, string replacement, int count, int startat) {\r
+         ReplacementEvaluator ev = new ReplacementEvaluator (this, replacement);\r
+         return Replace (input, new MatchEvaluator (ev.Evaluate), count, startat);\r
+     }\r
+\r
+     // split methods\r
+\r
+     public string[] Split (string input) {\r
+         return Split (input, Int32.MaxValue, 0);\r
+     }\r
+\r
+     public string[] Split (string input, int count) {\r
+         return Split (input, count, 0);\r
+     }\r
+\r
+     // Splits input at the matches found from startat on. count is the maximum\r
+     // number of pieces returned (0 means no limit); the final piece is always\r
+     // the unsplit remainder of the input.\r
+     public string[] Split (string input, int count, int startat) {\r
+         ArrayList splits = new ArrayList ();\r
+         if (count == 0)\r
+             count = Int32.MaxValue;\r
+\r
+         int ptr = 0;\r
+         Match m = Match (input, startat);\r
+\r
+         // at most count - 1 cuts, so that the remainder makes piece #count\r
+         while (-- count > 0 && m.Success) {\r
+             splits.Add (input.Substring (ptr, m.Index - ptr));\r
+             ptr = m.Index + m.Length;\r
+\r
+             // advance; without this the same match is seen forever\r
+             m = m.NextMatch ();\r
+         }\r
+\r
+         splits.Add (input.Substring (ptr));\r
+\r
+         string[] result = new string[splits.Count];\r
+         splits.CopyTo (result);\r
+         return result;\r
+     }\r
+\r
+     // object methods\r
+\r
+     public override string ToString () {\r
+         return pattern;\r
+     }\r
+\r
+     // ISerializable interface\r
+\r
+     public void GetObjectData (SerializationInfo info, StreamingContext context) {\r
+         throw new Exception ("Not implemented.");\r
+     }\r
+\r
+     // internal\r
+\r
+     internal int GroupCount {\r
+         get { return group_count; }\r
+     }\r
+\r
+     // private\r
+\r
+     private IMachine CreateMachine () {\r
+         return factory.NewInstance ();\r
+     }\r
+\r
+     private string pattern;\r
+     private RegexOptions options;\r
+\r
+     private IMachineFactory factory;\r
+     private IDictionary mapping;\r
+     private int group_count;\r
+ }\r
+\r
+ // Plain data holder describing one regular expression to be compiled into an\r
+ // assembly: its pattern and options, plus the name, namespace and visibility\r
+ // of the type to generate. Every value is readable and writable.\r
+ public class RegexCompilationInfo {\r
+     public RegexCompilationInfo (string pattern, RegexOptions options, string name, string full_namespace, bool is_public) {\r
+         Pattern = pattern;\r
+         Options = options;\r
+         Name = name;\r
+         Namespace = full_namespace;\r
+         IsPublic = is_public;\r
+     }\r
+\r
+     public bool IsPublic {\r
+         get { return this.is_public; }\r
+         set { this.is_public = value; }\r
+     }\r
+\r
+     public string Name {\r
+         get { return this.name; }\r
+         set { this.name = value; }\r
+     }\r
+\r
+     public string Namespace {\r
+         get { return this.full_namespace; }\r
+         set { this.full_namespace = value; }\r
+     }\r
+\r
+     public RegexOptions Options {\r
+         get { return this.options; }\r
+         set { this.options = value; }\r
+     }\r
+\r
+     public string Pattern {\r
+         get { return this.pattern; }\r
+         set { this.pattern = value; }\r
+     }\r
+\r
+     // private\r
+\r
+     private string pattern;\r
+     private string name;\r
+     private string full_namespace;\r
+     private RegexOptions options;\r
+     private bool is_public;\r
+ }\r
+}\r
--- /dev/null
+//\r
+// assembly: System\r
+// namespace: System.Text.RegularExpressions\r
+// file: replace.cs\r
+//\r
+// author: Dan Lewis (dlewis@gmx.co.uk)\r
+// (c) 2002\r
+\r
+using System;\r
+using System.Text;\r
+using System.Collections;\r
+\r
+using Parser = System.Text.RegularExpressions.Syntax.Parser;\r
+\r
+namespace System.Text.RegularExpressions {\r
+\r
+ // Compiles a replacement pattern (literal text mixed with $-substitutions)\r
+ // into a list of terms, and evaluates that list against a match to produce\r
+ // the replacement text.\r
+ class ReplacementEvaluator {\r
+     // One-shot helper: compile replacement for match.Regex and evaluate it.\r
+     public static string Evaluate (string replacement, Match match) {\r
+         ReplacementEvaluator ev = new ReplacementEvaluator (match.Regex, replacement);\r
+         return ev.Evaluate (match);\r
+     }\r
+\r
+     // Throws ArgumentException when replacement contains a malformed or\r
+     // unknown substitution.\r
+     public ReplacementEvaluator (Regex regex, string replacement) {\r
+         this.regex = regex;\r
+         terms = new ArrayList ();\r
+         Compile (replacement);\r
+     }\r
+\r
+     // Concatenates the result of every compiled term for the given match.\r
+     public string Evaluate (Match match) {\r
+         StringBuilder result = new StringBuilder ();\r
+         foreach (Term term in terms)\r
+             result.Append (term.GetResult (match));\r
+\r
+         return result.ToString ();\r
+     }\r
+\r
+     // private\r
+\r
+     // Splits the (unescaped) replacement into terms. Literal text is buffered\r
+     // and attached as a prefix to the next substitution term; text left over\r
+     // at the end becomes a literal-only term.\r
+     private void Compile (string replacement) {\r
+         replacement = Parser.Unescape (replacement);\r
+         string literal = "";\r
+\r
+         int ptr = 0;\r
+         while (ptr < replacement.Length) {\r
+             char c = replacement[ptr ++];\r
+\r
+             // ordinary character, or a '$' with nothing after it: literal\r
+             if (c != '$' || ptr >= replacement.Length) {\r
+                 literal += c;\r
+                 continue;\r
+             }\r
+\r
+             // "$$" stands for one literal '$'; keep compiling what follows\r
+             if (replacement[ptr] == '$') {\r
+                 ++ ptr;\r
+                 literal += '$';\r
+                 continue;\r
+             }\r
+\r
+             Term term = CompileTerm (replacement, ref ptr);\r
+             term.Literal = literal;\r
+             terms.Add (term);\r
+             literal = "";\r
+         }\r
+\r
+         if (literal.Length > 0)\r
+             terms.Add (new Term (literal));\r
+     }\r
+\r
+     // Compiles the substitution that starts at str[ptr] (just after a '$') and\r
+     // advances ptr past it. Throws ArgumentException for an unknown group or\r
+     // an unrecognized substitution character.\r
+     private Term CompileTerm (string str, ref int ptr) {\r
+         char c = str[ptr];\r
+\r
+         if (Char.IsDigit (c)) { // numbered group\r
+             int n = Parser.ParseDecimal (str, ref ptr);\r
+             if (n < 0 || n > regex.GroupCount)\r
+                 throw new ArgumentException ("Bad group number.");\r
+\r
+             return new Term (TermOp.Match, n);\r
+         }\r
+\r
+         ++ ptr;\r
+\r
+         switch (c) {\r
+         case '{': { // named group\r
+             string name = Parser.ParseName (str, ref ptr);\r
+             if (str[ptr ++] != '}' || name == null)\r
+                 throw new ArgumentException ("Bad group name.");\r
+\r
+             int n = regex.GroupNumberFromName (name);\r
+\r
+             if (n < 0)\r
+                 throw new ArgumentException ("Bad group name.");\r
+\r
+             return new Term (TermOp.Match, n);\r
+         }\r
+\r
+         case '&': // entire match\r
+             return new Term (TermOp.Match, 0);\r
+\r
+         case '`': // text before match\r
+             return new Term (TermOp.PreMatch, 0);\r
+\r
+         case '\'': // text after match\r
+             return new Term (TermOp.PostMatch, 0);\r
+\r
+         case '+': // last group\r
+             // NOTE(review): the numbered-group check above accepts n ==\r
+             // GroupCount, which suggests the last group may be GroupCount\r
+             // rather than GroupCount - 1 - confirm against Regex.GroupCount.\r
+             return new Term (TermOp.Match, regex.GroupCount - 1);\r
+\r
+         case '_': // entire text\r
+             return new Term (TermOp.All, 0);\r
+\r
+         default:\r
+             throw new ArgumentException ("Bad replacement pattern.");\r
+         }\r
+     }\r
+\r
+     private Regex regex;\r
+     private ArrayList terms;\r
+\r
+     private enum TermOp {\r
+         None, // no action\r
+         Match, // input within group\r
+         PreMatch, // input before group\r
+         PostMatch, // input after group\r
+         All // entire input\r
+     }\r
+\r
+     // One piece of a compiled replacement: an optional literal prefix followed\r
+     // by the text selected by op for group number arg.\r
+     private class Term {\r
+         public Term (TermOp op, int arg) {\r
+             this.op = op;\r
+             this.arg = arg;\r
+             this.literal = "";\r
+         }\r
+\r
+         public Term (string literal) {\r
+             this.op = TermOp.None;\r
+             this.arg = 0;\r
+             this.literal = literal;\r
+         }\r
+\r
+         public string Literal {\r
+             set { literal = value; }\r
+         }\r
+\r
+         // Returns the literal prefix followed by the text this term selects\r
+         // from match.\r
+         public string GetResult (Match match) {\r
+             Group group = match.Groups[arg];\r
+\r
+             switch (op) {\r
+             case TermOp.None:\r
+                 return literal;\r
+\r
+             case TermOp.Match:\r
+                 return literal + group.Value;\r
+\r
+             case TermOp.PreMatch:\r
+                 return literal + group.Text.Substring (0, group.Index);\r
+\r
+             case TermOp.PostMatch:\r
+                 return literal + group.Text.Substring (group.Index + group.Length);\r
+\r
+             case TermOp.All:\r
+                 return literal + group.Text;\r
+             }\r
+\r
+             return "";\r
+         }\r
+\r
+         public TermOp op; // term type\r
+         public int arg; // group argument\r
+         public string literal; // literal to prepend\r
+\r
+         public override string ToString () {\r
+             return op.ToString () + "(" + arg + ") " + literal;\r
+         }\r
+     }\r
+ }\r
+}\r
--- /dev/null
+//\r
+// assembly: System\r
+// namespace: System.Text.RegularExpressions\r
+// file: syntax.cs\r
+//\r
+// author: Dan Lewis (dlewis@gmx.co.uk)\r
+// (c) 2002\r
+\r
+using System;\r
+using System.Collections;\r
+\r
+namespace System.Text.RegularExpressions.Syntax {\r
+ // collection classes\r
+ \r
+ // Typed list of expressions. Null entries are permitted: validation is\r
+ // overridden to accept anything.\r
+ class ExpressionCollection : CollectionBase {\r
+     // Appends e (which may be null) to the end of the collection.\r
+     public void Add (Expression e) {\r
+         this.List.Add (e);\r
+     }\r
+\r
+     public Expression this[int i] {\r
+         get {\r
+             object item = this.List[i];\r
+             return (Expression)item;\r
+         }\r
+         set {\r
+             this.List[i] = value;\r
+         }\r
+     }\r
+\r
+     protected override void OnValidate (object o) {\r
+         // intentionally empty so that null elements are allowed\r
+     }\r
+ }\r
+\r
+ // abstract classes\r
+ \r
+ // Base class of every node in the parsed expression tree.\r
+ abstract class Expression {\r
+     // Emits this node through cmp; reverse is passed down to child nodes.\r
+     public abstract void Compile (ICompiler cmp, bool reverse);\r
+\r
+     // Reports the minimum and maximum width of this node.\r
+     public abstract void GetWidth (out int min, out int max);\r
+\r
+     // Returns the width when minimum and maximum agree, otherwise -1.\r
+     public int GetFixedWidth () {\r
+         int lower, upper;\r
+         GetWidth (out lower, out upper);\r
+\r
+         return lower == upper ? lower : -1;\r
+     }\r
+\r
+     // Default anchor information: built from this node and its fixed width only.\r
+     public virtual AnchorInfo GetAnchorInfo () {\r
+         int width = GetFixedWidth ();\r
+         return new AnchorInfo (this, width);\r
+     }\r
+\r
+     // Nodes count as complex unless a subclass says otherwise.\r
+     public virtual bool IsComplex () {\r
+         return true;\r
+     }\r
+ }\r
+\r
+ // composite expressions\r
+ \r
+ // Base class for expressions that own a list of child expressions.\r
+ abstract class CompositeExpression : Expression {\r
+     public CompositeExpression () {\r
+         this.expressions = new ExpressionCollection ();\r
+     }\r
+\r
+     protected ExpressionCollection Expressions {\r
+         get { return this.expressions; }\r
+     }\r
+\r
+     // Computes the smallest minimum and the largest maximum width over the\r
+     // first count children, skipping null entries. When every one of them is\r
+     // null (or count is 0) both results are 0.\r
+     protected void GetWidth (out int min, out int max, int count) {\r
+         int smallest = Int32.MaxValue;\r
+         int largest = 0;\r
+         bool sawChild = false;\r
+\r
+         for (int index = 0; index < count; ++ index) {\r
+             Expression child = Expressions[index];\r
+             if (child != null) {\r
+                 sawChild = true;\r
+\r
+                 int childMin, childMax;\r
+                 child.GetWidth (out childMin, out childMax);\r
+                 smallest = Math.Min (smallest, childMin);\r
+                 largest = Math.Max (largest, childMax);\r
+             }\r
+         }\r
+\r
+         if (!sawChild) {\r
+             smallest = 0;\r
+             largest = 0;\r
+         }\r
+\r
+         min = smallest;\r
+         max = largest;\r
+     }\r
+\r
+     private ExpressionCollection expressions;\r
+ }\r
+\r
+ // groups\r
+ \r
+ // A sequence of child expressions that are compiled one after another.\r
+ class Group : CompositeExpression {\r
+ public Group () {\r
+ }\r
+\r
+ // The first child. Both accessors index element 0 directly and nothing in\r
+ // this class adds a placeholder, so the list must already be non-empty.\r
+ public Expression Expression {\r
+ get { return Expressions[0]; }\r
+ set { Expressions[0] = value; }\r
+ }\r
+\r
+ // Appends e to the end of the sequence.\r
+ public void AppendExpression (Expression e) {\r
+ Expressions.Add (e);\r
+ }\r
+\r
+ // Compiles every child in order, or in reverse order when reverse is set;\r
+ // the reverse flag is also passed on to each child.\r
+ public override void Compile (ICompiler cmp, bool reverse) {\r
+ int count = Expressions.Count;\r
+ for (int i = 0; i < count; ++ i) {\r
+ Expression e;\r
+ if (reverse)\r
+ e = Expressions[count - i - 1];\r
+ else\r
+ e = Expressions[i];\r
+\r
+ e.Compile (cmp, reverse);\r
+ }\r
+ }\r
+\r
+ // Width of a sequence: the sum of the children's minimum widths and the sum\r
+ // of their maximum widths, where a maximum of Int32.MaxValue is sticky.\r
+ public override void GetWidth (out int min, out int max) {\r
+ min = 0;\r
+ max = 0;\r
+\r
+ foreach (Expression e in Expressions) {\r
+ int a, b;\r
+ e.GetWidth (out a, out b);\r
+ min += a;\r
+ // once either side is Int32.MaxValue the total stays there\r
+ if (max == Int32.MaxValue || b == Int32.MaxValue)\r
+ max = Int32.MaxValue;\r
+ else\r
+ max += b;\r
+ }\r
+ }\r
+\r
+ // Derives anchor information for the whole sequence from its children:\r
+ // - if a child reports a position anchor before any child of unknown width,\r
+ //   that position is returned, offset by the widths accumulated so far;\r
+ // - otherwise the substring anchors of the children are collected as\r
+ //   intervals, normalized, and the substrings lying inside the longest\r
+ //   resulting interval are concatenated into one substring anchor;\r
+ // - if there is no such interval, only the group's width is reported.\r
+ // Scanning stops at the first child of unknown width, since offsets after it\r
+ // cannot be computed.\r
+ public override AnchorInfo GetAnchorInfo () {\r
+ int ptr;\r
+ int width = GetFixedWidth ();\r
+\r
+ ArrayList infos = new ArrayList ();\r
+ IntervalCollection segments = new IntervalCollection ();\r
+\r
+ // accumulate segments\r
+\r
+ // ptr is the offset of the current child from the start of the group\r
+ ptr = 0;\r
+ foreach (Expression e in Expressions) {\r
+ AnchorInfo info = e.GetAnchorInfo ();\r
+ infos.Add (info);\r
+\r
+ if (info.IsPosition)\r
+ return new AnchorInfo (this, ptr + info.Offset, width, info.Position);\r
+\r
+ if (info.IsSubstring)\r
+ segments.Add (info.GetInterval (ptr));\r
+\r
+ if (info.IsUnknownWidth)\r
+ break;\r
+\r
+ ptr += info.Width;\r
+ }\r
+\r
+ // normalize and find the longest segment\r
+\r
+ segments.Normalize ();\r
+\r
+ Interval longest = Interval.Empty;\r
+ foreach (Interval segment in segments) {\r
+ if (segment.Size > longest.Size)\r
+ longest = segment;\r
+ }\r
+\r
+ // now chain the substrings that made this segment together\r
+\r
+ if (!longest.IsEmpty) {\r
+ string str = "";\r
+ bool ignore = false;\r
+\r
+ // second walk over the collected infos, recomputing the same offsets\r
+ ptr = 0;\r
+ foreach (AnchorInfo info in infos) {\r
+ if (info.IsSubstring && longest.Contains (info.GetInterval (ptr))) {\r
+ str += info.Substring; // TODO mark subexpressions\r
+ // the combined anchor ignores case if any contributing part does\r
+ ignore |= info.IgnoreCase;\r
+ }\r
+\r
+ if (info.IsUnknownWidth)\r
+ break;\r
+\r
+ ptr += info.Width;\r
+ }\r
+\r
+ return new AnchorInfo (this, longest.low, width, str, ignore);\r
+ }\r
+\r
+ return new AnchorInfo (this, width);\r
+ }\r
+\r
+ // A group is complex when any child is complex, or when it has no fixed\r
+ // positive width (GetFixedWidth returns -1 for variable width).\r
+ public override bool IsComplex () {\r
+ bool comp = false;\r
+ foreach (Expression e in Expressions) {\r
+ comp |= e.IsComplex ();\r
+ }\r
+\r
+ // '<=' binds tighter than '|': comp | (GetFixedWidth () <= 0)\r
+ return comp | GetFixedWidth () <= 0;\r
+ }\r
+ }\r
+\r
+ // Root of a parsed pattern. Compiling it emits an info block and an anchoring\r
+ // expression ahead of the pattern body.\r
+ class RegularExpression : Group {\r
+     public RegularExpression () {\r
+         this.group_count = 0;\r
+     }\r
+\r
+     // Group count passed to EmitInfo when the expression is compiled.\r
+     public int GroupCount {\r
+         get { return this.group_count; }\r
+         set { this.group_count = value; }\r
+     }\r
+\r
+     public override void Compile (ICompiler cmp, bool reverse) {\r
+         // info block: group count and width bounds\r
+\r
+         int lower, upper;\r
+         GetWidth (out lower, out upper);\r
+         cmp.EmitInfo (group_count, lower, upper);\r
+\r
+         // anchoring expression; when compiling in reverse only the width is\r
+         // used (FIXME in the original: reverse anchors are not derived)\r
+\r
+         AnchorInfo derived = GetAnchorInfo ();\r
+         AnchorInfo anchor = reverse\r
+             ? new AnchorInfo (this, GetFixedWidth ()) // FIXME\r
+             : derived;\r
+\r
+         LinkRef body = cmp.NewLink ();\r
+         cmp.EmitAnchor (anchor.Offset, body);\r
+\r
+         if (anchor.IsPosition) {\r
+             cmp.EmitPosition (anchor.Position);\r
+         }\r
+         else if (anchor.IsSubstring) {\r
+             cmp.EmitString (anchor.Substring, anchor.IgnoreCase, reverse);\r
+         }\r
+\r
+         cmp.EmitTrue ();\r
+\r
+         // pattern body, reached through the link emitted with the anchor\r
+\r
+         cmp.ResolveLink (body);\r
+         base.Compile (cmp, reverse);\r
+         cmp.EmitTrue ();\r
+     }\r
+\r
+     private int group_count;\r
+ }\r
+\r
+ // A group whose match is captured under a number and, optionally, a name.\r
+ class CapturingGroup : Group {\r
+     public CapturingGroup () {\r
+         gid = 0;\r
+         name = null;\r
+     }\r
+\r
+     // Group number emitted with the open/close instructions.\r
+     public int Number {\r
+         get { return this.gid; }\r
+         set { this.gid = value; }\r
+     }\r
+\r
+     // Group name, or null for an unnamed group.\r
+     public string Name {\r
+         get { return this.name; }\r
+         set { this.name = value; }\r
+     }\r
+\r
+     public bool IsNamed {\r
+         get { return !(this.name == null); }\r
+     }\r
+\r
+     // Brackets the compiled children with open/close for this group number.\r
+     public override void Compile (ICompiler cmp, bool reverse) {\r
+         int number = gid;\r
+\r
+         cmp.EmitOpen (number);\r
+         base.Compile (cmp, reverse);\r
+         cmp.EmitClose (number);\r
+     }\r
+\r
+     // Capturing groups are always treated as complex.\r
+     public override bool IsComplex () {\r
+         return true;\r
+     }\r
+\r
+     private int gid;\r
+     private string name;\r
+ }\r
+\r
+ // A capturing group that is balanced against another capturing group.\r
+ class BalancingGroup : CapturingGroup {\r
+     public BalancingGroup () {\r
+         balance = null;\r
+     }\r
+\r
+     // The group this one balances against; Compile reads its Number, so it\r
+     // must be set before compiling.\r
+     public CapturingGroup Balance {\r
+         get { return this.balance; }\r
+         set { this.balance = value; }\r
+     }\r
+\r
+     public override void Compile (ICompiler cmp, bool reverse) {\r
+         // base.Compile would be CapturingGroup.Compile (which emits open and\r
+         // close), and Group.Compile cannot be reached from here, so the\r
+         // child loop of Group.Compile is repeated.\r
+\r
+         int total = Expressions.Count;\r
+         for (int step = 0; step < total; ++ step) {\r
+             int index = reverse ? total - step - 1 : step;\r
+             Expression child = Expressions[index];\r
+\r
+             child.Compile (cmp, reverse);\r
+         }\r
+\r
+         cmp.EmitBalance (this.Number, balance.Number);\r
+     }\r
+\r
+     private CapturingGroup balance;\r
+ }\r
+\r
+ // A group compiled as a Sub block: the children followed by a True\r
+ // instruction, with a link resolved just after the block.\r
+ class NonBacktrackingGroup : Group {\r
+     public NonBacktrackingGroup () {\r
+     }\r
+\r
+     public override void Compile (ICompiler cmp, bool reverse) {\r
+         LinkRef afterBlock = cmp.NewLink ();\r
+         cmp.EmitSub (afterBlock);\r
+\r
+         base.Compile (cmp, reverse);\r
+         cmp.EmitTrue ();\r
+\r
+         cmp.ResolveLink (afterBlock);\r
+     }\r
+\r
+     // Always treated as complex.\r
+     public override bool IsComplex () {\r
+         return true;\r
+     }\r
+ }\r
+\r
+ // repetition\r
+\r
+ // A quantified expression: the single child is repeated between Minimum and\r
+ // Maximum times. A Maximum of 0xffff is treated as "unbounded" when widths\r
+ // are computed.\r
+ class Repetition : CompositeExpression {\r
+     public Repetition (int min, int max, bool lazy) {\r
+         Expressions.Add (null); // slot for the repeated expression\r
+\r
+         this.min = min;\r
+         this.max = max;\r
+         this.lazy = lazy;\r
+     }\r
+\r
+     // The repeated expression (null until assigned).\r
+     public Expression Expression {\r
+         get { return Expressions[0]; }\r
+         set { Expressions[0] = value; }\r
+     }\r
+\r
+     public int Minimum {\r
+         get { return min; }\r
+         set { min = value; }\r
+     }\r
+\r
+     public int Maximum {\r
+         get { return max; }\r
+         set { max = value; }\r
+     }\r
+\r
+     public bool Lazy {\r
+         get { return lazy; }\r
+         set { lazy = value; }\r
+     }\r
+\r
+     // Complex children are compiled as a Repeat ... Until block; others use\r
+     // the FastRepeat form, whose body ends in True and whose tail link is\r
+     // resolved just after it.\r
+     public override void Compile (ICompiler cmp, bool reverse) {\r
+         if (Expression.IsComplex ()) {\r
+             LinkRef until = cmp.NewLink ();\r
+\r
+             cmp.EmitRepeat (min, max, lazy, until);\r
+             Expression.Compile (cmp, reverse);\r
+             cmp.EmitUntil (until);\r
+         }\r
+         else {\r
+             LinkRef tail = cmp.NewLink ();\r
+\r
+             cmp.EmitFastRepeat (min, max, lazy, tail);\r
+             Expression.Compile (cmp, reverse);\r
+             cmp.EmitTrue ();\r
+             cmp.ResolveLink (tail);\r
+         }\r
+     }\r
+\r
+     // Width of the child scaled by the repeat bounds. The maximum is\r
+     // Int32.MaxValue when either the child or the repeat is unbounded.\r
+     // Products are clamped to Int32.MaxValue instead of wrapping, which\r
+     // nested large bounds (e.g. (a{60000}){60000}) would otherwise cause.\r
+     public override void GetWidth (out int min, out int max) {\r
+         Expression.GetWidth (out min, out max);\r
+         min = MultiplyClamped (min, this.min);\r
+         if (max == Int32.MaxValue || this.max == 0xffff)\r
+             max = Int32.MaxValue;\r
+         else\r
+             max = MultiplyClamped (max, this.max);\r
+     }\r
+\r
+     // Anchor information can be derived only when at least one repetition is\r
+     // guaranteed (Minimum > 0); otherwise just the width is reported.\r
+     public override AnchorInfo GetAnchorInfo () {\r
+         int width = GetFixedWidth ();\r
+         if (Minimum == 0)\r
+             return new AnchorInfo (this, width);\r
+\r
+         AnchorInfo info = Expression.GetAnchorInfo ();\r
+         if (info.IsPosition)\r
+             return new AnchorInfo (this, info.Offset, width, info.Position);\r
+\r
+         if (info.IsSubstring) {\r
+             if (info.IsComplete) {\r
+                 // the child's substring, repeated the guaranteed number of times\r
+                 string str = "";\r
+                 for (int i = 0; i < Minimum; ++ i)\r
+                     str += info.Substring;\r
+\r
+                 return new AnchorInfo (this, 0, width, str, info.IgnoreCase);\r
+             }\r
+\r
+             return new AnchorInfo (this, info.Offset, width, info.Substring, info.IgnoreCase);\r
+         }\r
+\r
+         return new AnchorInfo (this, width);\r
+     }\r
+\r
+     // a * b computed in 64 bits; results above Int32.MaxValue are clamped.\r
+     private static int MultiplyClamped (int a, int b) {\r
+         long product = (long)a * b;\r
+         return product > Int32.MaxValue ? Int32.MaxValue : (int)product;\r
+     }\r
+\r
+     private int min, max;\r
+     private bool lazy;\r
+ }\r
+\r
+ // assertions\r
+\r
+ // Base class for conditional constructs. Slot 0 of the expression list is the\r
+ // "true" expression and slot 1 the "false" expression; either may be null.\r
+ abstract class Assertion : CompositeExpression {\r
+     public Assertion () {\r
+         // reserve the two slots up front\r
+         Expressions.Add (null); // true expression\r
+         Expressions.Add (null); // false expression\r
+     }\r
+\r
+     public Expression TrueExpression {\r
+         get { return Expressions[0]; }\r
+         set { Expressions[0] = value; }\r
+     }\r
+\r
+     public Expression FalseExpression {\r
+         get { return Expressions[1]; }\r
+         set { Expressions[1] = value; }\r
+     }\r
+\r
+     // Width over the first two slots; the minimum drops to zero as soon as\r
+     // one of the two expressions is missing.\r
+     public override void GetWidth (out int min, out int max) {\r
+         GetWidth (out min, out max, 2);\r
+\r
+         bool both_present = TrueExpression != null && FalseExpression != null;\r
+         if (!both_present)\r
+             min = 0;\r
+     }\r
+ }\r
+\r
+ // Conditional that is compiled to an IfDefined op on a capturing group's number.\r
+ class CaptureAssertion : Assertion {\r
+     public CaptureAssertion () {\r
+     }\r
+\r
+     // The group whose number is passed to IfDefined.\r
+     public CapturingGroup CapturingGroup {\r
+         get { return group; }\r
+         set { group = value; }\r
+     }\r
+\r
+     public override void Compile (ICompiler cmp, bool reverse) {\r
+         int gid = group.Number;\r
+         LinkRef tail = cmp.NewLink ();\r
+\r
+         if (FalseExpression != null) {\r
+             //    IfDefined :1\r
+             //    <yes_expr>\r
+             //    Jump :2\r
+             // 1: <no_expr>\r
+             // 2: <tail>\r
+\r
+             LinkRef no_branch = cmp.NewLink ();\r
+             cmp.EmitIfDefined (gid, no_branch);\r
+             TrueExpression.Compile (cmp, reverse);\r
+             cmp.EmitJump (tail);\r
+             cmp.ResolveLink (no_branch);\r
+             FalseExpression.Compile (cmp, reverse);\r
+         }\r
+         else {\r
+             //    IfDefined :1\r
+             //    <yes_expr>\r
+             // 1: <tail>\r
+\r
+             cmp.EmitIfDefined (gid, tail);\r
+             TrueExpression.Compile (cmp, reverse);\r
+         }\r
+\r
+         cmp.ResolveLink (tail);\r
+     }\r
+\r
+     // Complex when either present branch is complex or the fixed width is not positive.\r
+     public override bool IsComplex () {\r
+         // all three checks are always evaluated, in this order\r
+         bool yes_complex = TrueExpression != null && TrueExpression.IsComplex ();\r
+         bool no_complex = FalseExpression != null && FalseExpression.IsComplex ();\r
+         bool no_fixed_width = GetFixedWidth () <= 0;\r
+\r
+         return yes_complex || no_complex || no_fixed_width;\r
+     }\r
+\r
+     private CapturingGroup group;\r
+ }\r
+\r
+ // Assertion driven by a test expression (slot 2), compiled to a Test op.\r
+ // Reverse flips the matching direction of the test expression; Negate swaps\r
+ // the two targets of the Test op.\r
+ class ExpressionAssertion : Assertion {\r
+     public ExpressionAssertion () {\r
+         Expressions.Add (null); // test expression\r
+     }\r
+\r
+     public bool Reverse {\r
+         get { return reverse; }\r
+         set { reverse = value; }\r
+     }\r
+\r
+     public bool Negate {\r
+         get { return negate; }\r
+         set { negate = value; }\r
+     }\r
+\r
+     public Expression TestExpression {\r
+         get { return Expressions[2]; }\r
+         set { Expressions[2] = value; }\r
+     }\r
+\r
+     public override void Compile (ICompiler cmp, bool reverse) {\r
+         LinkRef on_true = cmp.NewLink ();\r
+         LinkRef on_false = cmp.NewLink ();\r
+\r
+         // test op: a negative assertion swaps the two targets\r
+\r
+         if (negate)\r
+             cmp.EmitTest (on_false, on_true);\r
+         else\r
+             cmp.EmitTest (on_true, on_false);\r
+\r
+         // test expression: direction is the caller's, flipped by this.reverse\r
+\r
+         TestExpression.Compile (cmp, reverse ^ this.reverse);\r
+         cmp.EmitTrue ();\r
+\r
+         if (TrueExpression == null) { // (?= ...)\r
+             //     Test :1, :2\r
+             //     <test_expr>\r
+             // :2  False\r
+             // :1  <tail>\r
+\r
+             cmp.ResolveLink (on_false);\r
+             cmp.EmitFalse ();\r
+             cmp.ResolveLink (on_true);\r
+             return;\r
+         }\r
+\r
+         cmp.ResolveLink (on_true);\r
+         TrueExpression.Compile (cmp, reverse);\r
+\r
+         if (FalseExpression == null) { // (?(...) ...)\r
+             //     Test :1, :2\r
+             //     <test_expr>\r
+             // :1  <yes_expr>\r
+             // :2  <tail>\r
+\r
+             cmp.ResolveLink (on_false);\r
+             return;\r
+         }\r
+\r
+         // (?(...) ... | ...)\r
+         //     Test :1, :2\r
+         //     <test_expr>\r
+         // :1  <yes_expr>\r
+         //     Jump :3\r
+         // :2  <no_expr>\r
+         // :3  <tail>\r
+\r
+         LinkRef tail = cmp.NewLink ();\r
+\r
+         cmp.EmitJump (tail);\r
+         cmp.ResolveLink (on_false);\r
+         FalseExpression.Compile (cmp, reverse);\r
+         cmp.ResolveLink (tail);\r
+     }\r
+\r
+     private bool reverse, negate;\r
+ }\r
+\r
+ // alternation\r
+\r
+ // A list of alternative expressions, each compiled as its own Branch.\r
+ class Alternation : CompositeExpression {\r
+     public Alternation () {\r
+     }\r
+\r
+     // The alternatives, in the order they were added.\r
+     public ExpressionCollection Alternatives {\r
+         get { return Expressions; }\r
+     }\r
+\r
+     public void AddAlternative (Expression e) {\r
+         Alternatives.Add (e);\r
+     }\r
+\r
+     // Emitted layout, repeated for each alternative:\r
+     //    Branch :1\r
+     //    <alternative>\r
+     //    Jump :tail\r
+     // 1:\r
+     // followed by False and the tail label.\r
+     public override void Compile (ICompiler cmp, bool reverse) {\r
+         LinkRef tail = cmp.NewLink ();\r
+\r
+         foreach (Expression e in Alternatives) {\r
+             // each branch gets its own link, so this does not depend on a\r
+             // link being reusable after it has been resolved\r
+             LinkRef next = cmp.NewLink ();\r
+\r
+             cmp.EmitBranch (next);\r
+             e.Compile (cmp, reverse);\r
+             cmp.EmitJump (tail);\r
+             cmp.ResolveLink (next);\r
+         }\r
+\r
+         cmp.EmitFalse ();\r
+         cmp.ResolveLink (tail);\r
+     }\r
+\r
+     public override void GetWidth (out int min, out int max) {\r
+         GetWidth (out min, out max, Alternatives.Count);\r
+     }\r
+\r
+     // Complex when any alternative is complex or the fixed width is not positive.\r
+     // Every alternative is queried; there is no early exit.\r
+     public override bool IsComplex () {\r
+         bool comp = false;\r
+         foreach (Expression e in Alternatives) {\r
+             comp |= e.IsComplex ();\r
+         }\r
+\r
+         return comp | GetFixedWidth () <= 0;\r
+     }\r
+ }\r
+\r
+ // terminal expressions\r
+\r
+ // A literal string, matched with or without regard to case.\r
+ class Literal : Expression {\r
+     public Literal (string str, bool ignore) {\r
+         this.str = str;\r
+         this.ignore = ignore;\r
+     }\r
+\r
+     public string String {\r
+         get { return str; }\r
+         set { str = value; }\r
+     }\r
+\r
+     public bool IgnoreCase {\r
+         get { return ignore; }\r
+         set { ignore = value; }\r
+     }\r
+\r
+     // Emits nothing for an empty string, a Character op for a single\r
+     // character and a String op for anything longer.\r
+     public override void Compile (ICompiler cmp, bool reverse) {\r
+         switch (str.Length) {\r
+         case 0:\r
+             break;\r
+\r
+         case 1:\r
+             cmp.EmitCharacter (str[0], false, ignore, reverse);\r
+             break;\r
+\r
+         default:\r
+             cmp.EmitString (str, ignore, reverse);\r
+             break;\r
+         }\r
+     }\r
+\r
+     // The width is exactly the length of the string.\r
+     public override void GetWidth (out int min, out int max) {\r
+         int length = str.Length;\r
+         min = length;\r
+         max = length;\r
+     }\r
+\r
+     // The whole string is the anchor substring, at offset 0.\r
+     public override AnchorInfo GetAnchorInfo () {\r
+         return new AnchorInfo (this, 0, str.Length, str, ignore);\r
+     }\r
+\r
+     public override bool IsComplex () {\r
+         return false;\r
+     }\r
+\r
+     private string str;\r
+     private bool ignore;\r
+ }\r
+\r
+ // Zero-width assertion about the current position.\r
+ class PositionAssertion : Expression {\r
+     public PositionAssertion (Position pos) {\r
+         this.pos = pos;\r
+     }\r
+\r
+     public Position Position {\r
+         get { return pos; }\r
+         set { pos = value; }\r
+     }\r
+\r
+     // The direction flag is not used for position ops.\r
+     public override void Compile (ICompiler cmp, bool reverse) {\r
+         cmp.EmitPosition (pos);\r
+     }\r
+\r
+     public override void GetWidth (out int min, out int max) {\r
+         min = 0;\r
+         max = 0;\r
+     }\r
+\r
+     public override bool IsComplex () {\r
+         return false;\r
+     }\r
+\r
+     // Only StartOfString, StartOfLine and StartOfScan are reported as position\r
+     // anchors; every other position yields a plain zero-width AnchorInfo.\r
+     public override AnchorInfo GetAnchorInfo () {\r
+         bool is_start_anchor =\r
+             pos == Position.StartOfString ||\r
+             pos == Position.StartOfLine ||\r
+             pos == Position.StartOfScan;\r
+\r
+         if (is_start_anchor)\r
+             return new AnchorInfo (this, 0, 0, pos);\r
+\r
+         return new AnchorInfo (this, 0);\r
+     }\r
+\r
+     private Position pos;\r
+ }\r
+\r
+ // Back reference to a capturing group, compiled to a Reference op.\r
+ class Reference : Expression {\r
+     public Reference (bool ignore) {\r
+         this.ignore = ignore;\r
+     }\r
+\r
+     // The referenced group; its number is passed to the Reference op.\r
+     public CapturingGroup CapturingGroup {\r
+         get { return group; }\r
+         set { group = value; }\r
+     }\r
+\r
+     public bool IgnoreCase {\r
+         get { return ignore; }\r
+         set { ignore = value; }\r
+     }\r
+\r
+     public override void Compile (ICompiler cmp, bool reverse) {\r
+         int gid = group.Number;\r
+         cmp.EmitReference (gid, ignore, reverse);\r
+     }\r
+\r
+     // Reported as anything from 0 to Int32.MaxValue.\r
+     public override void GetWidth (out int min, out int max) {\r
+         //group.GetWidth (out min, out max);\r
+         // TODO set width to referenced group for non-cyclical references\r
+         max = Int32.MaxValue;\r
+         min = 0;\r
+     }\r
+\r
+     public override bool IsComplex () {\r
+         // FIXME incorporate cyclic check\r
+         return true;\r
+     }\r
+\r
+     private CapturingGroup group;\r
+     private bool ignore;\r
+ }\r
+\r
+ // A character class made of single characters, ranges and categories.\r
+ // Categories are tracked in two flag arrays (positive / negated) indexed by\r
+ // the Category value; characters and ranges are kept as intervals.\r
+ class CharacterClass : Expression {\r
+     public CharacterClass (bool negate, bool ignore) {\r
+         this.negate = negate;\r
+         this.ignore = ignore;\r
+         this.intervals = new IntervalCollection ();\r
+\r
+         // size the flag arrays from the last value Enum.GetValues reports;\r
+         // new bool[] elements already start out false\r
+\r
+         Array values = Enum.GetValues (typeof (Category));\r
+         int last = (int)(Category)values.GetValue (values.Length - 1);\r
+         int size = last + 1;\r
+\r
+         pos_cats = new bool[size];\r
+         neg_cats = new bool[size];\r
+     }\r
+\r
+     public CharacterClass (Category cat, bool negate) : this (false, false) {\r
+         this.AddCategory (cat, negate);\r
+     }\r
+\r
+     public bool Negate {\r
+         get { return negate; }\r
+         set { negate = value; }\r
+     }\r
+\r
+     public bool IgnoreCase {\r
+         get { return ignore; }\r
+         set { ignore = value; }\r
+     }\r
+\r
+     // Records the category as positive or negated; the opposite flag for the\r
+     // same category is cleared, so the last call for a category wins.\r
+     public void AddCategory (Category cat, bool negate) {\r
+         int slot = (int)cat;\r
+\r
+         pos_cats[slot] = !negate;\r
+         neg_cats[slot] = negate;\r
+     }\r
+\r
+     public void AddCharacter (char c) {\r
+         AddRange (c, c);\r
+     }\r
+\r
+     public void AddRange (char lo, char hi) {\r
+         intervals.Add (new Interval (lo, hi));\r
+     }\r
+\r
+     // Emits one op per category and per meta-interval. When more than one op\r
+     // is needed they are wrapped in an In op and terminated by True (negated\r
+     // class) or False. Nothing is emitted for an empty class.\r
+     public override void Compile (ICompiler cmp, bool reverse) {\r
+         // create the meta-collection\r
+\r
+         IntervalCollection.CostDelegate cost = new IntervalCollection.CostDelegate (GetIntervalCost);\r
+         IntervalCollection meta = intervals.GetMetaCollection (cost);\r
+\r
+         // count ops\r
+\r
+         int ops = meta.Count;\r
+         for (int c = 0; c < pos_cats.Length; ++ c) {\r
+             if (pos_cats[c])\r
+                 ++ ops;\r
+             if (neg_cats[c])\r
+                 ++ ops;\r
+         }\r
+\r
+         if (ops == 0)\r
+             return;\r
+\r
+         // emit in op when there is more than one test\r
+\r
+         bool grouped = ops > 1;\r
+         LinkRef tail = cmp.NewLink ();\r
+         if (grouped)\r
+             cmp.EmitIn (tail);\r
+\r
+         // emit categories\r
+\r
+         for (int c = 0; c < pos_cats.Length; ++ c) {\r
+             if (!pos_cats[c] && !neg_cats[c])\r
+                 continue;\r
+\r
+             // a negated category inverts the class-level negate flag\r
+             bool invert = pos_cats[c] ? negate : !negate;\r
+             cmp.EmitCategory ((Category)c, invert, reverse);\r
+         }\r
+\r
+         // emit character/range/sets from meta-collection\r
+\r
+         foreach (Interval span in meta) {\r
+             if (span.IsSingleton && !span.IsDiscontiguous) { // Character\r
+                 cmp.EmitCharacter ((char)span.low, negate, ignore, reverse);\r
+                 continue;\r
+             }\r
+\r
+             if (!span.IsDiscontiguous) { // Range\r
+                 cmp.EmitRange ((char)span.low, (char)span.high, negate, ignore, reverse);\r
+                 continue;\r
+             }\r
+\r
+             // Set: one bit per position in the span, set for every original\r
+             // interval the span contains\r
+\r
+             BitArray bits = new BitArray (span.Size);\r
+             foreach (Interval member in intervals) {\r
+                 if (!span.Contains (member))\r
+                     continue;\r
+\r
+                 for (int ch = member.low; ch <= member.high; ++ ch)\r
+                     bits[ch - span.low] = true;\r
+             }\r
+\r
+             cmp.EmitSet ((char)span.low, bits, negate, ignore, reverse);\r
+         }\r
+\r
+         // finish up\r
+\r
+         if (grouped) {\r
+             if (negate)\r
+                 cmp.EmitTrue ();\r
+             else\r
+                 cmp.EmitFalse ();\r
+\r
+             cmp.ResolveLink (tail);\r
+         }\r
+     }\r
+\r
+     // A character class always has a width of exactly one.\r
+     public override void GetWidth (out int min, out int max) {\r
+         min = 1;\r
+         max = 1;\r
+     }\r
+\r
+     public override bool IsComplex () {\r
+         return false;\r
+     }\r
+\r
+     // private\r
+\r
+     // Cost handed to GetMetaCollection; uses op length as the metric\r
+     // (=> optimize for space).\r
+     private static double GetIntervalCost (Interval i) {\r
+         if (i.IsDiscontiguous)\r
+             return 3 + ((i.Size + 0xf) >> 4); // Set\r
+\r
+         if (i.IsSingleton)\r
+             return 2; // Character\r
+\r
+         return 3; // Range\r
+     }\r
+\r
+     private bool negate, ignore;\r
+     private bool[] pos_cats, neg_cats;\r
+     private IntervalCollection intervals;\r
+ }\r
+\r
+ // Describes what an expression contributes as an anchor: either a position,\r
+ // a substring at a given offset, or just a width.\r
+ class AnchorInfo {\r
+     private Expression expr;\r
+\r
+     private Position pos;\r
+     private int offset;\r
+\r
+     private string str;\r
+     private int width;\r
+     private bool ignore;\r
+\r
+     // Width only: no substring, position Any, offset 0.\r
+     public AnchorInfo (Expression expr, int width) {\r
+         Init (expr, 0, width, null, false, Position.Any);\r
+     }\r
+\r
+     // Substring anchor. The substring is stored lower-cased when ignore is set.\r
+     public AnchorInfo (Expression expr, int offset, int width, string str, bool ignore) {\r
+         string stored = ignore ? str.ToLower () : str;\r
+         Init (expr, offset, width, stored, ignore, Position.Any);\r
+     }\r
+\r
+     // Position anchor: no substring.\r
+     public AnchorInfo (Expression expr, int offset, int width, Position pos) {\r
+         Init (expr, offset, width, null, false, pos);\r
+     }\r
+\r
+     // Shared field initialization for the three constructors.\r
+     private void Init (Expression expr, int offset, int width, string str, bool ignore, Position pos) {\r
+         this.expr = expr;\r
+         this.offset = offset;\r
+         this.width = width;\r
+         this.str = str;\r
+         this.ignore = ignore;\r
+         this.pos = pos;\r
+     }\r
+\r
+     public Expression Expression {\r
+         get { return expr; }\r
+     }\r
+\r
+     public int Offset {\r
+         get { return offset; }\r
+     }\r
+\r
+     public int Width {\r
+         get { return width; }\r
+     }\r
+\r
+     // Length of the substring, or 0 when there is none.\r
+     public int Length {\r
+         get {\r
+             if (str == null)\r
+                 return 0;\r
+\r
+             return str.Length;\r
+         }\r
+     }\r
+\r
+     // A negative width marks the width as unknown.\r
+     public bool IsUnknownWidth {\r
+         get { return width < 0; }\r
+     }\r
+\r
+     // True when the substring length equals the width.\r
+     public bool IsComplete {\r
+         get { return Length == Width; }\r
+     }\r
+\r
+     public string Substring {\r
+         get { return str; }\r
+     }\r
+\r
+     public bool IgnoreCase {\r
+         get { return ignore; }\r
+     }\r
+\r
+     public Position Position {\r
+         get { return pos; }\r
+     }\r
+\r
+     public bool IsSubstring {\r
+         get { return str != null; }\r
+     }\r
+\r
+     public bool IsPosition {\r
+         get { return pos != Position.Any; }\r
+     }\r
+\r
+     // Interval covered by the substring, measured from 0.\r
+     public Interval GetInterval () {\r
+         return GetInterval (0);\r
+     }\r
+\r
+     // Interval covered by the substring, measured from start; Interval.Empty\r
+     // when there is no substring.\r
+     public Interval GetInterval (int start) {\r
+         if (!IsSubstring)\r
+             return Interval.Empty;\r
+\r
+         int first = start + Offset;\r
+         return new Interval (first, first + Length - 1);\r
+     }\r
+ }\r
+}\r