Regex classes, initial revision.
authorDan Lewis <dan@mono-cvs.ximian.com>
Thu, 31 Jan 2002 08:00:16 +0000 (08:00 -0000)
committerDan Lewis <dan@mono-cvs.ximian.com>
Thu, 31 Jan 2002 08:00:16 +0000 (08:00 -0000)
svn path=/trunk/mcs/; revision=2205

15 files changed:
mcs/class/System/System.Text.RegularExpressions/arch.cs [new file with mode: 0644]
mcs/class/System/System.Text.RegularExpressions/cache.cs [new file with mode: 0644]
mcs/class/System/System.Text.RegularExpressions/category.cs [new file with mode: 0644]
mcs/class/System/System.Text.RegularExpressions/collections.cs [new file with mode: 0644]
mcs/class/System/System.Text.RegularExpressions/compiler.cs [new file with mode: 0644]
mcs/class/System/System.Text.RegularExpressions/debug.cs [new file with mode: 0644]
mcs/class/System/System.Text.RegularExpressions/interpreter.cs [new file with mode: 0644]
mcs/class/System/System.Text.RegularExpressions/interval.cs [new file with mode: 0644]
mcs/class/System/System.Text.RegularExpressions/match.cs [new file with mode: 0644]
mcs/class/System/System.Text.RegularExpressions/notes.txt [new file with mode: 0644]
mcs/class/System/System.Text.RegularExpressions/parser.cs [new file with mode: 0644]
mcs/class/System/System.Text.RegularExpressions/quicksearch.cs [new file with mode: 0644]
mcs/class/System/System.Text.RegularExpressions/regex.cs [new file with mode: 0644]
mcs/class/System/System.Text.RegularExpressions/replace.cs [new file with mode: 0644]
mcs/class/System/System.Text.RegularExpressions/syntax.cs [new file with mode: 0644]

diff --git a/mcs/class/System/System.Text.RegularExpressions/arch.cs b/mcs/class/System/System.Text.RegularExpressions/arch.cs
new file mode 100644 (file)
index 0000000..274d9ad
--- /dev/null
@@ -0,0 +1,333 @@
+//\r
+// assembly:   System\r
+// namespace:  System.Text.RegularExpressions\r
+// file:       arch.cs\r
+//\r
+// author:     Dan Lewis (dlewis@gmx.co.uk)\r
+//             (c) 2002\r
+\r
+using System;\r
+\r
+namespace System.Text.RegularExpressions {\r
+\r
+       // Instruction opcodes of the regex instruction set. The underlying type\r
+       // is ushort. The operand layout of each instruction is described in the\r
+       // comment blocks that follow the declarations in this file.\r
+       // NOTE(review): OpFlags values start at 0x100, so an opcode presumably\r
+       // shares a word with its flags -- confirm against the compiler.\r
+       enum OpCode : ushort {\r
+               False           = 0,    // always fails\r
+               True,                   // always succeeds\r
+\r
+               // matching\r
+\r
+               Position,               // zero-width position assertion\r
+               String,                 // match string literal\r
+               Reference,              // back reference\r
+\r
+               // character matching\r
+\r
+               Character,              // match character exactly\r
+               Category,               // match character from category\r
+               Range,                  // match character from range\r
+               Set,                    // match character from set\r
+               In,                     // match character from group of tests\r
+\r
+               // capturing\r
+\r
+               Open,                   // open group\r
+               Close,                  // close group\r
+               Balance,                // balance groups\r
+\r
+               // control flow\r
+\r
+               IfDefined,              // conditional on capture\r
+               Sub,                    // non-backtracking subexpression\r
+               Test,                   // non-backtracking lookahead/behind\r
+               Branch,                 // alternative expression\r
+               Jump,                   // unconditional goto\r
+               Repeat,                 // new repeat context\r
+               Until,                  // repeat subexpression within context\r
+               FastRepeat,             // repeat simple subexpression\r
+               Anchor,                 // anchoring expression\r
+\r
+               // miscellaneous\r
+               \r
+               Info                    // pattern information\r
+       }\r
+\r
+       // Modifier bits for instructions. Every non-zero value is 0x100 or\r
+       // above, i.e. outside the low byte. The per-instruction comments later\r
+       // in this file list which flags each opcode accepts.\r
+       [Flags]\r
+       enum OpFlags : ushort {\r
+               None            = 0x000,\r
+               Negate          = 0x100,        // succeed on mismatch\r
+               IgnoreCase      = 0x200,        // case insensitive matching\r
+               RightToLeft     = 0x400,        // right-to-left matching\r
+               Lazy            = 0x800         // minimizing repeat\r
+       }\r
+\r
+       // Operand values for the Position instruction (a zero-width position\r
+       // assertion). The right-hand column shows the pattern syntax.\r
+       // NOTE(review): Start and StartOfString carry the same description\r
+       // (\A) -- confirm what distinguishes Start before relying on it.\r
+       enum Position : ushort {\r
+               Any,                    // anywhere\r
+               Start,                  // start of string                      \A\r
+               StartOfString,          // start of string                      \A\r
+               StartOfLine,            // start of line                        ^\r
+               StartOfScan,            // start of scan                        \G\r
+               End,                    // end or before newline at end         \Z\r
+               EndOfString,            // end of string                        \z\r
+               EndOfLine,              // end of line                          $\r
+               Boundary,               // word boundary                        \b\r
+               NonBoundary             // not word boundary                    \B\r
+       };\r
+       \r
+       // see category.cs for Category enum\r
+\r
+       // Matching engine abstraction exposing a single Scan operation.\r
+       interface IMachine {\r
+               // Scans text on behalf of regex and returns the resulting Match.\r
+               // NOTE(review): start and end presumably bound the portion of\r
+               // text to scan -- confirm against the implementing class.\r
+               Match Scan (Regex regex, string text, int start, int end);\r
+       }\r
+\r
+       // Creates IMachine instances. FactoryCache (cache.cs) stores objects of\r
+       // this type keyed by pattern and options.\r
+       interface IMachineFactory {\r
+               // Returns an IMachine.\r
+               // NOTE(review): presumably a fresh instance per call -- confirm.\r
+               IMachine NewInstance ();\r
+       }\r
+\r
+       // Anchor SKIP OFFSET\r
+       //\r
+       // Flags:       [RightToLeft] ??\r
+       // SKIP:        relative address of tail expression\r
+       // OFFSET:      offset of anchor from start of pattern\r
+       //\r
+       // Usage:\r
+       //\r
+       //      Anchor :1 OFFSET\r
+       //              <expr>\r
+       //              True\r
+       // 1:   <tail>\r
+       //\r
+       // Notes:\r
+       //\r
+       // In practice, the anchoring expression is only going to be\r
+       // Position (StartOfString, StartOfLine, StartOfScan) or String.\r
+       // This is because the optimizer looks for position anchors at the\r
+       // start of the expression, and if that fails it looks for the\r
+       // longest substring. If an expression has neither a position\r
+       // anchor nor a longest substring anchor, then the anchoring expression\r
+       // is left empty. Since an empty expression will anchor at any\r
+       // position in any string, the entire input string will be scanned.\r
+\r
+       // String LEN STR...\r
+       //\r
+       // Flags:       [RightToLeft, IgnoreCase]\r
+       // LEN:         length of string\r
+       // STR:         string characters\r
+\r
+       // Branch SKIP\r
+       //\r
+       // SKIP:        relative address of next branch\r
+       //\r
+       //      Branch :1\r
+       //              <alt expr 1>\r
+       //              Jump :4\r
+       // 1:   Branch :2\r
+       //              <alt expr 2>\r
+       //              Jump :4\r
+       // 2:   Branch :3\r
+       //              <alt expr 3>\r
+       //              Jump :4\r
+       // 3:   False\r
+       // 4:   <tail>\r
+\r
+       // Repeat SKIP MIN MAX\r
+       //\r
+       // Flags:       [Lazy]\r
+       // SKIP:        relative address of Until instruction\r
+       // MIN:         minimum iterations\r
+       // MAX:         maximum iterations (0xffff is infinity)\r
+       //\r
+       //      Repeat :1 MIN MAX\r
+       //              <expr>\r
+       //              Until\r
+       // 1:   <tail>\r
+\r
+       // FastRepeat SKIP MIN MAX\r
+       //\r
+       // Flags:       [Lazy]\r
+       // SKIP:        relative address of tail expression\r
+       // MIN:         minimum iterations\r
+       // MAX:         maximum iterations (0xffff is infinity)\r
+       //\r
+       //      FastRepeat :1 MIN MAX\r
+       //              <expr>\r
+       //              True\r
+       // 1:   <tail>\r
+       //\r
+       // Notes:\r
+       //\r
+       // The subexpression of a FastRepeat construct must not contain any\r
+       // complex operators. These include: Open, Close, Balance, Repeat,\r
+       // FastRepeat, Sub, Test. In addition, the subexpression must have\r
+       // been determined to have a fixed width.\r
+       \r
+       // Sub SKIP\r
+       //\r
+       // SKIP:        relative address of tail expression\r
+       //\r
+       //      Sub :1\r
+       //              <expr>\r
+       // 1:   <tail>\r
+       //\r
+       // Notes:\r
+       //\r
+       // The Sub operator invokes an independent subexpression. This means\r
+       // that the subexpression will match only once and so will not\r
+       // participate in any backtracking.\r
+\r
+       // Test TSKIP FSKIP\r
+       //\r
+       // TSKIP:       relative address of true expression\r
+       // FSKIP:       relative address of false expression\r
+       //\r
+       // Usage:       (?(?=test)true|false)\r
+       //\r
+       //      Test :1 :2\r
+       //              <test expr>\r
+       // 1:           <true expr>\r
+       //              Jump\r
+       // 2:           <false expr>\r
+       //      <tail>\r
+       //\r
+       // Usage:       (?(?=test)true)\r
+       //\r
+       //      Test :1 :2\r
+       //              <test expr>\r
+       // 1:           <true expr>\r
+       // 2:   <tail>\r
+       //\r
+       // Usage:       (?=test)\r
+       //\r
+       //      Test :1 :2\r
+       //              <test expr>\r
+       // 1:           <true expr>\r
+       //              Jump :3\r
+       // 2:           False\r
+       // 3:           <tail>\r
+       //\r
+       // Notes:\r
+       //\r
+       // For negative lookaheads, just swap the values of TSKIP and\r
+       // FSKIP. For lookbehinds, the test expression must be compiled\r
+       // in reverse. The test expression is always executed as an\r
+       // independent subexpression, so its behaviour is non-backtracking\r
+       // (like a Sub clause.)\r
+\r
+       // IfDefined SKIP GID\r
+       //\r
+       // SKIP:        relative address of else expression\r
+       // GID:         number of group to check\r
+       //\r
+       // Usage:       (?(gid)true)\r
+       //\r
+       //      IfDefined :1 GID\r
+       //              <true expr>\r
+       // 1:   <tail>\r
+       //\r
+       // Usage:       (?(gid)true|false)\r
+       //\r
+       //      IfDefined :1 GID\r
+       //              <true expr>\r
+       //              Jump :2\r
+       // 1:           <false expr>\r
+       // 2:   <tail>\r
+\r
+       // Jump SKIP\r
+       //\r
+       // SKIP:        relative address of target expression\r
+       //\r
+       //      Jump :1\r
+       //      ...\r
+       // :1   <target expr>\r
+\r
+       // Character CHAR\r
+       //\r
+       // Flags:       [Negate, IgnoreCase, RightToLeft]\r
+       // CHAR:        exact character to match\r
+\r
+       // Category CAT\r
+       //\r
+       // Flags:       [Negate, RightToLeft]\r
+       // CAT:         category to match (see Category enum)\r
+\r
+       // Range LO HI\r
+       //\r
+       // Flags:       [Negate, IgnoreCase, RightToLeft]\r
+       // LO:          lowest character in range\r
+       // HI:          highest character in range\r
+\r
+       // Set LO LEN SET...\r
+       //\r
+       // Flags:       [Negate, IgnoreCase, RightToLeft]\r
+       // LO:          lowest character in set\r
+       // LEN:         number of words in set\r
+       // SET:         bit array representing characters in set\r
+       //\r
+       // Notes:\r
+       //\r
+       // Each word in the set represents 16 characters, so the first word\r
+       // defines membership for characters LO to LO + 15, the second for\r
+       // LO + 16 to LO + 31, and so on up to LO + (LEN * 16 - 1). It is\r
+       // up to the compiler to provide a compact representation for sparse\r
+       // unicode sets. The simple way is to use Set 0 4096. Other methods\r
+       // involve partitioning the set and placing the components into an\r
+       // In block.\r
+\r
+       // In SKIP\r
+       //\r
+       // SKIP:        relative address of tail expression\r
+       //\r
+       // Usage:       [expr]\r
+       //\r
+       //      In :1\r
+       //              <expr>\r
+       //              True\r
+       // :1   <tail>\r
+       //\r
+       // Usage:       [^expr]\r
+       //\r
+       //      In :1\r
+       //              <expr>\r
+       //              False\r
+       // :1   <tail>\r
+       //\r
+       // Notes:\r
+       //\r
+       // The In instruction consumes a single character, using the flags\r
+       // of the first instruction in the subexpression to determine its\r
+       // IgnoreCase and RightToLeft properties. The subexpression is then\r
+       // applied to the single character as a disjunction. If any instruction\r
+       // in the subexpression succeeds, the entire In construct succeeds\r
+       // and matching continues with the tail.\r
+\r
+       // Position POS\r
+       //\r
+       // POS:         position to match (see Position enum)\r
+\r
+       // Open GID\r
+       //\r
+       // GID:         number of group to open\r
+\r
+       // Close GID\r
+       //\r
+       // GID:         number of group to close\r
+       \r
+       // Balance GID BAL\r
+       //\r
+       // GID:         number of capturing group (0 if none)\r
+       // BAL:         number of group to undefine\r
+\r
+       // Info GROUPS MIN MAX\r
+       //\r
+       // GROUPS:      number of capturing groups\r
+       // MIN:         minimum width of pattern\r
+       // MAX:         maximum width of pattern (0xffff means undefined)\r
+\r
+       // False\r
+\r
+       // True\r
+\r
+       // Reference GID\r
+       //\r
+       // Flags:       [IgnoreCase, RightToLeft]\r
+       // GID:         number of group to reference\r
+}\r
diff --git a/mcs/class/System/System.Text.RegularExpressions/cache.cs b/mcs/class/System/System.Text.RegularExpressions/cache.cs
new file mode 100644 (file)
index 0000000..8936eca
--- /dev/null
@@ -0,0 +1,138 @@
+//\r
+// assembly:   System\r
+// namespace:  System.Text.RegularExpressions\r
+// file:       cache.cs\r
+//\r
+// author:     Dan Lewis (dlewis@gmx.co.uk)\r
+//             (c) 2002\r
+\r
+using System;\r
+using System.Collections;\r
+\r
+namespace System.Text.RegularExpressions {\r
+\r
+       // Bounded cache of IMachineFactory objects keyed by (pattern, options).\r
+       // Usage order is tracked in an MRUList; when the table is full, entries\r
+       // returned by MRUList.Evict are removed to make room. Add and Lookup\r
+       // both run under the instance lock.\r
+       class FactoryCache {\r
+               // capacity: maximum number of factories to retain. A capacity of\r
+               // zero disables caching.\r
+               public FactoryCache (int capacity) {\r
+                       this.capacity = capacity;\r
+                       this.factories = new Hashtable (capacity);\r
+                       this.mru_list = new MRUList ();\r
+               }\r
+\r
+               // Stores factory under (pattern, options) and marks the key as\r
+               // most recently used, evicting older entries first if required.\r
+               public void Add (string pattern, RegexOptions options, IMachineFactory factory) {\r
+                       // With no room at all there is nothing to evict, so the\r
+                       // eviction loop below could never make progress.\r
+                       if (capacity <= 0)\r
+                               return;\r
+\r
+                       lock (this) {\r
+                               Key k = new Key (pattern, options);\r
+\r
+                               // Replacing an existing entry does not grow the table,\r
+                               // so only make room when a new key is inserted.\r
+                               if (!factories.Contains (k)) {\r
+                                       while (factories.Count >= capacity) {\r
+                                               object victim = mru_list.Evict ();\r
+                                               if (victim == null)\r
+                                                       break;  // usage list is empty; do not spin\r
+\r
+                                               factories.Remove ((Key)victim);\r
+                                       }\r
+                               }\r
+                               \r
+                               factories[k] = factory;\r
+                               mru_list.Use (k);\r
+                       }\r
+               }\r
+\r
+               // Returns the factory cached for (pattern, options) and marks it\r
+               // as most recently used, or returns null when nothing is cached.\r
+               public IMachineFactory Lookup (string pattern, RegexOptions options) {\r
+                       lock (this) {\r
+                               Key k = new Key (pattern, options);\r
+                               if (factories.Contains (k)) {\r
+                                       mru_list.Use (k);\r
+                                       return (IMachineFactory)factories[k];\r
+                               }\r
+                       }\r
+\r
+                       return null;\r
+               }\r
+\r
+               private int capacity;\r
+               private Hashtable factories;\r
+               private MRUList mru_list;\r
+\r
+               // Cache key. The members below must be overrides (not 'new'):\r
+               // Hashtable and MRUList call them through object references, and\r
+               // hiding members are never reached that way.\r
+               struct Key {\r
+                       public string pattern;\r
+                       public RegexOptions options;\r
+\r
+                       public Key (string pattern, RegexOptions options) {\r
+                               this.pattern = pattern;\r
+                               this.options = options;\r
+                       }\r
+                       \r
+                       // Combines the pattern hash with the option bits; a null\r
+                       // pattern hashes as 0.\r
+                       public override int GetHashCode () {\r
+                               int hash = pattern == null ? 0 : pattern.GetHashCode ();\r
+                               return hash ^ (int)options;\r
+                       }\r
+\r
+                       // Two keys are equal when both pattern and options match.\r
+                       public override bool Equals (object o) {\r
+                               if (!(o is Key))\r
+                                       return false;\r
+\r
+                               Key k = (Key)o;\r
+                               return options == k.options && pattern == k.pattern;\r
+                       }\r
+\r
+                       public override string ToString () {\r
+                               return "('" + pattern + "', [" + options + "])";\r
+                       }\r
+               }\r
+       }\r
+\r
+       // Doubly linked list that records the order in which objects were used.\r
+       // head is the most recently used entry and tail the least recently used\r
+       // one; Node.next points toward head and Node.previous toward tail.\r
+       // The class does no locking of its own.\r
+       class MRUList {\r
+               public MRUList () {\r
+                       head = tail = null;\r
+               }\r
+\r
+               // Marks o as the most recently used entry, adding it to the list\r
+               // if no entry equal to it (per o.Equals) is present yet.\r
+               public void Use (object o) {\r
+                       Node node;\r
+\r
+                       if (head == null) {\r
+                               node = new Node (o);\r
+                               head = tail = node;\r
+                               return;\r
+                       }\r
+\r
+                       // search from the most recent entry toward the oldest\r
+                       node = head;\r
+                       while (node != null && !o.Equals (node.value))\r
+                               node = node.previous;\r
+\r
+                       if (node == null)\r
+                               node = new Node (o);\r
+                       else {\r
+                               if (node == head)\r
+                                       return;         // already the most recent entry\r
+\r
+                               // unlink the node from its current position\r
+                               if (node == tail)\r
+                                       tail = node.next;\r
+                               else\r
+                                       node.previous.next = node.next;\r
+\r
+                               node.next.previous = node.previous;\r
+                       }\r
+\r
+                       // append at the head\r
+                       head.next = node;\r
+                       node.previous = head;\r
+                       node.next = null;\r
+                       head = node;\r
+               }\r
+\r
+               // Removes the least recently used entry and returns its value, or\r
+               // returns null when the list is empty.\r
+               public object Evict () {\r
+                       if (tail == null)\r
+                               return null;\r
+\r
+                       object o = tail.value;\r
+                       tail = tail.next;\r
+\r
+                       // Removing the only entry empties the list: head must be\r
+                       // cleared too, and there is no new tail to detach.\r
+                       if (tail == null)\r
+                               head = null;\r
+                       else\r
+                               tail.previous = null;\r
+\r
+                       return o;\r
+               }\r
+\r
+               private Node head, tail;\r
+\r
+               private class Node {\r
+                       public object value;\r
+                       public Node previous, next;\r
+\r
+                       public Node (object value) {\r
+                               this.value = value;\r
+                       }\r
+               }\r
+       }\r
+}\r
diff --git a/mcs/class/System/System.Text.RegularExpressions/category.cs b/mcs/class/System/System.Text.RegularExpressions/category.cs
new file mode 100644 (file)
index 0000000..763712c
--- /dev/null
@@ -0,0 +1,637 @@
+//\r
+// assembly:   System\r
+// namespace:  System.Text.RegularExpressions\r
+// file:       category.cs\r
+//\r
+// author:     Dan Lewis (dlewis@gmx.co.uk)\r
+//             (c) 2002\r
+\r
+using System;\r
+using System.Globalization;\r
+\r
+namespace System.Text.RegularExpressions {\r
+\r
+       // Character classes that CategoryUtils.IsCategory can test a character\r
+       // against. Members prefixed with "Unicode" are looked up by name in\r
+       // CategoryUtils.CategoryFromName (via Enum.Parse after prepending\r
+       // "Unicode"), so renaming a member changes which names are recognised.\r
+       enum Category : ushort {\r
+               None,\r
+\r
+               // canonical classes\r
+       \r
+               Any,                    // any character except newline         .\r
+               AnySingleline,          // any character                        . (s option)\r
+               Word,                   // any word character                   \w\r
+               Digit,                  // any digit character                  \d\r
+               WhiteSpace,             // any whitespace character             \s\r
+               \r
+               // ECMAScript classes\r
+\r
+\r
+               EcmaAny,\r
+               EcmaAnySingleline,\r
+               EcmaWord,               // [a-zA-Z_0-9]\r
+               EcmaDigit,              // [0-9]\r
+               EcmaWhiteSpace,         // [ \f\n\r\t\v]\r
+\r
+               // unicode categories\r
+               \r
+               UnicodeL,               // Letter\r
+               UnicodeM,               // Mark\r
+               UnicodeN,               // Number\r
+               UnicodeZ,               // Separator\r
+               UnicodeP,               // Punctuation\r
+               UnicodeS,               // Symbol\r
+               UnicodeC,               // Other\r
+\r
+               UnicodeLu,              // UppercaseLetter\r
+               UnicodeLl,              // LowercaseLetter\r
+               UnicodeLt,              // TitlecaseLetter\r
+               UnicodeLm,              // ModifierLetter\r
+               UnicodeLo,              // OtherLetter\r
+               UnicodeMn,              // NonspacingMark\r
+               UnicodeMe,              // EnclosingMark\r
+               UnicodeMc,              // SpacingMark\r
+               UnicodeNd,              // DecimalNumber\r
+               UnicodeNl,              // LetterNumber\r
+               UnicodeNo,              // OtherNumber\r
+               UnicodeZs,              // SpaceSeparator\r
+               UnicodeZl,              // LineSeparator\r
+               UnicodeZp,              // ParagraphSeparator\r
+               UnicodePd,              // DashPunctuation\r
+               UnicodePs,              // OpenPunctuation\r
+               UnicodePi,              // InitialPunctuation\r
+               UnicodePe,              // ClosePunctuation\r
+               UnicodePf,              // FinalPunctuation\r
+               UnicodePc,              // ConnectorPunctuation\r
+               UnicodePo,              // OtherPunctuation\r
+               UnicodeSm,              // MathSymbol\r
+               UnicodeSc,              // CurrencySymbol\r
+               UnicodeSk,              // ModifierSymbol\r
+               UnicodeSo,              // OtherSymbol\r
+               UnicodeCc,              // Control\r
+               UnicodeCf,              // Format\r
+               UnicodeCo,              // PrivateUse\r
+               UnicodeCs,              // Surrogate\r
+               UnicodeCn,              // Unassigned\r
+\r
+               // unicode block ranges\r
+\r
+               // notes: the categories marked with a star are valid unicode block ranges,\r
+               // but don't seem to be accepted by the MS parser using the \p{...} format.\r
+               // any ideas?\r
+\r
+               UnicodeBasicLatin,\r
+               UnicodeLatin1Supplement,                        // *\r
+               UnicodeLatinExtendedA,                          // *\r
+               UnicodeLatinExtendedB,                          // *\r
+               UnicodeIPAExtensions,\r
+               UnicodeSpacingModifierLetters,\r
+               UnicodeCombiningDiacriticalMarks,\r
+               UnicodeGreek,\r
+               UnicodeCyrillic,\r
+               UnicodeArmenian,\r
+               UnicodeHebrew,\r
+               UnicodeArabic,\r
+               UnicodeSyriac,\r
+               UnicodeThaana,\r
+               UnicodeDevanagari,\r
+               UnicodeBengali,\r
+               UnicodeGurmukhi,\r
+               UnicodeGujarati,\r
+               UnicodeOriya,\r
+               UnicodeTamil,\r
+               UnicodeTelugu,\r
+               UnicodeKannada,\r
+               UnicodeMalayalam,\r
+               UnicodeSinhala,\r
+               UnicodeThai,\r
+               UnicodeLao,\r
+               UnicodeTibetan,\r
+               UnicodeMyanmar,\r
+               UnicodeGeorgian,\r
+               UnicodeHangulJamo,\r
+               UnicodeEthiopic,\r
+               UnicodeCherokee,\r
+               UnicodeUnifiedCanadianAboriginalSyllabics,\r
+               UnicodeOgham,\r
+               UnicodeRunic,\r
+               UnicodeKhmer,\r
+               UnicodeMongolian,\r
+               UnicodeLatinExtendedAdditional,\r
+               UnicodeGreekExtended,\r
+               UnicodeGeneralPunctuation,\r
+               UnicodeSuperscriptsandSubscripts,\r
+               UnicodeCurrencySymbols,\r
+               UnicodeCombiningMarksforSymbols,\r
+               UnicodeLetterlikeSymbols,\r
+               UnicodeNumberForms,\r
+               UnicodeArrows,\r
+               UnicodeMathematicalOperators,\r
+               UnicodeMiscellaneousTechnical,\r
+               UnicodeControlPictures,\r
+               UnicodeOpticalCharacterRecognition,\r
+               UnicodeEnclosedAlphanumerics,\r
+               UnicodeBoxDrawing,\r
+               UnicodeBlockElements,\r
+               UnicodeGeometricShapes,\r
+               UnicodeMiscellaneousSymbols,\r
+               UnicodeDingbats,\r
+               UnicodeBraillePatterns,\r
+               UnicodeCJKRadicalsSupplement,\r
+               UnicodeKangxiRadicals,\r
+               UnicodeIdeographicDescriptionCharacters,\r
+               UnicodeCJKSymbolsandPunctuation,\r
+               UnicodeHiragana,\r
+               UnicodeKatakana,\r
+               UnicodeBopomofo,\r
+               UnicodeHangulCompatibilityJamo,\r
+               UnicodeKanbun,\r
+               UnicodeBopomofoExtended,\r
+               UnicodeEnclosedCJKLettersandMonths,\r
+               UnicodeCJKCompatibility,\r
+               UnicodeCJKUnifiedIdeographsExtensionA,\r
+               UnicodeCJKUnifiedIdeographs,\r
+               UnicodeYiSyllables,\r
+               UnicodeYiRadicals,\r
+               UnicodeHangulSyllables,\r
+               UnicodeHighSurrogates,\r
+               UnicodeHighPrivateUseSurrogates,\r
+               UnicodeLowSurrogates,\r
+               UnicodePrivateUse,\r
+               UnicodeCJKCompatibilityIdeographs,\r
+               UnicodeAlphabeticPresentationForms,\r
+               UnicodeArabicPresentationFormsA,                // *\r
+               UnicodeCombiningHalfMarks,\r
+               UnicodeCJKCompatibilityForms,\r
+               UnicodeSmallFormVariants,\r
+               UnicodeArabicPresentationFormsB,                // *\r
+               UnicodeSpecials,\r
+               UnicodeHalfwidthandFullwidthForms,\r
+               \r
+               UnicodeOldItalic,\r
+               UnicodeGothic,\r
+               UnicodeDeseret,\r
+               UnicodeByzantineMusicalSymbols,\r
+               UnicodeMusicalSymbols,\r
+               UnicodeMathematicalAlphanumericSymbols,\r
+               UnicodeCJKUnifiedIdeographsExtensionB,\r
+               UnicodeCJKCompatibilityIdeographsSupplement,\r
+               UnicodeTags\r
+       }\r
+\r
+       class CategoryUtils {\r
+               // Maps a Unicode category or block name to its Category value.\r
+               // A leading "Is" (used on block range names) is stripped before\r
+               // the lookup. Returns Category.None for null or unknown names.\r
+               public static Category CategoryFromName (string name) {\r
+                       if (name == null)\r
+                               return Category.None;\r
+\r
+                       // Test the prefix by index instead of Substring (0, 2):\r
+                       // Substring throws ArgumentOutOfRangeException for one-letter\r
+                       // names such as "L", and the handler below would swallow that\r
+                       // and wrongly report None for a valid category.\r
+                       if (name.Length >= 2 && name[0] == 'I' && name[1] == 's')\r
+                               name = name.Substring (2);\r
+\r
+                       try {\r
+                               return (Category)Enum.Parse (typeof (Category), "Unicode" + name);\r
+                       }\r
+                       catch (ArgumentException) {\r
+                               // Enum.Parse rejects names that are not Category members\r
+                               return Category.None;\r
+                       }\r
+               }\r
+       \r
+               public static bool IsCategory (Category cat, char c) {\r
+                       switch (cat) {\r
+                       case Category.None:\r
+                               return false;\r
+                       \r
+                       case Category.Any:\r
+                               return c != '\n';\r
+\r
+                       case Category.AnySingleline:\r
+                               return true;\r
+\r
+                       case Category.Word:\r
+                               return\r
+                                       Char.IsLetterOrDigit (c) ||\r
+                                       IsCategory (UnicodeCategory.ConnectorPunctuation, c);\r
+\r
+                       case Category.Digit:\r
+                               return Char.IsDigit (c);\r
+\r
+                       case Category.WhiteSpace:\r
+                               return Char.IsWhiteSpace (c);\r
+\r
+                       // ECMA categories\r
+\r
+                       case Category.EcmaAny:\r
+                               return c != '\n';\r
+                               \r
+                       case Category.EcmaAnySingleline:\r
+                               return true;\r
+\r
+                       case Category.EcmaWord:\r
+                               return\r
+                                       'a' <= c && c <= 'z' ||\r
+                                       'A' <= c && c <= 'Z' ||\r
+                                       '0' <= c && c <= '9' ||\r
+                                       '_' == c;\r
+\r
+                       case Category.EcmaDigit:\r
+                               return\r
+                                       '0' <= c && c <= 9;\r
+                       \r
+                       case Category.EcmaWhiteSpace:\r
+                               return\r
+                                       c == ' '  ||\r
+                                       c == '\f' ||\r
+                                       c == '\n' ||\r
+                                       c == '\r' ||\r
+                                       c == '\t' ||\r
+                                       c == '\v';\r
+\r
+                       // Unicode categories...\r
+\r
+                       // letter\r
+                       \r
+                       case Category.UnicodeLu: return IsCategory (UnicodeCategory.UppercaseLetter, c);\r
+                       case Category.UnicodeLl: return IsCategory (UnicodeCategory.LowercaseLetter, c);\r
+                       case Category.UnicodeLt: return IsCategory (UnicodeCategory.TitlecaseLetter, c);\r
+                       case Category.UnicodeLm: return IsCategory (UnicodeCategory.ModifierLetter, c);\r
+                       case Category.UnicodeLo: return IsCategory (UnicodeCategory.OtherLetter, c);\r
+\r
+                       // mark\r
+\r
+                       case Category.UnicodeMn: return IsCategory (UnicodeCategory.NonSpacingMark, c);\r
+                       case Category.UnicodeMe: return IsCategory (UnicodeCategory.EnclosingMark, c);\r
+                       case Category.UnicodeMc: return IsCategory (UnicodeCategory.SpacingCombiningMark, c);\r
+                       case Category.UnicodeNd: return IsCategory (UnicodeCategory.DecimalDigitNumber, c);\r
+\r
+                       // number\r
+\r
+                       case Category.UnicodeNl: return IsCategory (UnicodeCategory.LetterNumber, c);\r
+                       case Category.UnicodeNo: return IsCategory (UnicodeCategory.OtherNumber, c);\r
+\r
+                       // separator\r
+\r
+                       case Category.UnicodeZs: return IsCategory (UnicodeCategory.SpaceSeparator, c);\r
+                       case Category.UnicodeZl: return IsCategory (UnicodeCategory.LineSeparator, c);\r
+                       case Category.UnicodeZp: return IsCategory (UnicodeCategory.ParagraphSeparator, c);\r
+\r
+                       // punctuation\r
+\r
+                       case Category.UnicodePd: return IsCategory (UnicodeCategory.DashPunctuation, c);\r
+                       case Category.UnicodePs: return IsCategory (UnicodeCategory.OpenPunctuation, c);\r
+                       case Category.UnicodePi: return IsCategory (UnicodeCategory.InitialQuotePunctuation, c);\r
+                       case Category.UnicodePe: return IsCategory (UnicodeCategory.ClosePunctuation, c);\r
+                       case Category.UnicodePf: return IsCategory (UnicodeCategory.FinalQuotePunctuation, c);\r
+                       case Category.UnicodePc: return IsCategory (UnicodeCategory.ConnectorPunctuation, c);\r
+                       case Category.UnicodePo: return IsCategory (UnicodeCategory.OtherPunctuation, c);\r
+\r
+                       // symbol\r
+\r
+                       case Category.UnicodeSm: return IsCategory (UnicodeCategory.MathSymbol, c);\r
+                       case Category.UnicodeSc: return IsCategory (UnicodeCategory.CurrencySymbol, c);\r
+                       case Category.UnicodeSk: return IsCategory (UnicodeCategory.ModifierSymbol, c);\r
+                       case Category.UnicodeSo: return IsCategory (UnicodeCategory.OtherSymbol, c);\r
+\r
+                       // other\r
+\r
+                       case Category.UnicodeCc: return IsCategory (UnicodeCategory.Control, c);\r
+                       case Category.UnicodeCf: return IsCategory (UnicodeCategory.Format, c);\r
+                       case Category.UnicodeCo: return IsCategory (UnicodeCategory.PrivateUse, c);\r
+                       case Category.UnicodeCs: return IsCategory (UnicodeCategory.Surrogate, c);\r
+                       case Category.UnicodeCn: return IsCategory (UnicodeCategory.OtherNotAssigned, c); \r
+\r
+                       case Category.UnicodeL: // letter\r
+                               return\r
+                                       IsCategory (UnicodeCategory.UppercaseLetter, c) ||\r
+                                       IsCategory (UnicodeCategory.LowercaseLetter, c) ||\r
+                                       IsCategory (UnicodeCategory.TitlecaseLetter, c) ||\r
+                                       IsCategory (UnicodeCategory.ModifierLetter, c) ||\r
+                                       IsCategory (UnicodeCategory.OtherLetter, c);\r
+                       \r
+                       case Category.UnicodeM: // mark\r
+                               return\r
+                                       IsCategory (UnicodeCategory.NonSpacingMark, c) ||\r
+                                       IsCategory (UnicodeCategory.EnclosingMark, c) ||\r
+                                       IsCategory (UnicodeCategory.SpacingCombiningMark, c);\r
+\r
+                       case Category.UnicodeN: // number\r
+                               return\r
+                                       IsCategory (UnicodeCategory.DecimalDigitNumber, c) ||\r
+                                       IsCategory (UnicodeCategory.LetterNumber, c) ||\r
+                                       IsCategory (UnicodeCategory.OtherNumber, c);\r
+\r
+                       case Category.UnicodeZ: // separator\r
+                               return\r
+                                       IsCategory (UnicodeCategory.SpaceSeparator, c) ||\r
+                                       IsCategory (UnicodeCategory.LineSeparator, c) ||\r
+                                       IsCategory (UnicodeCategory.ParagraphSeparator, c);\r
+                                       \r
+                       case Category.UnicodeP: // punctuation\r
+                               return\r
+                                       IsCategory (UnicodeCategory.DashPunctuation, c) ||\r
+                                       IsCategory (UnicodeCategory.OpenPunctuation, c) ||\r
+                                       IsCategory (UnicodeCategory.InitialQuotePunctuation, c) ||\r
+                                       IsCategory (UnicodeCategory.ClosePunctuation, c) ||\r
+                                       IsCategory (UnicodeCategory.FinalQuotePunctuation, c) ||\r
+                                       IsCategory (UnicodeCategory.ConnectorPunctuation, c) ||\r
+                                       IsCategory (UnicodeCategory.OtherPunctuation, c);\r
+                       \r
+                       case Category.UnicodeS: // symbol\r
+                               return\r
+                                       IsCategory (UnicodeCategory.MathSymbol, c) ||\r
+                                       IsCategory (UnicodeCategory.CurrencySymbol, c) ||\r
+                                       IsCategory (UnicodeCategory.ModifierSymbol, c) ||\r
+                                       IsCategory (UnicodeCategory.OtherSymbol, c);\r
+                       \r
+                       case Category.UnicodeC: // other\r
+                               return\r
+                                       IsCategory (UnicodeCategory.Control, c) ||\r
+                                       IsCategory (UnicodeCategory.Format, c) ||\r
+                                       IsCategory (UnicodeCategory.PrivateUse, c) ||\r
+                                       IsCategory (UnicodeCategory.Surrogate, c) ||\r
+                                       IsCategory (UnicodeCategory.OtherNotAssigned, c);\r
+\r
+                       // Unicode block ranges...\r
+\r
+                       case Category.UnicodeBasicLatin:\r
+                               return '\u0000' <= c && c <= '\u007F';\r
+\r
+                       case Category.UnicodeLatin1Supplement:\r
+                               return '\u0080' <= c && c <= '\u00FF';\r
+\r
+                       case Category.UnicodeLatinExtendedA:\r
+                               return '\u0100' <= c && c <= '\u017F';\r
+\r
+                       case Category.UnicodeLatinExtendedB:\r
+                               return '\u0180' <= c && c <= '\u024F';\r
+\r
+                       case Category.UnicodeIPAExtensions:\r
+                               return '\u0250' <= c && c <= '\u02AF';\r
+\r
+                       case Category.UnicodeSpacingModifierLetters:\r
+                               return '\u02B0' <= c && c <= '\u02FF';\r
+\r
+                       case Category.UnicodeCombiningDiacriticalMarks:\r
+                               return '\u0300' <= c && c <= '\u036F';\r
+\r
+                       case Category.UnicodeGreek:\r
+                               return '\u0370' <= c && c <= '\u03FF';\r
+\r
+                       case Category.UnicodeCyrillic:\r
+                               return '\u0400' <= c && c <= '\u04FF';\r
+\r
+                       case Category.UnicodeArmenian:\r
+                               return '\u0530' <= c && c <= '\u058F';\r
+\r
+                       case Category.UnicodeHebrew:\r
+                               return '\u0590' <= c && c <= '\u05FF';\r
+\r
+                       case Category.UnicodeArabic:\r
+                               return '\u0600' <= c && c <= '\u06FF';\r
+\r
+                       case Category.UnicodeSyriac:\r
+                               return '\u0700' <= c && c <= '\u074F';\r
+\r
+                       case Category.UnicodeThaana:\r
+                               return '\u0780' <= c && c <= '\u07BF';\r
+\r
+                       case Category.UnicodeDevanagari:\r
+                               return '\u0900' <= c && c <= '\u097F';\r
+\r
+                       case Category.UnicodeBengali:\r
+                               return '\u0980' <= c && c <= '\u09FF';\r
+\r
+                       case Category.UnicodeGurmukhi:\r
+                               return '\u0A00' <= c && c <= '\u0A7F';\r
+\r
+                       case Category.UnicodeGujarati:\r
+                               return '\u0A80' <= c && c <= '\u0AFF';\r
+\r
+                       case Category.UnicodeOriya:\r
+                               return '\u0B00' <= c && c <= '\u0B7F';\r
+\r
+                       case Category.UnicodeTamil:\r
+                               return '\u0B80' <= c && c <= '\u0BFF';\r
+\r
+                       case Category.UnicodeTelugu:\r
+                               return '\u0C00' <= c && c <= '\u0C7F';\r
+\r
+                       case Category.UnicodeKannada:\r
+                               return '\u0C80' <= c && c <= '\u0CFF';\r
+\r
+                       case Category.UnicodeMalayalam:\r
+                               return '\u0D00' <= c && c <= '\u0D7F';\r
+\r
+                       case Category.UnicodeSinhala:\r
+                               return '\u0D80' <= c && c <= '\u0DFF';\r
+\r
+                       case Category.UnicodeThai:\r
+                               return '\u0E00' <= c && c <= '\u0E7F';\r
+\r
+                       case Category.UnicodeLao:\r
+                               return '\u0E80' <= c && c <= '\u0EFF';\r
+\r
+                       case Category.UnicodeTibetan:\r
+                               return '\u0F00' <= c && c <= '\u0FFF';\r
+\r
+                       case Category.UnicodeMyanmar:\r
+                               return '\u1000' <= c && c <= '\u109F';\r
+\r
+                       case Category.UnicodeGeorgian:\r
+                               return '\u10A0' <= c && c <= '\u10FF';\r
+\r
+                       case Category.UnicodeHangulJamo:\r
+                               return '\u1100' <= c && c <= '\u11FF';\r
+\r
+                       case Category.UnicodeEthiopic:\r
+                               return '\u1200' <= c && c <= '\u137F';\r
+\r
+                       case Category.UnicodeCherokee:\r
+                               return '\u13A0' <= c && c <= '\u13FF';\r
+\r
+                       case Category.UnicodeUnifiedCanadianAboriginalSyllabics:\r
+                               return '\u1400' <= c && c <= '\u167F';\r
+\r
+                       case Category.UnicodeOgham:\r
+                               return '\u1680' <= c && c <= '\u169F';\r
+\r
+                       case Category.UnicodeRunic:\r
+                               return '\u16A0' <= c && c <= '\u16FF';\r
+\r
+                       case Category.UnicodeKhmer:\r
+                               return '\u1780' <= c && c <= '\u17FF';\r
+\r
+                       case Category.UnicodeMongolian:\r
+                               return '\u1800' <= c && c <= '\u18AF';\r
+\r
+                       case Category.UnicodeLatinExtendedAdditional:\r
+                               return '\u1E00' <= c && c <= '\u1EFF';\r
+\r
+                       case Category.UnicodeGreekExtended:\r
+                               return '\u1F00' <= c && c <= '\u1FFF';\r
+\r
+                       case Category.UnicodeGeneralPunctuation:\r
+                               return '\u2000' <= c && c <= '\u206F';\r
+\r
+                       case Category.UnicodeSuperscriptsandSubscripts:\r
+                               return '\u2070' <= c && c <= '\u209F';\r
+\r
+                       case Category.UnicodeCurrencySymbols:\r
+                               return '\u20A0' <= c && c <= '\u20CF';\r
+\r
+                       case Category.UnicodeCombiningMarksforSymbols:\r
+                               return '\u20D0' <= c && c <= '\u20FF';\r
+\r
+                       case Category.UnicodeLetterlikeSymbols:\r
+                               return '\u2100' <= c && c <= '\u214F';\r
+\r
+                       case Category.UnicodeNumberForms:\r
+                               return '\u2150' <= c && c <= '\u218F';\r
+\r
+                       case Category.UnicodeArrows:\r
+                               return '\u2190' <= c && c <= '\u21FF';\r
+\r
+                       case Category.UnicodeMathematicalOperators:\r
+                               return '\u2200' <= c && c <= '\u22FF';\r
+\r
+                       case Category.UnicodeMiscellaneousTechnical:\r
+                               return '\u2300' <= c && c <= '\u23FF';\r
+\r
+                       case Category.UnicodeControlPictures:\r
+                               return '\u2400' <= c && c <= '\u243F';\r
+\r
+                       case Category.UnicodeOpticalCharacterRecognition:\r
+                               return '\u2440' <= c && c <= '\u245F';\r
+\r
+                       case Category.UnicodeEnclosedAlphanumerics:\r
+                               return '\u2460' <= c && c <= '\u24FF';\r
+\r
+                       case Category.UnicodeBoxDrawing:\r
+                               return '\u2500' <= c && c <= '\u257F';\r
+\r
+                       case Category.UnicodeBlockElements:\r
+                               return '\u2580' <= c && c <= '\u259F';\r
+\r
+                       case Category.UnicodeGeometricShapes:\r
+                               return '\u25A0' <= c && c <= '\u25FF';\r
+\r
+                       case Category.UnicodeMiscellaneousSymbols:\r
+                               return '\u2600' <= c && c <= '\u26FF';\r
+\r
+                       case Category.UnicodeDingbats:\r
+                               return '\u2700' <= c && c <= '\u27BF';\r
+\r
+                       case Category.UnicodeBraillePatterns:\r
+                               return '\u2800' <= c && c <= '\u28FF';\r
+\r
+                       case Category.UnicodeCJKRadicalsSupplement:\r
+                               return '\u2E80' <= c && c <= '\u2EFF';\r
+\r
+                       case Category.UnicodeKangxiRadicals:\r
+                               return '\u2F00' <= c && c <= '\u2FDF';\r
+\r
+                       case Category.UnicodeIdeographicDescriptionCharacters:\r
+                               return '\u2FF0' <= c && c <= '\u2FFF';\r
+\r
+                       case Category.UnicodeCJKSymbolsandPunctuation:\r
+                               return '\u3000' <= c && c <= '\u303F';\r
+\r
+                       case Category.UnicodeHiragana:\r
+                               return '\u3040' <= c && c <= '\u309F';\r
+\r
+                       case Category.UnicodeKatakana:\r
+                               return '\u30A0' <= c && c <= '\u30FF';\r
+\r
+                       case Category.UnicodeBopomofo:\r
+                               return '\u3100' <= c && c <= '\u312F';\r
+\r
+                       case Category.UnicodeHangulCompatibilityJamo:\r
+                               return '\u3130' <= c && c <= '\u318F';\r
+\r
+                       case Category.UnicodeKanbun:\r
+                               return '\u3190' <= c && c <= '\u319F';\r
+\r
+                       case Category.UnicodeBopomofoExtended:\r
+                               return '\u31A0' <= c && c <= '\u31BF';\r
+\r
+                       case Category.UnicodeEnclosedCJKLettersandMonths:\r
+                               return '\u3200' <= c && c <= '\u32FF';\r
+\r
+                       case Category.UnicodeCJKCompatibility:\r
+                               return '\u3300' <= c && c <= '\u33FF';\r
+\r
+                       case Category.UnicodeCJKUnifiedIdeographsExtensionA:\r
+                               return '\u3400' <= c && c <= '\u4DB5';\r
+\r
+                       case Category.UnicodeCJKUnifiedIdeographs:\r
+                               return '\u4E00' <= c && c <= '\u9FFF';\r
+\r
+                       case Category.UnicodeYiSyllables:\r
+                               return '\uA000' <= c && c <= '\uA48F';\r
+\r
+                       case Category.UnicodeYiRadicals:\r
+                               return '\uA490' <= c && c <= '\uA4CF';\r
+\r
+                       case Category.UnicodeHangulSyllables:\r
+                               return '\uAC00' <= c && c <= '\uD7A3';\r
+\r
+                       case Category.UnicodeHighSurrogates:\r
+                               return '\uD800' <= c && c <= '\uDB7F';\r
+\r
+                       case Category.UnicodeHighPrivateUseSurrogates:\r
+                               return '\uDB80' <= c && c <= '\uDBFF';\r
+\r
+                       case Category.UnicodeLowSurrogates:\r
+                               return '\uDC00' <= c && c <= '\uDFFF';\r
+\r
+                       case Category.UnicodePrivateUse:\r
+                               return '\uE000' <= c && c <= '\uF8FF';\r
+\r
+                       case Category.UnicodeCJKCompatibilityIdeographs:\r
+                               return '\uF900' <= c && c <= '\uFAFF';\r
+\r
+                       case Category.UnicodeAlphabeticPresentationForms:\r
+                               return '\uFB00' <= c && c <= '\uFB4F';\r
+\r
+                       case Category.UnicodeArabicPresentationFormsA:\r
+                               return '\uFB50' <= c && c <= '\uFDFF';\r
+\r
+                       case Category.UnicodeCombiningHalfMarks:\r
+                               return '\uFE20' <= c && c <= '\uFE2F';\r
+\r
+                       case Category.UnicodeCJKCompatibilityForms:\r
+                               return '\uFE30' <= c && c <= '\uFE4F';\r
+\r
+                       case Category.UnicodeSmallFormVariants:\r
+                               return '\uFE50' <= c && c <= '\uFE6F';\r
+\r
+                       case Category.UnicodeArabicPresentationFormsB:\r
+                               return '\uFE70' <= c && c <= '\uFEFE';\r
+\r
+                       case Category.UnicodeHalfwidthandFullwidthForms:\r
+                               return '\uFF00' <= c && c <= '\uFFEF';\r
+\r
+                       case Category.UnicodeSpecials:\r
+                               return\r
+                                       '\uFEFF' <= c && c <= '\uFEFF' ||\r
+                                       '\uFFF0' <= c && c <= '\uFFFD';\r
+\r
+                       // these block ranges begin above 0x10000\r
+\r
+                       case Category.UnicodeOldItalic:\r
+                       case Category.UnicodeGothic:\r
+                       case Category.UnicodeDeseret:\r
+                       case Category.UnicodeByzantineMusicalSymbols:\r
+                       case Category.UnicodeMusicalSymbols:\r
+                       case Category.UnicodeMathematicalAlphanumericSymbols:\r
+                       case Category.UnicodeCJKUnifiedIdeographsExtensionB:\r
+                       case Category.UnicodeCJKCompatibilityIdeographsSupplement:\r
+                       case Category.UnicodeTags:\r
+                               return false;\r
+\r
+                       default:\r
+                               return false;\r
+                       }\r
+               }\r
+\r
+               // Returns true when the Unicode general category reported by\r
+               // Char.GetUnicodeCategory for c is exactly uc.\r
+               private static bool IsCategory (UnicodeCategory uc, char c) {\r
+                       return Char.GetUnicodeCategory (c) == uc;\r
+               }\r
+       }\r
+}\r
diff --git a/mcs/class/System/System.Text.RegularExpressions/collections.cs b/mcs/class/System/System.Text.RegularExpressions/collections.cs
new file mode 100644 (file)
index 0000000..11db0ea
--- /dev/null
@@ -0,0 +1,124 @@
+//\r
+// assembly:   System\r
+// namespace:  System.Text.RegularExpressions\r
+// file:       collections.cs\r
+//\r
+// author:     Dan Lewis (dlewis@gmx.co.uk)\r
+//             (c) 2002\r
+\r
+using System;\r
+using System.Collections;\r
+\r
+namespace System.Text.RegularExpressions {\r
+       // Base class shared by CaptureCollection, GroupCollection and\r
+       // MatchCollection. Items are kept in an ArrayList and are added only\r
+       // through the internal Add method.\r
+       public abstract class RegexCollectionBase : ICollection, IEnumerable {\r
+               // Number of items in the collection.\r
+               public int Count {\r
+                       get { return list.Count; }\r
+               }\r
+\r
+               public bool IsReadOnly {\r
+                       get { return true; }    // FIXME\r
+               }\r
+\r
+               public bool IsSynchronized {\r
+                       get { return false; }   // FIXME\r
+               }\r
+\r
+               // Exposes the backing list itself as the lock object.\r
+               public object SyncRoot {\r
+                       get { return list; }    // FIXME\r
+               }\r
+\r
+               // Copies the items into array, starting at position index.\r
+               // Copying stops quietly when the end of array is reached, so a\r
+               // short array receives only the items that fit.\r
+               public void CopyTo (Array array, int index) {\r
+                       foreach (Object o in list) {\r
+                               // index == array.Length is already one past the last\r
+                               // valid slot, so it must end the copy as well\r
+                               if (index >= array.Length)\r
+                                       break;\r
+\r
+                               array.SetValue (o, index ++);\r
+                       }\r
+               }\r
+\r
+               // Returns an enumerator positioned before the first item.\r
+               public IEnumerator GetEnumerator () {\r
+                       return new Enumerator (list);\r
+               }\r
+\r
+               // internal methods\r
+\r
+               internal RegexCollectionBase () {\r
+                       list = new ArrayList ();\r
+               }\r
+\r
+               internal void Add (Object o) {\r
+                       list.Add (o);\r
+               }\r
+\r
+               // IEnumerator implementation\r
+\r
+               // Index-based enumerator: ptr is -1 before the first item and\r
+               // list.Count once the items are exhausted.\r
+               private class Enumerator : IEnumerator {\r
+                       public Enumerator (IList list) {\r
+                               this.list = list;\r
+                               Reset ();\r
+                       }\r
+\r
+                       // Item at the current position. Throws\r
+                       // InvalidOperationException when the enumerator is\r
+                       // positioned before the first or after the last item.\r
+                       public object Current {\r
+                               get {\r
+                                       if (ptr < 0 || ptr >= list.Count)\r
+                                               throw new InvalidOperationException ();\r
+\r
+                                       return list[ptr];\r
+                               }\r
+                       }\r
+\r
+                       // Advances to the next item; returns false once the end\r
+                       // has been reached and keeps returning false afterwards.\r
+                       public bool MoveNext () {\r
+                               // never step beyond list.Count, otherwise repeated\r
+                               // calls at the end would drift out of range\r
+                               if (ptr < list.Count)\r
+                                       ++ ptr;\r
+\r
+                               return ptr < list.Count;\r
+                       }\r
+\r
+                       // Moves back to the position before the first item.\r
+                       public void Reset () {\r
+                               ptr = -1;\r
+                       }\r
+\r
+                       private IList list;\r
+                       private int ptr;\r
+               }\r
+\r
+               // protected fields\r
+\r
+               protected ArrayList list;\r
+       }\r
+\r
+       // Collection of Capture objects gathered from a chain of captures\r
+       // linked through their Previous property.\r
+       public class CaptureCollection : RegexCollectionBase, ICollection, IEnumerable {\r
+               // Returns the item at position i, cast to Capture.\r
+               public Capture this[int i] {\r
+                       get {\r
+                               object item = list[i];\r
+                               return (Capture)item;\r
+                       }\r
+               }\r
+\r
+               // Walks the Previous links starting at cap, keeps only the\r
+               // captures whose IsDefined is true, and finally reverses the\r
+               // list so that cap's end of the chain comes last.\r
+               internal CaptureCollection (Capture cap) {\r
+                       for (Capture cur = cap; cur != null; cur = cur.Previous) {\r
+                               if (!cur.IsDefined)\r
+                                       continue;\r
+\r
+                               Add (cur);\r
+                       }\r
+\r
+                       list.Reverse ();\r
+               }\r
+       }\r
+\r
+       // Collection of Group objects; it is filled through the inherited\r
+       // internal Add method.\r
+       public class GroupCollection : RegexCollectionBase, ICollection, IEnumerable {\r
+               // Returns the item at position i, cast to Group.\r
+               public Group this[int i] {\r
+                       get {\r
+                               object item = list[i];\r
+                               return (Group)item;\r
+                       }\r
+               }\r
+\r
+               // Creates an empty collection.\r
+               internal GroupCollection () {\r
+               }\r
+       }\r
+\r
+       // Collection of Match objects; it is filled through the inherited\r
+       // internal Add method.\r
+       public class MatchCollection : RegexCollectionBase, ICollection, IEnumerable {\r
+               // Returns the item at position i, cast to Match.\r
+               public Match this[int i] {\r
+                       get {\r
+                               object item = list[i];\r
+                               return (Match)item;\r
+                       }\r
+               }\r
+\r
+               // Creates an empty collection.\r
+               internal MatchCollection () {\r
+               }\r
+       }\r
+}\r
diff --git a/mcs/class/System/System.Text.RegularExpressions/compiler.cs b/mcs/class/System/System.Text.RegularExpressions/compiler.cs
new file mode 100644 (file)
index 0000000..82fb8cf
--- /dev/null
@@ -0,0 +1,368 @@
+//\r
+// assembly:   System\r
+// namespace:  System.Text.RegularExpressions\r
+// file:       compiler.cs\r
+//\r
+// author:     Dan Lewis (dlewis@gmx.co.uk)\r
+//             (c) 2002\r
+\r
+using System;\r
+using System.Collections;\r
+\r
+namespace System.Text.RegularExpressions {\r
+       // Opaque handle for a link: ICompiler.NewLink returns one and the\r
+       // Emit*/ResolveLink methods accept it. It declares no members of its\r
+       // own; PatternCompiler uses PatternLinkStack as the concrete type.\r
+       abstract class LinkRef {\r
+               // empty\r
+       }\r
+               \r
+       // Instruction-emission interface for building a regex program.\r
+       // PatternCompiler implements it by appending ushort words to a list\r
+       // and handing the result to an InterpreterFactory.\r
+       interface ICompiler {\r
+               void Reset ();\r
+               IMachineFactory GetMachineFactory ();\r
+\r
+               // instruction emission\r
+\r
+               void EmitFalse ();\r
+               void EmitTrue ();\r
+\r
+               // character matching\r
+\r
+               void EmitCharacter (char c, bool negate, bool ignore, bool reverse);\r
+               void EmitCategory (Category cat, bool negate, bool reverse);\r
+               void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse);\r
+               void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse);\r
+\r
+               // other operators\r
+\r
+               void EmitString (string str, bool ignore, bool reverse);\r
+               void EmitPosition (Position pos);\r
+               void EmitOpen (int gid);\r
+               void EmitClose (int gid);\r
+               void EmitBalance (int gid, int balance);\r
+               void EmitReference (int gid, bool ignore, bool reverse);\r
+\r
+               // constructs\r
+\r
+               void EmitIfDefined (int gid, LinkRef tail);\r
+               void EmitSub (LinkRef tail);\r
+               void EmitTest (LinkRef yes, LinkRef tail);\r
+               void EmitBranch (LinkRef next);\r
+               void EmitJump (LinkRef target);\r
+               void EmitRepeat (int min, int max, bool lazy, LinkRef until);\r
+               void EmitUntil (LinkRef repeat);\r
+               void EmitIn (LinkRef tail);\r
+               void EmitInfo (int count, int min, int max);\r
+               void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail);\r
+               void EmitAnchor (int offset, LinkRef tail);\r
+\r
+               // link handling: NewLink creates a handle, ResolveLink settles it\r
+\r
+               LinkRef NewLink ();\r
+               void ResolveLink (LinkRef link);\r
+       }\r
+\r
+       // IMachineFactory that creates Interpreter instances for one program.\r
+       class InterpreterFactory : IMachineFactory {\r
+               private ushort[] image;\r
+\r
+               // pattern is the ushort program handed to every interpreter.\r
+               public InterpreterFactory (ushort[] pattern) {\r
+                       image = pattern;\r
+               }\r
+\r
+               // Creates a new Interpreter over the stored program array; the\r
+               // array reference itself is passed on, no copy is made here.\r
+               public IMachine NewInstance () {\r
+                       return new Interpreter (image);\r
+               }\r
+       }\r
+\r
+       class PatternCompiler : ICompiler {\r
+               // Packs an opcode and its flags into one program word: the\r
+               // opcode value OR-ed with the flag bits masked by 0xff00.\r
+               public static ushort EncodeOp (OpCode op, OpFlags flags) {\r
+                       int flagBits = (int)flags & 0xff00;\r
+                       int word = (int)op | flagBits;\r
+\r
+                       return (ushort)word;\r
+               }\r
+\r
+               // Splits a program word into its parts: the low byte (0x00ff)\r
+               // becomes the opcode, the high byte (0xff00) the flags.\r
+               public static void DecodeOp (ushort word, out OpCode op, out OpFlags flags) {\r
+                       int opBits = word & 0x00ff;\r
+                       int flagBits = word & 0xff00;\r
+\r
+                       op = (OpCode)opBits;\r
+                       flags = (OpFlags)flagBits;\r
+               }\r
+\r
+               // Starts with an empty pgm list; GetMachineFactory later copies\r
+               // its contents into the ushort program image.\r
+               public PatternCompiler () {\r
+                       pgm = new ArrayList ();\r
+               }\r
+\r
+               // ICompiler implementation\r
+\r
+               // Empties the pgm list so the compiler can be used again.\r
+               public void Reset () {\r
+                       pgm.Clear ();\r
+               }\r
+\r
+               // Snapshots the words collected in pgm into a ushort array and\r
+               // wraps that array in an InterpreterFactory.\r
+               public IMachineFactory GetMachineFactory () {\r
+                       ushort[] image = (ushort[])pgm.ToArray (typeof (ushort));\r
+\r
+                       return new InterpreterFactory (image);\r
+               }\r
+\r
+               // Emits the False opcode; no operand words follow.\r
+               public void EmitFalse () {\r
+                       Emit (OpCode.False);\r
+               }\r
+\r
+               // Emits the True opcode; no operand words follow.\r
+               public void EmitTrue () {\r
+                       Emit (OpCode.True);\r
+               }\r
+\r
+               // Emits Character with flags built from negate/ignore/reverse,\r
+               // followed by the character as one operand word. When ignore is\r
+               // set the operand is stored in its Char.ToLower form.\r
+               public void EmitCharacter (char c, bool negate, bool ignore, bool reverse) {\r
+                       Emit (OpCode.Character, MakeFlags (negate, ignore, reverse, false));\r
+\r
+                       char operand = ignore ? Char.ToLower (c) : c;\r
+                       Emit ((ushort)operand);\r
+               }\r
+\r
+               // Emits Category with flags built from negate/reverse (the\r
+               // ignore and lazy arguments of MakeFlags are false), followed by\r
+               // the category value as one operand word.\r
+               public void EmitCategory (Category cat, bool negate, bool reverse) {\r
+                       Emit (OpCode.Category, MakeFlags (negate, false, reverse, false));\r
+\r
+                       ushort operand = (ushort)cat;\r
+                       Emit (operand);\r
+               }\r
+\r
+               // Emits Range with flags built from negate/ignore/reverse,\r
+               // followed by two operand words: lo first, then hi.\r
+               public void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse) {\r
+                       Emit (OpCode.Range, MakeFlags (negate, ignore, reverse, false));\r
+\r
+                       ushort first = (ushort)lo;\r
+                       ushort last = (ushort)hi;\r
+                       Emit (first);\r
+                       Emit (last);\r
+               }\r
+\r
+               // Emits Set with flags built from negate/ignore/reverse, then\r
+               // lo, then the number of 16-bit words needed for the bit set,\r
+               // then the words themselves. Bit i of word w is set when\r
+               // set[16 * w + i] is true; bits beyond set.Length stay 0.\r
+               public void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse) {\r
+                       Emit (OpCode.Set, MakeFlags (negate, ignore, reverse, false));\r
+                       Emit ((ushort)lo);\r
+\r
+                       int bits = set.Length;\r
+                       int words = (bits + 0xf) >> 4;  // round up to whole words\r
+                       Emit ((ushort)words);\r
+\r
+                       for (int w = 0; w < words; ++ w) {\r
+                               int first = w << 4;\r
+                               ushort word = 0;\r
+\r
+                               for (int i = 0; i < 16 && first + i < bits; ++ i) {\r
+                                       if (set[first + i])\r
+                                               word |= (ushort)(1 << i);\r
+                               }\r
+\r
+                               Emit (word);\r
+                       }\r
+               }\r
+\r
+               // Emits String with flags built from ignore/reverse, then the\r
+               // string length, then one operand word per character. When\r
+               // ignore is set the characters come from str.ToLower ().\r
+               public void EmitString (string str, bool ignore, bool reverse) {\r
+                       Emit (OpCode.String, MakeFlags (false, ignore, reverse, false));\r
+\r
+                       // the length is taken from the string as passed in\r
+                       int len = str.Length;\r
+                       Emit ((ushort)len);\r
+\r
+                       string text = ignore ? str.ToLower () : str;\r
+                       int pos = 0;\r
+                       while (pos < len) {\r
+                               Emit ((ushort)text[pos]);\r
+                               ++ pos;\r
+                       }\r
+               }\r
+\r
+               // Emits Position (passing 0 where other methods pass the result\r
+               // of MakeFlags), followed by pos as one operand word.\r
+               public void EmitPosition (Position pos) {\r
+                       Emit (OpCode.Position, 0);\r
+                       Emit ((ushort)pos);\r
+               }\r
+\r
+               // Emits Open followed by gid as one operand word; gid is cast\r
+               // to ushort, so it has to fit into 16 bits.\r
+               public void EmitOpen (int gid) {\r
+                       Emit (OpCode.Open);\r
+                       Emit ((ushort)gid);\r
+               }\r
+\r
+               // Emits Close followed by gid as one operand word; gid is cast\r
+               // to ushort, so it has to fit into 16 bits.\r
+               public void EmitClose (int gid) {\r
+                       Emit (OpCode.Close);\r
+                       Emit ((ushort)gid);\r
+               }\r
+\r
+               // Emits Balance followed by two operand words: gid first, then\r
+               // balance. Both are cast to ushort.\r
+               public void EmitBalance (int gid, int balance) {\r
+                       Emit (OpCode.Balance);\r
+\r
+                       ushort group = (ushort)gid;\r
+                       Emit (group);\r
+\r
+                       ushort other = (ushort)balance;\r
+                       Emit (other);\r
+               }\r
+\r
+               // Emits Reference with flags built from ignore/reverse,\r
+               // followed by gid (cast to ushort) as one operand word.\r
+               public void EmitReference (int gid, bool ignore, bool reverse) {\r
+                       Emit (OpCode.Reference, MakeFlags (false, ignore, reverse, false));\r
+                       Emit ((ushort)gid);\r
+               }\r
+\r
+               // Emits IfDefined, then the link for tail, then gid (cast to\r
+               // ushort) as an operand word.\r
+               // NOTE(review): BeginLink/EmitLink are defined outside this\r
+               // excerpt; presumably they reserve the offset word that\r
+               // ResolveLink patches later - confirm.\r
+               public void EmitIfDefined (int gid, LinkRef tail) {\r
+                       BeginLink (tail);\r
+                       Emit (OpCode.IfDefined);\r
+                       EmitLink (tail);\r
+                       Emit ((ushort)gid);\r
+               }\r
+\r
+               // Emits Sub followed by the link for tail.\r
+               // NOTE(review): BeginLink/EmitLink are defined outside this\r
+               // excerpt; presumably they reserve the offset word that\r
+               // ResolveLink patches later - confirm.\r
+               public void EmitSub (LinkRef tail) {\r
+                       BeginLink (tail);\r
+                       Emit (OpCode.Sub);\r
+                       EmitLink (tail);\r
+               }\r
+\r
+               // Emits Test followed by two links: yes first, then tail. Both\r
+               // links are begun before the opcode is emitted.\r
+               // NOTE(review): BeginLink/EmitLink are defined outside this\r
+               // excerpt; presumably they reserve the offset words that\r
+               // ResolveLink patches later - confirm.\r
+               public void EmitTest (LinkRef yes, LinkRef tail) {\r
+                       BeginLink (yes);\r
+                       BeginLink (tail);\r
+                       Emit (OpCode.Test);\r
+                       EmitLink (yes);\r
+                       EmitLink (tail);\r
+               }\r
+\r
+               // Emits Branch (passing 0 where other methods pass the result\r
+               // of MakeFlags) followed by the link for next.\r
+               // NOTE(review): BeginLink/EmitLink are defined outside this\r
+               // excerpt; presumably they reserve the offset word that\r
+               // ResolveLink patches later - confirm.\r
+               public void EmitBranch (LinkRef next) {\r
+                       BeginLink (next);\r
+                       Emit (OpCode.Branch, 0);\r
+                       EmitLink (next);\r
+               }\r
+\r
+               // Emits Jump (passing 0 where other methods pass the result of\r
+               // MakeFlags) followed by the link for target.\r
+               // NOTE(review): BeginLink/EmitLink are defined outside this\r
+               // excerpt; presumably they reserve the offset word that\r
+               // ResolveLink patches later - confirm.\r
+               public void EmitJump (LinkRef target) {\r
+                       BeginLink (target);\r
+                       Emit (OpCode.Jump, 0);\r
+                       EmitLink (target);\r
+               }\r
+\r
+               // Emits Repeat with only the lazy flag passed to MakeFlags,\r
+               // then the link for until, then min and max as operand words.\r
+               // min and max are cast to ushort, so larger values are cut to\r
+               // their low 16 bits.\r
+               // NOTE(review): BeginLink/EmitLink are defined outside this\r
+               // excerpt; presumably they reserve the offset word that\r
+               // ResolveLink patches later - confirm.\r
+               public void EmitRepeat (int min, int max, bool lazy, LinkRef until) {\r
+                       BeginLink (until);\r
+                       Emit (OpCode.Repeat, MakeFlags (false, false, false, lazy));\r
+                       EmitLink (until);\r
+                       Emit ((ushort)min);\r
+                       Emit ((ushort)max);\r
+               }\r
+\r
+               // Patches every pending offset recorded on repeat so that it points at the\r
+               // current address, then appends an Until instruction at that address.\r
+               public void EmitUntil (LinkRef repeat) {\r
+                       ResolveLink (repeat);\r
+                       Emit (OpCode.Until, (OpFlags)0);\r
+               }\r
+\r
+               // Appends a FastRepeat instruction (Lazy flag set when lazy is true): opcode, a\r
+               // placeholder offset word for tail, then min and max as 16-bit operands.\r
+               public void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail) {\r
+                       OpFlags flags = MakeFlags (false, false, false, lazy);\r
+\r
+                       BeginLink (tail);\r
+                       Emit (OpCode.FastRepeat, flags);\r
+                       EmitLink (tail);\r
+\r
+                       ushort lower = (ushort)min;\r
+                       Emit (lower);\r
+                       ushort upper = (ushort)max;\r
+                       Emit (upper);\r
+               }\r
+\r
+               // Appends an In instruction: opcode followed by a placeholder offset word for tail.\r
+               public void EmitIn (LinkRef tail) {\r
+                       // base address = address of the In opcode\r
+                       BeginLink (tail);\r
+                       Emit (OpCode.In, (OpFlags)0);\r
+                       EmitLink (tail);\r
+               }\r
+\r
+               // Appends an Anchor instruction: opcode, a placeholder offset word for tail,\r
+               // then offset as a 16-bit operand.\r
+               public void EmitAnchor (int offset, LinkRef tail) {\r
+                       // base address = address of the Anchor opcode\r
+                       BeginLink (tail);\r
+                       Emit (OpCode.Anchor, (OpFlags)0);\r
+                       EmitLink (tail);\r
+\r
+                       ushort anchor_offset = (ushort)offset;\r
+                       Emit (anchor_offset);\r
+               }\r
+\r
+               // Appends an Info instruction followed by three 16-bit operands, in order:\r
+               // count, min, max.\r
+               public void EmitInfo (int count, int min, int max) {\r
+                       Emit (OpCode.Info, (OpFlags)0);\r
+\r
+                       ushort groups = (ushort)count;\r
+                       Emit (groups);\r
+\r
+                       ushort lower = (ushort)min;\r
+                       Emit (lower);\r
+\r
+                       ushort upper = (ushort)max;\r
+                       Emit (upper);\r
+               }\r
+\r
+               // Creates a fresh, empty link reference for use with the Emit* methods and ResolveLink.\r
+               public LinkRef NewLink () {\r
+                       LinkRef fresh = new PatternLinkStack ();\r
+                       return fresh;\r
+               }\r
+               \r
+               // Patches every placeholder recorded on lref: each pending link is popped and its\r
+               // placeholder word is overwritten with (current address - link base address),\r
+               // narrowed to 16 bits.\r
+               public void ResolveLink (LinkRef lref) {\r
+                       PatternLinkStack links = (PatternLinkStack)lref;\r
+\r
+                       // Pop () makes the popped entry current and returns false once none are left\r
+                       while (links.Pop ()) {\r
+                               int slot = links.OffsetAddress;\r
+                               int offset = links.GetOffset (CurrentAddress);\r
+                               pgm[slot] = (ushort)offset;\r
+                       }\r
+               }\r
+\r
+               // private members\r
+\r
+               // Builds an OpFlags value with Negate, IgnoreCase, RightToLeft and Lazy set\r
+               // for each corresponding argument that is true.\r
+               private static OpFlags MakeFlags (bool negate, bool ignore, bool reverse, bool lazy) {\r
+                       OpFlags result = (OpFlags)0;\r
+\r
+                       if (negate)\r
+                               result = result | OpFlags.Negate;\r
+\r
+                       if (ignore)\r
+                               result = result | OpFlags.IgnoreCase;\r
+\r
+                       if (reverse)\r
+                               result = result | OpFlags.RightToLeft;\r
+\r
+                       if (lazy)\r
+                               result = result | OpFlags.Lazy;\r
+\r
+                       return result;\r
+               }\r
+               \r
+               // Appends the instruction word for op with no flags set.\r
+               private void Emit (OpCode op) {\r
+                       OpFlags none = (OpFlags)0;\r
+                       Emit (op, none);\r
+               }\r
+\r
+               // Appends the single word that EncodeOp produces for op combined with flags.\r
+               private void Emit (OpCode op, OpFlags flags) {\r
+                       ushort word = EncodeOp (op, flags);\r
+                       Emit (word);\r
+               }\r
+\r
+               // Appends one raw 16-bit word to the end of the program being built.\r
+               private void Emit (ushort word) {\r
+                       object boxed = word;    // stored boxed as ushort, since pgm is an ArrayList\r
+                       pgm.Add (boxed);\r
+               }\r
+\r
+               // Address of the next word to be emitted, i.e. the number of words emitted so far.\r
+               private int CurrentAddress {\r
+                       get {\r
+                               int emitted = pgm.Count;\r
+                               return emitted;\r
+                       }\r
+               }\r
+\r
+               // Records the current address as the base from which lref's next offset is measured.\r
+               // Callers invoke this before emitting the opcode, so the base is the opcode's address.\r
+               private void BeginLink (LinkRef lref) {\r
+                       PatternLinkStack links = (PatternLinkStack)lref;\r
+                       int here = CurrentAddress;\r
+                       links.BaseAddress = here;\r
+               }\r
+\r
+               // Reserves one word for an offset that is not known yet: remembers the word's\r
+               // address on lref, emits a zero placeholder, and pushes the link so that\r
+               // ResolveLink can patch it later.\r
+               private void EmitLink (LinkRef lref) {\r
+                       PatternLinkStack links = (PatternLinkStack)lref;\r
+\r
+                       int slot = CurrentAddress;\r
+                       links.OffsetAddress = slot;\r
+\r
+                       ushort placeholder = 0;\r
+                       Emit (placeholder);\r
+\r
+                       links.Push ();\r
+               }\r
+\r
+               // LinkStack specialization used by the emitter. The "current" link holds the\r
+               // address offsets are measured from (base_addr) and the address of the\r
+               // placeholder word to patch (offset_addr).\r
+               private class PatternLinkStack : LinkStack {\r
+                       // one pending link; a value type, so GetCurrent () hands out a boxed copy\r
+                       private struct Link {\r
+                               public int base_addr;\r
+                               public int offset_addr;\r
+                       }\r
+\r
+                       Link link;\r
+\r
+                       public PatternLinkStack () {\r
+                       }\r
+\r
+                       // address that GetOffset () subtracts from a target address (write-only)\r
+                       public int BaseAddress {\r
+                               set {\r
+                                       link.base_addr = value;\r
+                               }\r
+                       }\r
+\r
+                       // address of the placeholder word belonging to the current link\r
+                       public int OffsetAddress {\r
+                               get {\r
+                                       return link.offset_addr;\r
+                               }\r
+                               set {\r
+                                       link.offset_addr = value;\r
+                               }\r
+                       }\r
+\r
+                       // distance from the current link's base address to target_addr\r
+                       public int GetOffset (int target_addr) {\r
+                               int distance = target_addr - link.base_addr;\r
+                               return distance;\r
+                       }\r
+\r
+                       // LinkStack implementation\r
+\r
+                       protected override object GetCurrent () {\r
+                               return link;\r
+                       }\r
+\r
+                       protected override void SetCurrent (object l) {\r
+                               link = (Link)l;\r
+                       }\r
+               }\r
+\r
+               private ArrayList pgm;\r
+       }\r
+\r
+       // LinkRef that keeps a stack of saved link states. Subclasses define what a link\r
+       // state is through GetCurrent () / SetCurrent ().\r
+       abstract class LinkStack : LinkRef {\r
+               private Stack stack;\r
+\r
+               public LinkStack () {\r
+                       stack = new Stack ();\r
+               }\r
+\r
+               // saves the subclass's current link state on top of the stack\r
+               public void Push () {\r
+                       object current = GetCurrent ();\r
+                       stack.Push (current);\r
+               }\r
+\r
+               // restores the most recently saved link state as the current one;\r
+               // returns false, changing nothing, when no saved state remains\r
+               public bool Pop () {\r
+                       if (stack.Count <= 0)\r
+                               return false;\r
+\r
+                       object saved = stack.Pop ();\r
+                       SetCurrent (saved);\r
+                       return true;\r
+               }\r
+\r
+               protected abstract object GetCurrent ();\r
+               protected abstract void SetCurrent (object l);\r
+       }\r
+}\r
diff --git a/mcs/class/System/System.Text.RegularExpressions/debug.cs b/mcs/class/System/System.Text.RegularExpressions/debug.cs
new file mode 100644 (file)
index 0000000..fdc3ab8
--- /dev/null
@@ -0,0 +1,208 @@
+//\r
+// assembly:   System\r
+// namespace:  System.Text.RegularExpressions\r
+// file:       debug.cs\r
+//\r
+// author:     Dan Lewis (dlewis@gmx.co.uk)\r
+//             (c) 2002\r
+\r
+using System;\r
+using System.Collections;\r
+\r
+namespace System.Text.RegularExpressions {\r
+\r
+       // Debugging aid: prints a compiled pattern image (an array of 16-bit words) to the\r
+       // console, one decoded instruction per line.\r
+       class Disassembler {\r
+               // Disassembles the whole image, starting at address 0 with no indentation.\r
+               public static void DisassemblePattern (ushort[] image) {\r
+                       DisassembleBlock (image, 0, 0);\r
+               }\r
+\r
+               // Prints instructions from address pc up to the end of image. Each line is\r
+               // "<hex address>: <indent><instruction>", indented by two spaces per depth level.\r
+               public static void DisassembleBlock (ushort[] image, int pc, int depth) {\r
+                       OpCode op;\r
+                       OpFlags flags;\r
+\r
+                       for (;;) {\r
+                               if (pc >= image.Length)\r
+                                       return;\r
+\r
+                               PatternCompiler.DecodeOp (image[pc], out op, out flags);\r
+                               Console.Write (FormatAddress (pc) + ": ");              // address\r
+                               Console.Write (new string (' ', depth * 2));            // indent\r
+                               Console.Write (DisassembleOp (image, pc));              // instruction\r
+                               Console.WriteLine ();\r
+\r
+                               // skip = total length of the instruction in words (opcode + operands)\r
+                               int skip;\r
+                               switch (op) {\r
+                               case OpCode.False: case OpCode.True: case OpCode.Until:\r
+                                       skip = 1;\r
+                                       break;\r
+\r
+                               case OpCode.Character: case OpCode.Category: case OpCode.Position:\r
+                               case OpCode.Open: case OpCode.Close: case OpCode.Reference:\r
+                               case OpCode.Sub: case OpCode.Branch: case OpCode.Jump: case OpCode.In:\r
+                                       skip = 2;\r
+                                       break;\r
+\r
+                               case OpCode.Balance: case OpCode.IfDefined: case OpCode.Range:\r
+                               case OpCode.Test: case OpCode.Anchor:\r
+                                       skip = 3;\r
+                                       break;\r
+\r
+                               case OpCode.Repeat: case OpCode.FastRepeat: case OpCode.Info:\r
+                                       skip = 4;\r
+                                       break;\r
+\r
+                               // variable length: opcode, length word, then that many characters\r
+                               case OpCode.String: skip = image[pc + 1] + 2; break;\r
+                               // variable length: opcode, low bound, bitmap length, then that many bitmap words\r
+                               case OpCode.Set: skip = image[pc + 2] + 3; break;\r
+\r
+                               default:\r
+                                       skip = 1;\r
+                                       break;\r
+                               }\r
+\r
+                               pc += skip;\r
+                       }\r
+               }\r
+\r
+               // Returns the textual form of the single instruction at image[pc]: the opcode\r
+               // name, "[flags]" when any flag is set, then the operands. Offset operands are\r
+               // shown as absolute addresses (pc + offset) prefixed with ':'.\r
+               public static string DisassembleOp (ushort[] image, int pc) {\r
+                       OpCode op;\r
+                       OpFlags flags;\r
+\r
+                       PatternCompiler.DecodeOp (image[pc], out op, out flags);\r
+                       string str = op.ToString ();\r
+                       if (flags != 0)\r
+                               str += "[" + flags.ToString ("f") + "]";\r
+\r
+                       switch (op) {\r
+                       // no operands to print\r
+                       case OpCode.False: case OpCode.True: case OpCode.Until:\r
+                       default:\r
+                               break;\r
+\r
+                       case OpCode.Info:\r
+                               str += " " + image[pc + 1];\r
+                               str += " (" + image[pc + 2] + ", " + image[pc + 3] + ")";\r
+                               break;\r
+\r
+                       case OpCode.Character:\r
+                               str += " '" + FormatChar ((char)image[pc + 1]) + "'";\r
+                               break;\r
+\r
+                       case OpCode.Category:\r
+                               str += " /" + (Category)image[pc + 1];\r
+                               break;\r
+\r
+                       case OpCode.Range:\r
+                               str += " '" + FormatChar ((char)image[pc + 1]) + "', ";\r
+                               str += " '" + FormatChar ((char)image[pc + 2]) + "'";\r
+                               break;\r
+\r
+                       case OpCode.Set:\r
+                               str += " " + FormatSet (image, pc + 1);\r
+                               break;\r
+\r
+                       case OpCode.String:\r
+                               str += " '" + ReadString (image, pc + 1) + "'";\r
+                               break;\r
+\r
+                       case OpCode.Position:\r
+                               str += " /" + (Position)image[pc + 1];\r
+                               break;\r
+\r
+                       case OpCode.Open: case OpCode.Close: case OpCode.Reference:\r
+                               str += " " + image[pc + 1];\r
+                               break;\r
+\r
+                       case OpCode.Balance:\r
+                               str += " " + image[pc + 1] + " " + image[pc + 2];\r
+                               break;\r
+\r
+                       case OpCode.IfDefined: case OpCode.Anchor:\r
+                               str += " :" + FormatAddress (pc + image[pc + 1]);\r
+                               str += " " + image[pc + 2];\r
+                               break;\r
+\r
+                       case OpCode.Sub: case OpCode.Branch: case OpCode.Jump:\r
+                       case OpCode.In:\r
+                               str += " :" + FormatAddress (pc + image[pc + 1]);\r
+                               break;\r
+\r
+                       case OpCode.Test:\r
+                               str += " :" + FormatAddress (pc + image[pc + 1]);\r
+                               str += ", :" + FormatAddress (pc + image[pc + 2]);\r
+                               break;\r
+\r
+                       case OpCode.Repeat: case OpCode.FastRepeat:\r
+                               str += " :" + FormatAddress (pc + image[pc + 1]);\r
+                               str += " (" + image[pc + 2] + ", ";\r
+                               // an upper bound of 0xffff is displayed as unbounded\r
+                               if (image[pc + 3] == 0xffff)\r
+                                       str += "Inf";\r
+                               else\r
+                                       str += image[pc + 3];\r
+                               str += ")";\r
+                               break;\r
+\r
+                       }\r
+\r
+                       return str;\r
+               }\r
+\r
+               // private static members\r
+\r
+               // Reads a length-prefixed string: image[pc] holds the character count and the\r
+               // characters follow, one per word.\r
+               private static string ReadString (ushort[] image, int pc) {\r
+                       int len = image[pc];\r
+                       char[] chars = new char[len];\r
+\r
+                       for (int i = 0; i < len; ++ i)\r
+                               chars[i] = (char)image[pc + i + 1];\r
+\r
+                       return new string (chars);\r
+               }\r
+\r
+               // Formats an address as at least four lower-case hexadecimal digits.\r
+               private static string FormatAddress (int pc) {\r
+                       return pc.ToString ("x4");\r
+               }\r
+\r
+               // Formats a Set operand block as a bracketed character class such as "[a-z0]".\r
+               // image[pc] is the character code of bit 0, image[pc + 1] is the number of\r
+               // bitmap words, and the bitmap follows with 16 characters per word, least\r
+               // significant bit first. Consecutive set bits are collapsed into "a-b" ranges.\r
+               private static string FormatSet (ushort[] image, int pc) {\r
+                       int lo = image[pc ++];\r
+                       int hi = (image[pc ++] << 4) - 1;       // index of the last bit in the bitmap\r
+\r
+                       string str = "[";\r
+\r
+                       bool hot = false;                       // true while inside a run of set bits\r
+                       char a = (char)0, b;\r
+\r
+                       // Scan one index past the bitmap, treating that index as an unset bit, so that\r
+                       // a run which includes the final bit is closed with the correct end character.\r
+                       // The && guard keeps the extra index from reading beyond the bitmap.\r
+                       for (int i = 0; i <= hi + 1; ++ i) {\r
+                               bool m = i <= hi && (image[pc + (i >> 4)] & (1 << (i & 0xf))) != 0;\r
+\r
+                               if (m & !hot) {                         // start of range\r
+                                       a = (char)(lo + i);\r
+                                       hot = true;\r
+                               }\r
+                               else if (hot & !m) {                    // end of range: previous index was the last member\r
+                                       b = (char)(lo + i - 1);\r
+\r
+                                       str += FormatChar (a);\r
+                                       if (b != a)\r
+                                               str += "-" + FormatChar (b);\r
+\r
+                                       hot = false;\r
+                               }\r
+                       }\r
+\r
+                       str += "]";\r
+                       return str;\r
+               }\r
+\r
+               // Formats one character for display: '-' and ']' are backslash-escaped, letters,\r
+               // digits and symbols are shown verbatim, control characters are shown as '^'\r
+               // followed by the character '@' + c, and anything else as \u plus four hex digits.\r
+               private static string FormatChar (char c) {\r
+                       if (c == '-' || c == ']')\r
+                               return "\\" + c;\r
+\r
+                       if (Char.IsLetterOrDigit (c) || Char.IsSymbol (c))\r
+                               return c.ToString ();\r
+\r
+                       if (Char.IsControl (c)) {\r
+                               return "^" + (char)('@' + c);\r
+                       }\r
+\r
+                       return "\\u" + ((int)c).ToString ("x4");\r
+               }\r
+       }\r
+}\r
diff --git a/mcs/class/System/System.Text.RegularExpressions/interpreter.cs b/mcs/class/System/System.Text.RegularExpressions/interpreter.cs
new file mode 100644 (file)
index 0000000..809d02d
--- /dev/null
@@ -0,0 +1,889 @@
+//\r
+// assembly:   System\r
+// namespace:  System.Text.RegularExpressions\r
+// file:       interpreter.cs\r
+//\r
+// author:     Dan Lewis (dlewis@gmx.co.uk)\r
+//             (c) 2002\r
+\r
+using System;\r
+using System.Collections;\r
+using System.Globalization;\r
+\r
+namespace System.Text.RegularExpressions {\r
+\r
+       class Interpreter : IMachine {\r
+               // Creates an interpreter for a compiled program image. The image must begin with\r
+               // an Info instruction; its three operand words supply the capture count (stored\r
+               // plus one as group_count) and the match_min / match_max values. Execution\r
+               // starts at the word following the Info block.\r
+               public Interpreter (ushort[] program) {\r
+                       this.program = program;\r
+                       checkpoints = new Stack ();\r
+                       qs = null;\r
+\r
+                       // process info block\r
+\r
+                       OpCode first = (OpCode)program[0];\r
+                       if (first != OpCode.Info)\r
+                               throw NewInterpretException ("Can't find info block.");\r
+\r
+                       group_count = program[1] + 1;\r
+                       match_min = program[2];\r
+                       match_max = program[3];\r
+\r
+                       // setup\r
+\r
+                       captures = new Capture[group_count];\r
+                       program_start = 4;              // Info occupies words 0..3\r
+               }\r
+\r
+               // IMachine implementation\r
+\r
+               // Runs the program against text. Stores text, end and start in the interpreter's\r
+               // fields, evaluates from program_start in Mode.Match, and returns a new Match on\r
+               // success or Match.Empty on failure.\r
+               public Match Scan (Regex regex, string text, int start, int end) {\r
+                       this.text = text;\r
+                       text_end = end;\r
+                       scan_ptr = start;\r
+\r
+                       bool matched = Eval (Mode.Match, ref scan_ptr, program_start);\r
+                       if (!matched)\r
+                               return Match.Empty;\r
+\r
+                       return new Match (regex, this, end, captures);\r
+               }\r
+\r
+               // private methods\r
+\r
+               // Discards per-attempt state: every group gets a fresh Capture over the current\r
+               // text, the checkpoint stack is emptied, the checkpoint counter is zeroed, and\r
+               // the fast / repeat references are cleared.\r
+               private void Reset () {\r
+                       int group = 0;\r
+                       while (group < group_count) {\r
+                               captures[group] = new Capture (text);\r
+                               ++ group;\r
+                       }\r
+\r
+                       checkpoints.Clear ();\r
+                       checkpoint = 0;\r
+                       fast = repeat = null;\r
+               }\r
+\r
+               private bool Eval (Mode mode, ref int ref_ptr, int pc) {\r
+                       int ptr = ref_ptr;\r
+               Begin:\r
+                       for (;;) {\r
+                               ushort word = program[pc];\r
+                               OpCode op = (OpCode)(word & 0x00ff);\r
+                               OpFlags flags = (OpFlags)(word & 0xff00);\r
+\r
+                               switch (op) {\r
+                               case OpCode.Anchor: {\r
+                                       int skip = program[pc + 1];\r
+\r
+                                       int anch_offset = program[pc + 2];\r
+                                       int anch_ptr = ptr + anch_offset;\r
+                                       int anch_end = text_end - match_min + anch_offset;      // maximum anchor position\r
+\r
+                                       // the general case for an anchoring expression is at the bottom, however we\r
+                                       // do some checks for the common cases before to save processing time. the current\r
+                                       // optimizer only outputs three types of anchoring expressions: fixed position,\r
+                                       // fixed substring, and no anchor.\r
+\r
+                                       OpCode anch_op = (OpCode)(program[pc + 3] & 0x00ff);\r
+                                       if (anch_op == OpCode.Position && skip == 6) {                          // position anchor\r
+                                               // Anchor\r
+                                               //      Position\r
+                                               //      True\r
+\r
+                                               switch ((Position)program[pc + 4]) {\r
+                                               case Position.StartOfString:\r
+                                                       if (anch_ptr == 0) {\r
+                                                               ptr = 0;\r
+                                                               if (TryMatch (ref ptr, pc + skip))\r
+                                                                       goto Pass;\r
+                                                       }\r
+                                                       break;\r
+                                               \r
+                                               case Position.StartOfLine:\r
+                                                       if (anch_ptr == 0) {\r
+                                                               ptr = 0;\r
+                                                               if (TryMatch (ref ptr, pc + skip))\r
+                                                                       goto Pass;\r
+\r
+                                                               ++ anch_ptr;\r
+                                                       }\r
+\r
+                                                       while (anch_ptr <= anch_end) {\r
+                                                               if (text[anch_ptr - 1] == '\n') {\r
+                                                                       ptr = anch_ptr - anch_offset;\r
+                                                                       if (TryMatch (ref ptr, pc + skip))\r
+                                                                               goto Pass;\r
+                                                               }\r
+\r
+                                                               ++ anch_ptr;\r
+                                                       }\r
+                                                       break;\r
+                                               \r
+                                               case Position.StartOfScan:\r
+                                                       if (anch_ptr == scan_ptr) {\r
+                                                               ptr = scan_ptr - anch_offset;\r
+                                                               if (TryMatch (ref ptr, pc + skip))\r
+                                                                       goto Pass;\r
+                                                       }\r
+                                                       break;\r
+\r
+                                               default:\r
+                                                       // FIXME\r
+                                                       break;\r
+                                               }\r
+                                       }\r
+                                       else if (qs != null ||\r
+                                               (anch_op == OpCode.String && skip == 6 + program[pc + 4])) {    // substring anchor\r
+                                               // Anchor\r
+                                               //      String\r
+                                               //      True\r
+\r
+                                               if (qs == null) {\r
+                                                       bool ignore = ((OpFlags)program[pc + 3] & OpFlags.IgnoreCase) != 0;\r
+                                                       string substring = GetString (pc + 3);\r
+\r
+                                                       qs = new QuickSearch (substring, ignore);\r
+                                               }\r
+\r
+                                               while (anch_ptr <= anch_end) {\r
+                                                       anch_ptr = qs.Search (text, anch_ptr, anch_end);\r
+                                                       if (anch_ptr < 0)\r
+                                                               break;\r
+\r
+                                                       ptr = anch_ptr - anch_offset;\r
+                                                       if (TryMatch (ref ptr, pc + skip))\r
+                                                               goto Pass;\r
+\r
+                                                       ++ anch_ptr;\r
+                                               }\r
+                                       }\r
+                                       else if (anch_op == OpCode.True) {                                      // no anchor\r
+                                               // Anchor\r
+                                               //      True\r
+\r
+                                               while (anch_ptr <= anch_end) {\r
+                                                       ptr = anch_ptr;\r
+                                                       if (TryMatch (ref ptr, pc + skip))\r
+                                                               goto Pass;\r
+\r
+                                                       ++ anch_ptr;\r
+                                               }\r
+                                       }\r
+                                       else {                                                                  // general case\r
+                                               // Anchor\r
+                                               //      <expr>\r
+                                               //      True\r
+\r
+                                               while (anch_ptr <= anch_end) {\r
+                                                       ptr = anch_ptr;\r
+                                                       if (Eval (Mode.Match, ref ptr, pc + 3)) {\r
+                                                               // anchor expression passed: try real expression at the correct offset\r
+\r
+                                                               ptr = anch_ptr - anch_offset;\r
+                                                               if (TryMatch (ref ptr, pc + skip))\r
+                                                                       goto Pass;\r
+                                                       }\r
+\r
+                                                       ++ anch_ptr;\r
+                                               }\r
+                                       }\r
+\r
+                                       goto Fail;\r
+                               }\r
+                               \r
+                               case OpCode.False: {\r
+                                       goto Fail;\r
+                               }\r
+\r
+                               case OpCode.True: {\r
+                                       goto Pass;\r
+                               }\r
+\r
+                               case OpCode.Position: {\r
+                                       if (!IsPosition ((Position)program[pc + 1], ptr))\r
+                                               goto Fail;\r
+                                       pc += 2;\r
+                                       break;\r
+                               }\r
+\r
+                               case OpCode.String: {\r
+                                       bool reverse = (flags & OpFlags.RightToLeft) != 0;\r
+                                       bool ignore = (flags & OpFlags.IgnoreCase) != 0;\r
+                                       int len = program[pc + 1];\r
+\r
+                                       if (reverse) {\r
+                                               ptr -= len;\r
+                                               if (ptr < 0)\r
+                                                       goto Fail;\r
+                                       }\r
+                                       else if (ptr + len > text_end)\r
+                                               goto Fail;\r
+\r
+                                       pc += 2;\r
+                                       for (int i = 0; i < len; ++ i) {\r
+                                               char c = text[ptr + i];\r
+                                               if (ignore)\r
+                                                       c = Char.ToLower (c);\r
+\r
+                                               if (c != (char)program[pc ++])\r
+                                                       goto Fail;\r
+                                       }\r
+\r
+                                       if (!reverse)\r
+                                               ptr += len;\r
+                                       break;\r
+                               }\r
+\r
+                               case OpCode.Reference: {\r
+                                       bool reverse = (flags & OpFlags.RightToLeft) != 0;\r
+                                       bool ignore = (flags & OpFlags.IgnoreCase) != 0;\r
+                                       Capture cap = captures[program[pc + 1]].GetLastDefined ();\r
+                                       if (cap == null)\r
+                                               goto Fail;\r
+\r
+                                       int str = cap.Index;\r
+                                       int len = cap.Length;\r
+\r
+                                       if (reverse) {\r
+                                               ptr -= len;\r
+                                               if (ptr < 0)\r
+                                                       goto Fail;\r
+                                       }\r
+                                       else if (ptr + len > text_end)\r
+                                               goto Fail;\r
+\r
+                                       pc += 2;\r
+                                       for (int i = 0; i < len; ++ i) {\r
+                                               if (ignore) {\r
+                                                       if (Char.ToLower (text[ptr + i]) != Char.ToLower (text[str + i]))\r
+                                                               goto Fail;\r
+                                               }\r
+                                               else {\r
+                                                       if (text[ptr + i] != text[str + i])\r
+                                                               goto Fail;\r
+                                               }\r
+                                       }\r
+\r
+                                       if (!reverse)\r
+                                               ptr += len;\r
+                                       break;\r
+                               }\r
+\r
+                               case OpCode.Character: case OpCode.Category:\r
+                               case OpCode.Range: case OpCode.Set: {\r
+                                       if (!EvalChar (mode, ref ptr, ref pc, false))\r
+                                               goto Fail;\r
+                                       break;\r
+                               }\r
+\r
+                               case OpCode.In: {\r
+                                       int target = pc + program[pc + 1];\r
+                                       pc += 2;\r
+                                       if (!EvalChar (mode, ref ptr, ref pc, true))\r
+                                               goto Fail;\r
+\r
+                                       pc = target;\r
+                                       break;\r
+                               }\r
+\r
+                               case OpCode.Open: {\r
+                                       Open (program[pc + 1], ptr);\r
+                                       pc += 2;\r
+                                       break;\r
+                               }\r
+\r
+                               case OpCode.Close: {\r
+                                       Close (program[pc + 1], ptr);\r
+                                       pc += 2;\r
+                                       break;\r
+                               }\r
+\r
+                               case OpCode.Balance: {\r
+                                       Balance (program[pc + 1], program[pc + 2], ptr);\r
+                                       break;\r
+                               }\r
+\r
+                               case OpCode.IfDefined: {\r
+                                       Capture cap = captures[program[pc + 2]];\r
+                                       if (cap.GetLastDefined () == null)\r
+                                               pc += program[pc + 1];\r
+                                       else\r
+                                               pc += 3;\r
+                                       break;\r
+                               }\r
+\r
+                               case OpCode.Sub: {\r
+                                       if (!Eval (Mode.Match, ref ptr, pc + 2))\r
+                                               goto Fail;\r
+\r
+                                       pc += program[pc + 1];\r
+                                       break;\r
+                               }\r
+\r
+                               case OpCode.Test: {\r
+                                       int cp = Checkpoint ();\r
+                                       int test_ptr = ptr;\r
+                                       if (Eval (Mode.Match, ref test_ptr, pc + 3))\r
+                                               pc += program[pc + 1];\r
+                                       else {\r
+                                               Backtrack (cp);\r
+                                               pc += program[pc + 2];\r
+                                       }\r
+                                       break;\r
+                               }\r
+\r
+                               case OpCode.Branch: {\r
+                                       OpCode branch_op;\r
+                                       do {\r
+                                               int cp = Checkpoint ();\r
+                                               if (Eval (mode, ref ptr, pc + 2))\r
+                                                       goto Pass;\r
+                                               \r
+                                               Backtrack (cp);\r
+                                               \r
+                                               pc += program[pc + 1];\r
+                                               branch_op = (OpCode)(program[pc] & 0xff);\r
+                                       } while (branch_op != OpCode.False);\r
+\r
+                                       goto Fail;\r
+                               }\r
+\r
+                               case OpCode.Jump: {\r
+                                       pc += program[pc + 1];\r
+                                       break;\r
+                               }\r
+\r
+                               case OpCode.Repeat: {\r
+                                       this.repeat = new RepeatContext (\r
+                                               this.repeat,                    // previous context\r
+                                               program[pc + 2],                // minimum\r
+                                               program[pc + 3],                // maximum\r
+                                               (flags & OpFlags.Lazy) != 0,    // lazy\r
+                                               pc + 4                          // subexpression\r
+                                       );\r
+\r
+                                       if (Eval (Mode.Match, ref ptr, pc + program[pc + 1]))\r
+                                               goto Pass;\r
+                                       else {\r
+                                               this.repeat = this.repeat.Previous;\r
+                                               goto Fail;\r
+                                       }\r
+                               }\r
+\r
+                               case OpCode.Until: {\r
+                                       RepeatContext current = this.repeat;\r
+                                       int start = current.Start;\r
+\r
+                                       if (!current.IsMinimum) {\r
+                                               ++ current.Count;\r
+                                               current.Start = ptr;\r
+                                               if (Eval (Mode.Match, ref ptr, repeat.Expression))\r
+                                                       goto Pass;\r
+\r
+                                               current.Start = start;\r
+                                               -- current.Count;\r
+                                               goto Fail;\r
+                                       }\r
+\r
+                                       if (ptr == current.Start) {\r
+                                               // degenerate match ... match tail or fail\r
+\r
+                                               this.repeat = current.Previous;\r
+                                               if (Eval (Mode.Match, ref ptr, pc + 1))\r
+                                                       goto Pass;\r
+                                       \r
+                                               goto Fail;\r
+                                       }\r
+\r
+                                       if (current.IsLazy) {\r
+                                               // match tail first ...\r
+\r
+                                               this.repeat = current.Previous;\r
+                                               int cp = Checkpoint ();\r
+                                               if (Eval (Mode.Match, ref ptr, pc + 1))\r
+                                                       goto Pass;\r
+\r
+                                               Backtrack (cp);\r
+\r
+                                               // ... then match more\r
+\r
+                                               this.repeat = current;\r
+                                               if (!current.IsMaximum) {\r
+                                                       ++ current.Count;\r
+                                                       current.Start = ptr;\r
+                                                       if (Eval (Mode.Match, ref ptr, current.Expression))\r
+                                                               goto Pass;\r
+\r
+                                                       current.Start = start;\r
+                                                       -- current.Count;\r
+                                                       goto Fail;\r
+                                               }\r
+\r
+                                               return false;\r
+                                       }\r
+                                       else {\r
+                                               // match more first ...\r
+\r
+                                               if (!current.IsMaximum) {\r
+                                                       int cp = Checkpoint ();\r
+                                                       ++ current.Count;\r
+                                                       current.Start = ptr;\r
+                                                       if (Eval (Mode.Match, ref ptr, current.Expression))\r
+                                                               goto Pass;\r
+\r
+                                                       current.Start = start;\r
+                                                       -- current.Count;\r
+                                                       Backtrack (cp);\r
+                                               }\r
+\r
+                                               // ... then match tail\r
+\r
+                                               this.repeat = current.Previous;\r
+                                               if (Eval (Mode.Match, ref ptr, pc + 1))\r
+                                                       goto Pass;\r
+\r
+                                               this.repeat = current;\r
+                                               goto Fail;\r
+                                       }\r
+                               }\r
+\r
+                               case OpCode.FastRepeat: {\r
+                                       this.fast = new RepeatContext (\r
+                                               fast,\r
+                                               program[pc + 2],                // minimum\r
+                                               program[pc + 3],                // maximum\r
+                                               (flags & OpFlags.Lazy) != 0,    // lazy\r
+                                               pc + 4                          // subexpression\r
+                                       );\r
+                                       fast.Start = ptr;\r
+\r
+                                       int cp = Checkpoint ();\r
+\r
+                                       pc += program[pc + 1];          // tail expression\r
+                                       ushort tail_word = program[pc];\r
+\r
+                                       int c1, c2;                     // first character of tail operator\r
+                                       int coff;                       // 0 or -1 depending on direction\r
+\r
+                                       OpCode tail_op = (OpCode)(tail_word & 0xff);\r
+                                       if (tail_op == OpCode.Character || tail_op == OpCode.String) {\r
+                                               OpFlags tail_flags = (OpFlags)(tail_word & 0xff00);\r
+\r
+                                               if (tail_op == OpCode.String)\r
+                                                       c1 = program[pc + 2];                           // first char of string\r
+                                               else\r
+                                                       c1 = program[pc + 1];                           // character\r
+                                               \r
+                                               if ((tail_flags & OpFlags.IgnoreCase) != 0)\r
+                                                       c2 = Char.ToUpper ((char)c1);                   // ignore case\r
+                                               else\r
+                                                       c2 = c1;\r
+\r
+                                               if ((tail_flags & OpFlags.RightToLeft) != 0)\r
+                                                       coff = -1;                                      // reverse\r
+                                               else\r
+                                                       coff = 0;\r
+                                       }\r
+                                       else {\r
+                                               c1 = c2 = -1;\r
+                                               coff = 0;\r
+                                       }\r
+\r
+                                       if (fast.IsLazy) {\r
+                                               if (!fast.IsMinimum && !Eval (Mode.Count, ref ptr, fast.Expression)) {\r
+                                                       //Console.WriteLine ("lazy fast: failed minimum.");\r
+                                                       fast = fast.Previous;\r
+                                                       goto Fail;\r
+                                               }\r
+                                               \r
+                                               while (true) {\r
+                                                       int p = ptr + coff;\r
+                                                       if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&\r
+                                                           Eval (Mode.Match, ref ptr, pc))\r
+                                                               break;\r
+\r
+                                                       if (fast.IsMaximum) {\r
+                                                               //Console.WriteLine ("lazy fast: failed with maximum.");\r
+                                                               fast = fast.Previous;\r
+                                                               goto Fail;\r
+                                                       }\r
+\r
+                                                       Backtrack (cp);\r
+                                                       if (!Eval (Mode.Count, ref ptr, fast.Expression)) {\r
+                                                               //Console.WriteLine ("lazy fast: no more.");\r
+                                                               fast = fast.Previous;\r
+                                                               goto Fail;\r
+                                                       }\r
+                                               }\r
+                                               fast = fast.Previous;\r
+                                               goto Pass;\r
+                                       }\r
+                                       else {\r
+                                               if (!Eval (Mode.Count, ref ptr, fast.Expression)) {\r
+                                                       fast = fast.Previous;\r
+                                                       goto Fail;\r
+                                               }\r
+                                       \r
+                                               int width;\r
+                                               if (fast.Count > 0)\r
+                                                       width = (ptr - fast.Start) / fast.Count;\r
+                                               else\r
+                                                       width = 0;\r
+\r
+                                               while (true) {\r
+                                                       int p = ptr + coff;\r
+                                                       if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&\r
+                                                           Eval (Mode.Match, ref ptr, pc))\r
+                                                               break;\r
+\r
+                                                       -- fast.Count;\r
+                                                       if (!fast.IsMinimum) {\r
+                                                               fast = fast.Previous;\r
+                                                               goto Fail;\r
+                                                       }\r
+\r
+                                                       ptr -= width;\r
+                                                       Backtrack (cp);\r
+                                               }\r
+                                               fast = fast.Previous;\r
+                                               goto Pass;\r
+                                       }\r
+                               }\r
+\r
+                               case OpCode.Info: {\r
+                                       throw NewInterpretException ("Info block found in pattern.");\r
+                               }\r
+                               }\r
+                       }\r
+               Pass:\r
+                       ref_ptr = ptr;\r
+\r
+                       switch (mode) {\r
+                       case Mode.Match:\r
+                               return true;\r
+\r
+                       case Mode.Count: {\r
+                               ++ fast.Count;\r
+                               if (fast.IsMaximum || (fast.IsLazy && fast.IsMinimum))\r
+                                       return true;\r
+\r
+                               pc = fast.Expression;\r
+                               goto Begin;\r
+                       }\r
+                       }\r
+\r
+               Fail:\r
+                       switch (mode) {\r
+                       case Mode.Match:\r
+                               return false;\r
+\r
+                       case Mode.Count: {\r
+                               if (!fast.IsLazy && fast.IsMinimum)\r
+                                       return true;\r
+\r
+                               ref_ptr = fast.Start;\r
+                               return false;\r
+                       }\r
+                       }\r
+\r
+                       return false;\r
+               }\r
+\r
+               // Tests one input character against a single character-class op or,\r
+               // when multi is true, against a run of ops that ends with a True or\r
+               // False op.\r
+               //\r
+               // mode  - not referenced by this method.\r
+               // ptr   - text position; it is moved by one character (backwards when\r
+               //         the first op carries RightToLeft) as soon as the character is\r
+               //         read, so it has already changed even if the result is false.\r
+               // pc    - on entry the index of the first op word; advanced past every\r
+               //         op word and operand that gets evaluated.\r
+               // multi - false: evaluate exactly one op; true: keep evaluating ops\r
+               //         until one of them decides the result.\r
+               //\r
+               // Returns false when there is no character left in the scan direction.\r
+               // Otherwise the first op that matches the character returns the inverse\r
+               // of its own Negate flag, and True/False ops return true/false. When\r
+               // multi is false and the op does not match, its Negate flag is returned.\r
+               private bool EvalChar (Mode mode, ref int ptr, ref int pc, bool multi) {\r
+                       bool consumed = false;\r
+                       char c = '\0';\r
+                       bool negate;\r
+                       bool ignore;\r
+                       do {\r
+                               // low byte of the word is the opcode, high byte the flags\r
+                               ushort word = program[pc];\r
+                               OpCode op = (OpCode)(word & 0x00ff);\r
+                               OpFlags flags = (OpFlags)(word & 0xff00);\r
+\r
+                               ++ pc;\r
+\r
+                               ignore = (flags & OpFlags.IgnoreCase) != 0;\r
+                               \r
+                               // consume character: the direction of an In construct is\r
+                               // determined by the direction of its first op\r
+\r
+                               if (!consumed) {\r
+                                       if ((flags & OpFlags.RightToLeft) != 0) {\r
+                                               if (ptr <= 0)\r
+                                                       return false;\r
+\r
+                                               c = text[-- ptr];\r
+                                       }\r
+                                       else {\r
+                                               if (ptr >= text_end)\r
+                                                       return false;\r
+\r
+                                               c = text[ptr ++];\r
+                                       }\r
+\r
+                                       // only the input character is folded, and only according\r
+                                       // to the first op's flag.\r
+                                       // NOTE(review): operands are presumably stored lower-cased\r
+                                       // by the compiler - confirm.\r
+                                       if (ignore)\r
+                                               c = Char.ToLower (c);\r
+\r
+                                       consumed = true;\r
+                               }\r
+\r
+                               // negate flag\r
+\r
+                               negate = (flags & OpFlags.Negate) != 0;\r
+\r
+                               // execute op\r
+                               \r
+                               switch (op) {\r
+                               case OpCode.True:\r
+                                       return true;\r
+\r
+                               case OpCode.False:\r
+                                       return false;\r
+                               \r
+                               // operand: the character to compare against\r
+                               case OpCode.Character: {\r
+                                       if (c == (char)program[pc ++])\r
+                                               return !negate;\r
+                                       break;\r
+                               }\r
+\r
+                               // operand: a Category value\r
+                               case OpCode.Category: {\r
+                                       if (CategoryUtils.IsCategory ((Category)program[pc ++], c))\r
+                                               return !negate;\r
+\r
+                                       break;\r
+                               }\r
+                               \r
+                               // operands: inclusive lower and upper bound\r
+                               case OpCode.Range: {\r
+                                       int lo = (char)program[pc ++];\r
+                                       int hi = (char)program[pc ++];\r
+                                       if (lo <= c && c <= hi)\r
+                                               return !negate;\r
+                                       break;\r
+                               }\r
+\r
+                               // operands: lowest character code, bitmap length in words,\r
+                               // then the bitmap itself (16 characters per word, bit i of\r
+                               // the bitmap standing for character lo + i)\r
+                               case OpCode.Set: {\r
+                                       int lo = (char)program[pc ++];\r
+                                       int len = (char)program[pc ++];\r
+                                       int bits = pc;\r
+                                       pc += len;\r
+\r
+                                       int i = (int)c - lo;\r
+                                       if (i < 0 || i >= len << 4)\r
+                                               break;\r
+\r
+                                       if ((program[bits + (i >> 4)] & (1 << (i & 0xf))) != 0)\r
+                                               return !negate;\r
+                                       break;\r
+                               }\r
+                               }\r
+                       } while (multi);\r
+\r
+                       // reached only when multi is false and the single op did not match\r
+                       return negate;\r
+               }\r
+\r
+               // Attempts a match of the program at pc, starting at text position\r
+               // ref_ptr. Group 0 is opened at the start position and, on success,\r
+               // closed at the position where evaluation stopped.\r
+               //\r
+               // ref_ptr - start position; updated to the end of the match on success\r
+               //           and left untouched on failure.\r
+               // pc      - index of the first instruction to evaluate.\r
+               //\r
+               // Returns true when Eval succeeds in Mode.Match.\r
+               private bool TryMatch (ref int ref_ptr, int pc) {\r
+                       Reset ();\r
+\r
+                       int cursor = ref_ptr;\r
+                       captures[0].Open (cursor);\r
+\r
+                       bool matched = Eval (Mode.Match, ref cursor, pc);\r
+                       if (!matched)\r
+                               return false;\r
+\r
+                       // captures is re-read here on purpose: Eval may have swapped the array\r
+                       captures[0].Close (cursor);\r
+                       ref_ptr = cursor;\r
+                       return true;\r
+               }\r
+               \r
+               // Tests whether the zero-width assertion pos holds at text index ptr.\r
+               //\r
+               // pos - the kind of anchor to test.\r
+               // ptr - index into text; callers are assumed to pass a value in the\r
+               //       range 0 .. text_end (not checked here).\r
+               //\r
+               // Returns true when the assertion holds. Unknown Position values yield\r
+               // false.\r
+               private bool IsPosition (Position pos, int ptr) {\r
+                       switch (pos) {\r
+                       case Position.Start: case Position.StartOfString:\r
+                               return ptr == 0;\r
+\r
+                       case Position.StartOfLine:\r
+                               return ptr == 0 || text[ptr - 1] == '\n';\r
+\r
+                       case Position.StartOfScan:\r
+                               return ptr == scan_ptr;\r
+\r
+                       case Position.End:\r
+                               // end of text, or just before a final newline\r
+                               return ptr == text_end ||\r
+                                       (ptr == text_end - 1 && text[ptr] == '\n');\r
+\r
+                       case Position.EndOfLine:\r
+                               return ptr == text_end || text[ptr] == '\n';\r
+\r
+                       case Position.EndOfString:\r
+                               return ptr == text_end;\r
+\r
+                       case Position.Boundary:\r
+                               // an empty input has no characters and therefore no boundary\r
+                               if (text_end == 0)\r
+                                       return false;\r
+\r
+                               // at either edge only one neighbour exists: it is a boundary\r
+                               // exactly when that neighbour is a word character\r
+                               if (ptr == 0)\r
+                                       return IsWordChar (text[ptr]);\r
+                               else if (ptr == text_end)\r
+                                       return IsWordChar (text[ptr - 1]);\r
+                               else\r
+                                       return IsWordChar (text[ptr]) != IsWordChar (text[ptr - 1]);\r
+\r
+                       case Position.NonBoundary:\r
+                               // The complement of Boundary at every position. This also\r
+                               // covers empty input: it contains no word boundary, so the\r
+                               // non-boundary assertion must hold there.\r
+                               return !IsPosition (Position.Boundary, ptr);\r
+\r
+                       default:\r
+                               return false;\r
+                       }\r
+               }\r
+\r
+               // Returns true when c belongs to Category.Word, as decided by\r
+               // CategoryUtils.IsCategory. Used by the boundary tests in IsPosition.\r
+               private bool IsWordChar (char c) {\r
+                       return CategoryUtils.IsCategory (Category.Word, c);\r
+               }\r
+\r
+               // Decodes the string operand of the instruction at pc.\r
+               //\r
+               // The word at pc + 1 holds the character count; the characters follow,\r
+               // one per program word, starting at pc + 2.\r
+               //\r
+               // Returns a new string built from those characters.\r
+               private string GetString (int pc) {\r
+                       int count = program[pc + 1];\r
+                       int first = pc + 2;\r
+\r
+                       char[] chars = new char[count];\r
+                       for (int k = 0; k < count; ++ k)\r
+                               chars[k] = (char)program[first + k];\r
+\r
+                       return new string (chars);\r
+               }\r
+\r
+               // capture management\r
+\r
+               // Starts a capture for group gid at text position ptr.\r
+               //\r
+               // The group's current Capture object is reused only when it is not yet\r
+               // defined and was created at or after the current checkpoint. In every\r
+               // other case a new Capture is chained in front of it and tagged with the\r
+               // current checkpoint.\r
+               private void Open (int gid, int ptr) {\r
+                       Capture current = captures[gid];\r
+\r
+                       bool reusable = !current.IsDefined && current.Checkpoint >= checkpoint;\r
+                       if (!reusable) {\r
+                               current = new Capture (current, checkpoint);\r
+                               captures[gid] = current;\r
+                       }\r
+\r
+                       current.Open (ptr);\r
+               }\r
+\r
+               // Ends the current capture of group gid at text position ptr by\r
+               // forwarding to Capture.Close on the group's current entry.\r
+               private void Close (int gid, int ptr) {\r
+                       captures[gid].Close (ptr);\r
+               }\r
+\r
+               // Executes a balancing-group operation: removes the most recent capture\r
+               // of group balance_gid and, when gid is positive, records a capture for\r
+               // gid that runs from the end of the removed capture up to ptr.\r
+               //\r
+               // Throws the interpreter exception when the current capture of\r
+               // balance_gid is not defined.\r
+               private void Balance (int gid, int balance_gid, int ptr) {\r
+                       Capture popped = captures[balance_gid];\r
+                       if (!popped.IsDefined) {\r
+                               throw NewInterpretException ("Invalid state - balancing group not closed.");\r
+                       }\r
+\r
+                       if (gid > 0) {\r
+                               int popped_end = popped.Index + popped.Length;\r
+                               Open (gid, popped_end);\r
+                               Close (gid, ptr);\r
+                       }\r
+\r
+                       // unwind balance_gid to the capture it held before\r
+                       captures[balance_gid] = popped.Previous;\r
+               }\r
+\r
+               // Saves the current capture array on the checkpoint stack and carries\r
+               // on with a copy of it (Array.Clone, so the Capture objects themselves\r
+               // are shared between the two arrays).\r
+               //\r
+               // Returns the new stack depth, which is the value to pass to Backtrack\r
+               // in order to return to this state.\r
+               private int Checkpoint () {\r
+                       Capture[] snapshot = captures;\r
+                       checkpoints.Push (snapshot);\r
+\r
+                       captures = (Capture[])snapshot.Clone ();\r
+\r
+                       checkpoint = checkpoints.Count;\r
+                       return checkpoint;\r
+               }\r
+\r
+               // Restores the capture state saved by the Checkpoint call that\r
+               // returned cp, discarding every snapshot taken after it.\r
+               //\r
+               // Throws the interpreter exception when cp is larger than the current\r
+               // stack depth.\r
+               private void Backtrack (int cp) {\r
+                       int depth = checkpoints.Count;\r
+                       if (depth < cp)\r
+                               throw NewInterpretException ("Can't backtrack forwards");\r
+\r
+                       // drop the snapshots that are newer than cp\r
+                       for (; depth > cp; -- depth)\r
+                               checkpoints.Pop ();\r
+\r
+                       // NOTE(review): the restored array is the very object that stays on\r
+                       // the stack, so later writes to captures also change the saved\r
+                       // snapshot - confirm this is intended.\r
+                       captures = (Capture[])checkpoints.Peek ();\r
+                       checkpoint = cp;\r
+\r
+                       // TODO optimize this\r
+               }\r
+\r
+               // Builds (but does not throw) the exception used to report an invalid\r
+               // interpreter state; callers write "throw NewInterpretException (...)".\r
+               // The concrete type is ApplicationException carrying msg.\r
+               private Exception NewInterpretException (string msg) {\r
+                       return new ApplicationException (msg);\r
+               }\r
+\r
+               // interpreter attributes\r
+\r
+               private ushort[] program;               // regex program; each op word = opcode (low byte) | flags (high byte)\r
+               private int program_start;              // first instruction after info block\r
+               private string text;                    // input text\r
+               private int text_end;                   // end of input text (last character + 1)\r
+               private int group_count;                // number of capturing groups\r
+               private int match_min, match_max;       // match width information\r
+               private QuickSearch qs;                 // fast substring matcher\r
+\r
+               // match state\r
+               \r
+               private int scan_ptr;                   // start of scan (tested by Position.StartOfScan)\r
+\r
+               private Capture[] captures;             // current captures, indexed by group id (0 = whole match)\r
+\r
+               private int checkpoint;                 // id of the last checkpoint (stack depth when it was taken)\r
+               private Stack checkpoints;              // stack of Capture[] snapshots pushed by Checkpoint ()\r
+               \r
+               private RepeatContext repeat;           // innermost Repeat/Until context\r
+               private RepeatContext fast;             // innermost FastRepeat context; drives Mode.Count\r
+\r
+               // private classes\r
+\r
+               // State of one active quantifier. Contexts form a singly linked stack\r
+               // through Previous, so nested repeats can be unwound one at a time.\r
+               private class RepeatContext {\r
+                       // fixed description of the loop\r
+                       private RepeatContext previous;\r
+                       private int min, max;\r
+                       private bool lazy;\r
+                       private int expr_pc;\r
+\r
+                       // progress made so far\r
+                       private int start;\r
+                       private int count;\r
+\r
+                       // previous - context to return to once this one is finished\r
+                       // min, max - iteration bounds compared against Count\r
+                       // lazy     - value reported by IsLazy\r
+                       // expr_pc  - program index of the repeated subexpression\r
+                       //\r
+                       // A new context starts with Count == 0 and Start == -1.\r
+                       public RepeatContext (RepeatContext previous, int min, int max, bool lazy, int expr_pc) {\r
+                               this.previous = previous;\r
+                               this.expr_pc = expr_pc;\r
+                               this.lazy = lazy;\r
+                               this.min = min;\r
+                               this.max = max;\r
+\r
+                               this.count = 0;\r
+                               this.start = -1;\r
+                       }\r
+\r
+                       // Context that was active before this one.\r
+                       public RepeatContext Previous {\r
+                               get { return previous; }\r
+                       }\r
+\r
+                       // Program index of the repeated subexpression.\r
+                       public int Expression {\r
+                               get { return expr_pc; }\r
+                       }\r
+\r
+                       public bool IsLazy {\r
+                               get { return lazy; }\r
+                       }\r
+\r
+                       // True once Count has reached the minimum.\r
+                       public bool IsMinimum {\r
+                               get { return count >= min; }\r
+                       }\r
+\r
+                       // True once Count has reached the maximum.\r
+                       public bool IsMaximum {\r
+                               get { return count >= max; }\r
+                       }\r
+\r
+                       // Text position recorded by the interpreter for this loop.\r
+                       public int Start {\r
+                               get { return start; }\r
+                               set { start = value; }\r
+                       }\r
+\r
+                       // Number of iterations counted so far.\r
+                       public int Count {\r
+                               get { return count; }\r
+                               set { count = value; }\r
+                       }\r
+               }\r
+\r
+               // Evaluation mode handed to Eval (and passed through to EvalChar).\r
+               private enum Mode {\r
+                       // NOTE(review): presumably used while scanning for a match start;\r
+                       // verify against the beginning of Eval.\r
+                       Search,\r
+                       // plain evaluation: Eval returns true on Pass and false on Fail\r
+                       Match,\r
+                       // counting iterations of a FastRepeat body: each Pass increments\r
+                       // fast.Count and re-runs the body until the bound is reached\r
+                       Count\r
+               }\r
+       }\r
+}\r
diff --git a/mcs/class/System/System.Text.RegularExpressions/interval.cs b/mcs/class/System/System.Text.RegularExpressions/interval.cs
new file mode 100644 (file)
index 0000000..4c8d19f
--- /dev/null
@@ -0,0 +1,305 @@
+//\r
+// assembly:   System\r
+// namespace:  System.Text.RegularExpressions\r
+// file:       interval.cs\r
+//\r
+// author:     Dan Lewis (dlewis@gmx.co.uk)\r
+//             (c) 2002\r
+\r
+using System;\r
+using System.Collections;\r
+\r
+namespace System.Text.RegularExpressions {\r
+\r
+       // Closed integer interval [low, high]; it is empty when low > high.\r
+       // contiguous is true for intervals built by the constructor or by Empty; code that\r
+       // builds a spanning "set" interval (see IntervalCollection.Optimize) clears it.\r
+       struct Interval : IComparable {\r
+               public int low;\r
+               public int high;\r
+               public bool contiguous;\r
+\r
+               // The canonical empty interval (low = 0, high = -1).\r
+               public static Interval Empty {\r
+                       get {\r
+                               Interval i;\r
+                               i.low = 0;\r
+                               i.high = i.low - 1;\r
+                               i.contiguous = true;\r
+\r
+                               return i;\r
+                       }\r
+               }\r
+\r
+               // The interval covering every Int32 value.\r
+               public static Interval Entire {\r
+                       get { return new Interval (Int32.MinValue, Int32.MaxValue); }\r
+               }\r
+\r
+               // Builds a contiguous interval. Bounds given in reverse order are swapped, so\r
+               // this constructor never produces an empty interval.\r
+               public Interval (int low, int high) {\r
+                       if (low > high) {\r
+                               int t = low;\r
+                               low = high;\r
+                               high = t;\r
+                       }\r
+\r
+                       this.low = low;\r
+                       this.high = high;\r
+                       this.contiguous = true;\r
+               }\r
+\r
+               public bool IsDiscontiguous {\r
+                       get { return !contiguous; }\r
+               }\r
+\r
+               // A contiguous interval holding exactly one value.\r
+               public bool IsSingleton {\r
+                       get { return contiguous && low == high; }\r
+               }\r
+\r
+               // Anything that is neither empty nor a singleton.\r
+               public bool IsRange {\r
+                       get { return !IsSingleton && !IsEmpty; }\r
+               }\r
+\r
+               public bool IsEmpty {\r
+                       get { return low > high; }\r
+               }\r
+\r
+               // Number of integers covered, 0 when empty. The result saturates at\r
+               // Int32.MaxValue: "high - low + 1" does not fit in an int for very wide\r
+               // intervals (for Entire it used to wrap around to 0).\r
+               public int Size {\r
+                       get {\r
+                               if (IsEmpty)\r
+                                       return 0;\r
+\r
+                               long size = (long)high - low + 1;\r
+                               return size > Int32.MaxValue ? Int32.MaxValue : (int)size;\r
+                       }\r
+               }\r
+\r
+               // True when the two intervals share no value; an empty interval is disjoint\r
+               // from everything.\r
+               public bool IsDisjoint (Interval i) {\r
+                       if (IsEmpty || i.IsEmpty)\r
+                               return true;\r
+\r
+                       return !(low <= i.high && i.low <= high);\r
+               }\r
+\r
+               // True when one interval ends exactly one below where the other begins.\r
+               public bool IsAdjacent (Interval i) {\r
+                       if (IsEmpty || i.IsEmpty)\r
+                               return false;\r
+\r
+                       // compare in 64 bits so bounds at Int32.MinValue / MaxValue cannot wrap\r
+                       return (long)low == (long)i.high + 1 || (long)high == (long)i.low - 1;\r
+               }\r
+\r
+               // True when every value of i lies in this interval. A non-empty interval\r
+               // contains the empty one; an empty interval contains nothing.\r
+               public bool Contains (Interval i) {\r
+                       if (!IsEmpty && i.IsEmpty)\r
+                               return true;\r
+                       if (IsEmpty)\r
+                               return false;\r
+\r
+                       return low <= i.low && i.high <= high;\r
+               }\r
+\r
+               public bool Contains (int i) {\r
+                       return low <= i && i <= high;\r
+               }\r
+\r
+               // Grows this interval to the smallest one covering both itself and i.\r
+               // The contiguous flag is left untouched.\r
+               public void Merge (Interval i) {\r
+                       if (i.IsEmpty)\r
+                               return;\r
+\r
+                       if (IsEmpty) {\r
+                               low = i.low;\r
+                               high = i.high;\r
+                               return;\r
+                       }\r
+\r
+                       if (i.low < low)\r
+                               low = i.low;\r
+                       if (i.high > high)\r
+                               high = i.high;\r
+               }\r
+\r
+               // Shrinks this interval to the values it shares with i; the result is the\r
+               // canonical empty interval when the two are disjoint.\r
+               public void Intersect (Interval i) {\r
+                       if (IsDisjoint (i)) {\r
+                               low = 0;\r
+                               high = low - 1;\r
+                               return;\r
+                       }\r
+\r
+                       if (i.low > low)\r
+                               low = i.low;\r
+                       // the upper bound must move down to the smaller of the two highs\r
+                       // (it was previously raised to the larger one, yielding the union's high)\r
+                       if (i.high < high)\r
+                               high = i.high;\r
+               }\r
+\r
+               // Orders intervals by their lower bound. Uses comparisons rather than\r
+               // subtraction, which overflows for bounds far apart (e.g. Int32.MinValue).\r
+               // Throws InvalidCastException if o is not an Interval.\r
+               public int CompareTo (object o) {\r
+                       int other = ((Interval)o).low;\r
+\r
+                       if (low < other)\r
+                               return -1;\r
+                       if (low > other)\r
+                               return 1;\r
+\r
+                       return 0;\r
+               }\r
+\r
+               // "(EMPTY)", "{low, high}" for non-contiguous sets, "(n)" for singletons and\r
+               // "(low, high)" otherwise. Declared as an override (not "new") so the text is\r
+               // also produced when the struct is boxed or used in string concatenation.\r
+               public override string ToString () {\r
+                       if (IsEmpty)\r
+                               return "(EMPTY)";\r
+                       else if (!contiguous)\r
+                               return "{" + low + ", " + high + "}";\r
+                       else if (IsSingleton)\r
+                               return "(" + low + ")";\r
+                       else\r
+                               return "(" + low + ", " + high + ")";\r
+               }\r
+       }\r
+\r
+       // A list of Interval values backed by an ArrayList, with helpers to sort, merge\r
+       // overlapping/adjacent members and to compute a cheaper "meta" collection.\r
+       class IntervalCollection : ICollection, IEnumerable {\r
+               public IntervalCollection () {\r
+                       intervals = new ArrayList ();\r
+               }\r
+\r
+               // Intervals are structs: the getter returns a copy, so changes must be written\r
+               // back through the setter.\r
+               public Interval this[int i] {\r
+                       get { return (Interval)intervals[i]; }\r
+                       set { intervals[i] = value; }\r
+               }\r
+\r
+               public void Add (Interval i) {\r
+                       intervals.Add (i);\r
+               }\r
+\r
+               public void Clear () {\r
+                       intervals.Clear ();\r
+               }\r
+\r
+               // Sorts by Interval.CompareTo, i.e. by lower bound.\r
+               public void Sort () {\r
+                       intervals.Sort ();\r
+               }\r
+\r
+               // Sorts the collection and then merges every pair of neighbours that overlap\r
+               // or are adjacent, leaving sorted, pairwise separated intervals.\r
+               public void Normalize () {\r
+                       intervals.Sort ();\r
+\r
+                       int j = 0;\r
+                       while (j < intervals.Count - 1) {\r
+                               Interval a = (Interval)intervals[j];\r
+                               Interval b = (Interval)intervals[j + 1];\r
+\r
+                               if (!a.IsDisjoint (b) || a.IsAdjacent (b)) {\r
+                                       // fold b into a and re-examine the same slot against its new neighbour\r
+                                       a.Merge (b);\r
+                                       intervals[j] = a;\r
+                                       intervals.RemoveAt (j + 1);\r
+                               }\r
+                               else\r
+                                       ++ j;\r
+                       }\r
+\r
+               }\r
+\r
+               // Caller-supplied cost estimate for a single interval.\r
+               public delegate double CostDelegate (Interval i);\r
+\r
+               // Normalizes this collection (modifying it in place) and returns a new, sorted\r
+               // collection in which runs of members have been replaced by one spanning,\r
+               // non-contiguous interval wherever cost_del rates that as cheaper.\r
+               public IntervalCollection GetMetaCollection (CostDelegate cost_del) {\r
+                       IntervalCollection meta = new IntervalCollection ();\r
+\r
+                       Normalize ();\r
+                       Optimize (0, Count - 1, meta, cost_del);\r
+                       meta.intervals.Sort ();\r
+\r
+                       return meta;\r
+               }\r
+\r
+               // Examines every run i..j of members within [begin, end]. A run qualifies when\r
+               // the cost of a single spanning interval (contiguous = false) is lower than the\r
+               // summed cost of its members; among qualifying runs the one with the largest\r
+               // summed cost wins. The winner is added to meta and the members to its left and\r
+               // right are optimized recursively. If no run qualifies the members are copied\r
+               // to meta unchanged.\r
+               private void Optimize (int begin, int end, IntervalCollection meta, CostDelegate cost_del) {\r
+                       Interval set;\r
+                       set.contiguous = false;\r
+\r
+                       int best_set_begin = -1;\r
+                       int best_set_end = -1;\r
+                       double best_set_cost = 0;\r
+\r
+                       for (int i = begin; i <= end; ++ i) {\r
+                               set.low = this[i].low;\r
+\r
+                               // running total of the member costs for the run i..j\r
+                               double cost = 0.0;\r
+                               for (int j = i; j <= end; ++ j) {\r
+                                       set.high = this[j].high;\r
+                                       cost += cost_del (this[j]);\r
+\r
+                                       double set_cost = cost_del (set);\r
+                                       if (set_cost < cost && cost > best_set_cost) {\r
+                                               best_set_begin = i;\r
+                                               best_set_end = j;\r
+                                               best_set_cost = cost;\r
+                                       }\r
+                               }\r
+                       }\r
+\r
+                       if (best_set_begin < 0) {\r
+                               // didn't find an optimal set: add original members\r
+\r
+                               for (int i = begin; i <= end; ++ i)\r
+                                       meta.Add (this[i]);\r
+                       }\r
+                       else {\r
+                               // found set: add it ...\r
+\r
+                               set.low = this[best_set_begin].low;\r
+                               set.high = this[best_set_end].high;\r
+\r
+                               meta.Add (set);\r
+\r
+                               // ... and optimize to the left and right\r
+\r
+                               if (best_set_begin > begin)\r
+                                       Optimize (begin, best_set_begin - 1, meta, cost_del);\r
+                               if (best_set_end < end)\r
+                                       Optimize (best_set_end + 1, end, meta, cost_del);\r
+                       }\r
+               }\r
+\r
+               // ICollection implementation\r
+\r
+               public int Count {\r
+                       get { return intervals.Count; }\r
+               }\r
+\r
+               public bool IsSynchronized {\r
+                       get { return false; }\r
+               }\r
+\r
+               public object SyncRoot {\r
+                       get { return intervals; }\r
+               }\r
+\r
+               // Copies the intervals into array starting at index. This is best-effort:\r
+               // copying stops silently once the array is full instead of throwing.\r
+               public void CopyTo (Array array, int index) {\r
+                       foreach (Interval i in intervals) {\r
+                               // index == array.Length is already past the last valid slot\r
+                               if (index >= array.Length)\r
+                                       break;\r
+\r
+                               array.SetValue (i, index ++);\r
+                       }\r
+               }\r
+\r
+               // IEnumerator implementation\r
+\r
+               public IEnumerator GetEnumerator () {\r
+                       return new Enumerator (intervals);\r
+               }\r
+\r
+               // Index-based enumerator over the backing list; ptr is -1 before the first\r
+               // MoveNext and list.Count once the end has been reached.\r
+               private class Enumerator : IEnumerator {\r
+                       public Enumerator (IList list) {\r
+                               this.list = list;\r
+                               Reset ();\r
+                       }\r
+\r
+                       // Throws InvalidOperationException when positioned before the first or\r
+                       // after the last element.\r
+                       public object Current {\r
+                               get {\r
+                                       if (ptr < 0 || ptr >= list.Count)\r
+                                               throw new InvalidOperationException ();\r
+\r
+                                       return list[ptr];\r
+                               }\r
+                       }\r
+\r
+                       // Advances to the next element. Once the end is reached the position is\r
+                       // not advanced any further, so repeated calls keep returning false.\r
+                       public bool MoveNext () {\r
+                               // only reachable if the list shrank underneath the enumerator\r
+                               if (ptr > list.Count)\r
+                                       throw new InvalidOperationException ();\r
+\r
+                               if (ptr < list.Count)\r
+                                       ++ ptr;\r
+\r
+                               return ptr < list.Count;\r
+                       }\r
+\r
+                       public void Reset () {\r
+                               ptr = -1;\r
+                       }\r
+\r
+                       private IList list;\r
+                       private int ptr;\r
+               }\r
+\r
+               // private fields\r
+\r
+               private ArrayList intervals;\r
+       }\r
+}\r
diff --git a/mcs/class/System/System.Text.RegularExpressions/match.cs b/mcs/class/System/System.Text.RegularExpressions/match.cs
new file mode 100644 (file)
index 0000000..7bc3d70
--- /dev/null
@@ -0,0 +1,215 @@
+//\r
+// assembly:   System\r
+// namespace:  System.Text.RegularExpressions\r
+// file:       match.cs\r
+//\r
+// author:     Dan Lewis (dlewis@gmx.co.uk)\r
+//             (c) 2002\r
+\r
+using System;\r
+\r
+namespace System.Text.RegularExpressions {\r
+\r
+       // One captured span of the input text, recorded as a pair of positions.\r
+       // A capture is "defined" once both positions are non-negative; the positions may\r
+       // be stored in either order (start > end for a reverse capture).\r
+       public class Capture {\r
+               // Leftmost position of the capture, or 0 while it is not defined.\r
+               public int Index {\r
+                       get {\r
+                               if (!IsDefined)\r
+                                       return 0;\r
+\r
+                               return Math.Min (start, end);\r
+                       }\r
+               }\r
+\r
+               // Distance between the two positions, or 0 while the capture is not defined.\r
+               public int Length {\r
+                       get { return IsDefined ? Math.Abs (end - start) : 0; }\r
+               }\r
+\r
+               // The captured substring, or "" while the capture is not defined.\r
+               public string Value {\r
+                       get {\r
+                               if (!IsDefined)\r
+                                       return "";\r
+\r
+                               return text.Substring (Index, Length);\r
+                       }\r
+               }\r
+\r
+               public override string ToString () {\r
+                       return Value;\r
+               }\r
+\r
+               // internal members\r
+\r
+               // empty capture: no text, no predecessor\r
+               internal Capture () : this ((string)null) {\r
+               }\r
+\r
+               // copy constructor\r
+               internal Capture (Capture cap) {\r
+                       this.start = cap.start;\r
+                       this.end = cap.end;\r
+\r
+                       this.text = cap.text;\r
+                       this.checkpoint = cap.checkpoint;\r
+                       this.previous = cap.previous;\r
+               }\r
+\r
+               // first capture over the given text\r
+               internal Capture (string text) {\r
+                       this.start = -1;\r
+                       this.end = -1;\r
+\r
+                       this.text = text;\r
+                       this.checkpoint = 0;\r
+                       this.previous = null;\r
+               }\r
+\r
+               // new, still undefined capture chained after an earlier one; shares its text\r
+               internal Capture (Capture previous, int checkpoint) {\r
+                       this.start = -1;\r
+                       this.end = -1;\r
+\r
+                       this.text = previous.text;\r
+                       this.checkpoint = checkpoint;\r
+                       this.previous = previous;\r
+               }\r
+\r
+               internal Capture Previous {\r
+                       get { return previous; }\r
+               }\r
+\r
+               internal string Text {\r
+                       get { return text; }\r
+               }\r
+\r
+               internal int Checkpoint {\r
+                       get { return checkpoint; }\r
+               }\r
+\r
+               internal bool IsDefined {\r
+                       get { return !(start < 0 || end < 0); }\r
+               }\r
+\r
+               // Walks back along the Previous chain, starting with this capture, and returns\r
+               // the first defined one (null if there is none).\r
+               internal Capture GetLastDefined () {\r
+                       for (Capture cur = this; cur != null; cur = cur.Previous) {\r
+                               if (cur.IsDefined)\r
+                                       return cur;\r
+                       }\r
+\r
+                       return null;\r
+               }\r
+\r
+               internal void Open (int ptr) {\r
+                       start = ptr;\r
+               }\r
+\r
+               internal void Close (int ptr) {\r
+                       end = ptr;\r
+               }\r
+\r
+               // private\r
+\r
+               private int start, end;\r
+               private string text;\r
+               private int checkpoint;\r
+               private Capture previous;\r
+       }\r
+\r
+       // A capturing group: a Capture plus the collection of captures built from it.\r
+       public class Group : Capture {\r
+               // Currently returns the argument unchanged; no synchronization is added.\r
+               public static Group Synchronized (Group inner) {\r
+                       return inner;   // is this enough?\r
+               }\r
+\r
+               // NOTE(review): captures is only assigned by the Group (Capture) constructor,\r
+               // so this returns null for groups created through the parameterless one.\r
+               public CaptureCollection Captures {\r
+                       get { return captures; }\r
+               }\r
+\r
+               // True when this capture or one of its predecessors is defined.\r
+               public bool Success {\r
+                       get { return GetLastDefined () != null; }\r
+               }\r
+\r
+               // internal\r
+\r
+               // Creates an empty group; the captures field is left unassigned (null).\r
+               internal Group () : base () {\r
+               }\r
+               \r
+               // Copies the state of last into this group and builds the capture collection\r
+               // from it.\r
+               internal Group (Capture last) : base (last) {\r
+                       captures = new CaptureCollection (last);\r
+\r
+                       // TODO make construction of captures lazy\r
+               }\r
+\r
+               private CaptureCollection captures;\r
+       }\r
+\r
+       // Result of one match attempt: group 0 is the match itself, followed by one\r
+       // Group per additional capture handed to the constructor.\r
+       public class Match : Group {\r
+               // Shared instance created by the parameterless constructor.\r
+               public static Match Empty {\r
+                       get { return empty; }\r
+               }\r
+\r
+               public static Match Synchronized (Match inner) {\r
+                       // FIXME need to sync on machine access\r
+                       return inner;\r
+               }\r
+\r
+               public GroupCollection Groups {\r
+                       get { return groups; }\r
+               }\r
+\r
+               // Resumes scanning where this match left off (at its left edge for\r
+               // right-to-left regexes, at its right edge otherwise). Calling this on\r
+               // Empty returns Empty.\r
+               public Match NextMatch () {\r
+                       if (this == Empty)\r
+                               return Empty;\r
+\r
+                       bool reverse = regex.RightToLeft;\r
+\r
+                       int scan_ptr = Index;\r
+                       if (!reverse)\r
+                               scan_ptr += Length;\r
+\r
+                       // next match after an empty match: make sure scan ptr makes progress\r
+\r
+                       if (Length == 0) {\r
+                               if (reverse)\r
+                                       -- scan_ptr;\r
+                               else\r
+                                       ++ scan_ptr;\r
+                       }\r
+\r
+                       return machine.Scan (regex, Text, scan_ptr, text_length);\r
+               }\r
+\r
+               // Expands the replacement pattern against this match.\r
+               public virtual string Result (string replacement) {\r
+                       return ReplacementEvaluator.Evaluate (replacement, this);\r
+               }\r
+\r
+               // internal\r
+\r
+               // Builds the empty match: no regex, no machine, and itself as the only group.\r
+               internal Match () : base () {\r
+                       this.groups = new GroupCollection ();\r
+                       groups.Add (this);\r
+\r
+                       this.regex = null;\r
+                       this.machine = null;\r
+                       this.text_length = 0;\r
+               }\r
+\r
+               // captures[0] describes the whole match; each further element becomes a Group.\r
+               internal Match (Regex regex, IMachine machine, int text_length, Capture[] captures) : base (captures[0]) {\r
+                       this.regex = regex;\r
+                       this.machine = machine;\r
+                       this.text_length = text_length;\r
+\r
+                       this.groups = new GroupCollection ();\r
+                       groups.Add (this);\r
+\r
+                       int i = 1;\r
+                       while (i < captures.Length) {\r
+                               groups.Add (new Group (captures[i]));\r
+                               ++ i;\r
+                       }\r
+               }\r
+\r
+               internal Regex Regex {\r
+                       get { return regex; }\r
+               }\r
+\r
+               // private\r
+\r
+               private Regex regex;\r
+               private IMachine machine;\r
+               private int text_length;\r
+               private GroupCollection groups;\r
+\r
+               private static Match empty = new Match ();\r
+       }\r
+}\r
diff --git a/mcs/class/System/System.Text.RegularExpressions/notes.txt b/mcs/class/System/System.Text.RegularExpressions/notes.txt
new file mode 100644 (file)
index 0000000..ef5f5a2
--- /dev/null
@@ -0,0 +1,32 @@
+TODO:
+
+* Need to go through everything and square it with RightToLeft matching.
+  The support for this was built into an early version, and lots of things built
+  afterwards are not savvy about bi-directional matching. Things that spring to
+  mind: Regex match methods should start at 0 or text.Length depending on
+  direction. Do split and replace need changes? Match should be aware of its
+  direction (already applied some of this to NextMatch logic). The interpreter
+  needs to check left and right bounds. Anchoring and substring discovery need
+  to be reworked. RTL matches are going to have anchors on the right - ie $, \Z
+  and \z. This should be added to the anchor logic. QuickSearch needs to work in
+  reverse. There may be other stuff.... work through the code.
+
+* Add ECMAScript support to the parser. For example, [.\w\s\d] map to ECMA
+  categories instead of canonical ones. There's different behaviour on
+  backreference/octal disambiguation. Find out what the runtime behavioural
+  difference is for cyclic backreferences eg (?(1)abc\1) - this is only briefly 
+  mentioned in the spec. I couldn't find much on this in the ECMAScript
+  specification either.
+
+* Check the octal disambiguation for canonical syntax works as specced.
+
+* Add a check in QuickSearch for single character substrings. This is likely to
+  be a common case. There's no need to go through a shift table. Also, have a
+  look at just computing a relevant subset of the shift table and using an
+  (offset, size) pair to help test inclusion. Characters not in the table get
+  the default len + 1 shift.
+
+* Improve the perl test suite. Run under MS runtime to generate checksums for
+  each trial. Checksums should incorporate: all captures (index, length) for all
+  groups; names of explicit capturing groups, and the numbers they map to. Any
+  other state? RegexTrial.Execute() will then compare result and checksum.
diff --git a/mcs/class/System/System.Text.RegularExpressions/parser.cs b/mcs/class/System/System.Text.RegularExpressions/parser.cs
new file mode 100644 (file)
index 0000000..8bece92
--- /dev/null
@@ -0,0 +1,1044 @@
+//\r
+// assembly:   System\r
+// namespace:  System.Text.RegularExpressions\r
+// file:       parser.cs\r
+//\r
+// author:     Dan Lewis (dlewis@gmx.co.uk)\r
+//             (c) 2002\r
+\r
+using System;\r
+using System.Collections;\r
+using System.Globalization;\r
+\r
+namespace System.Text.RegularExpressions.Syntax {\r
+\r
+       class Parser {\r
+               // Parses one or more base-10 digits of str starting at ptr via ParseNumber.\r
+               // Returns the value and advances ptr past the digits, or returns -1 and\r
+               // leaves ptr unchanged when no digit is found.\r
+               public static int ParseDecimal (string str, ref int ptr) {\r
+                       return ParseNumber (str, ref ptr, 10, 1, Int32.MaxValue);\r
+               }\r
+\r
+               // Parses between one and three base-8 digits of str starting at ptr via\r
+               // ParseNumber. Returns -1 and leaves ptr unchanged when no digit is found.\r
+               public static int ParseOctal (string str, ref int ptr) {\r
+                       return ParseNumber (str, ref ptr, 8, 1, 3);\r
+               }\r
+\r
+               // Parses base-16 digits of str starting at ptr via ParseNumber, passing digits\r
+               // as both the minimum and the maximum digit count. Returns -1 and leaves ptr\r
+               // unchanged when fewer than that many digits are found.\r
+               public static int ParseHex (string str, ref int ptr, int digits) {\r
+                       return ParseNumber (str, ref ptr, 16, digits, digits);\r
+               }\r
+\r
+               // Reads digits of base b from str starting at ptr, stopping at the first\r
+               // character ParseDigit rejects, at the end of str, or after max digits.\r
+               // A max smaller than min means "no upper limit". Returns the accumulated value\r
+               // and moves ptr past the digits; if fewer than min digits were read, returns -1\r
+               // and leaves ptr untouched. The accumulation is not checked for overflow.\r
+               public static int ParseNumber (string str, ref int ptr, int b, int min, int max) {\r
+                       if (max < min)\r
+                               max = Int32.MaxValue;\r
+\r
+                       int pos = ptr;\r
+                       int value = 0;\r
+                       int count = 0;\r
+\r
+                       while (count < max && pos < str.Length) {\r
+                               int digit = ParseDigit (str[pos], b, count);\r
+                               if (digit < 0)\r
+                                       break;\r
+\r
+                               value = value * b + digit;\r
+                               ++ pos;\r
+                               ++ count;\r
+                       }\r
+\r
+                       if (count < min)\r
+                               return -1;\r
+\r
+                       ptr = pos;\r
+                       return value;\r
+               }\r
+\r
+               // Parses a group name at str[ptr]. If the first character is a digit the name\r
+               // is a decimal number: it is returned in string form when greater than zero,\r
+               // otherwise null. Any other name is the longest run of IsNameChar characters,\r
+               // or null if that run is empty. ptr is advanced past whatever was consumed.\r
+               // Reading beyond the end of str raises IndexOutOfRangeException, which is not\r
+               // handled here.\r
+               public static string ParseName (string str, ref int ptr) {\r
+                       if (!Char.IsDigit (str[ptr])) {\r
+                               int begin = ptr;\r
+                               while (IsNameChar (str[ptr]))\r
+                                       ++ ptr;\r
+\r
+                               int length = ptr - begin;\r
+                               return length > 0 ? str.Substring (begin, length) : null;\r
+                       }\r
+\r
+                       // max (0) < min (1) tells ParseNumber to read an unlimited number of digits\r
+                       int number = ParseNumber (str, ref ptr, 10, 1, 0);\r
+                       return number > 0 ? number.ToString () : null;\r
+               }\r
+\r
+               // Returns str with every character that is special in a pattern preceded by a\r
+               // backslash ( \ * + ? | { [ ( ) ^ $ . # and space ), and with tab, newline,\r
+               // carriage return and form feed written as \t, \n, \r and \f. All other\r
+               // characters are copied unchanged.\r
+               public static string Escape (string str) {\r
+                       // build in a StringBuilder: repeated string concatenation copies the whole\r
+                       // result on every character and is quadratic in the input length\r
+                       StringBuilder result = new StringBuilder (str.Length);\r
+\r
+                       for (int i = 0; i < str.Length; ++ i) {\r
+                               char c = str[i];\r
+                               switch (c) {\r
+                               case '\\': case '*': case '+': case '?': case '|':\r
+                               case '{': case '[': case '(': case ')': case '^':\r
+                               case '$': case '.': case '#': case ' ':\r
+                                       result.Append ('\\');\r
+                                       result.Append (c);\r
+                                       break;\r
+\r
+                               case '\t': result.Append ("\\t"); break;\r
+                               case '\n': result.Append ("\\n"); break;\r
+                               case '\r': result.Append ("\\r"); break;\r
+                               case '\f': result.Append ("\\f"); break;\r
+\r
+                               default: result.Append (c); break;\r
+                               }\r
+                       }\r
+\r
+                       return result.ToString ();\r
+               }\r
+\r
+               // Runs str through ParseString on a fresh Parser instance and returns the\r
+               // result (presumably the text with escape sequences resolved - ParseString is\r
+               // defined elsewhere, confirm there).\r
+               public static string Unescape (string str) {\r
+                       return new Parser ().ParseString (str);\r
+               }\r
+\r
+               // public instance\r
+\r
+               // Creates a parser with empty caps (list) and refs (hash table) bookkeeping.\r
+               public Parser () {\r
+                       this.caps = new ArrayList ();\r
+                       this.refs = new Hashtable ();\r
+               }\r
+\r
+               // Parses pattern into a RegularExpression tree. All per-parse state (pattern,\r
+               // ptr, caps, refs, num_groups) is reset first, so one Parser instance can be\r
+               // reused. Running off the end of the pattern anywhere during parsing is\r
+               // reported as an "Unexpected end of pattern." parse exception.\r
+               public RegularExpression ParseRegularExpression (string pattern, RegexOptions options) {\r
+                       this.pattern = pattern;\r
+                       this.ptr = 0;\r
+\r
+                       caps.Clear ();\r
+                       refs.Clear ();\r
+                       this.num_groups = 0;\r
+\r
+                       try {\r
+                               RegularExpression re = new RegularExpression ();\r
+                               // the whole pattern is parsed as the body of the top-level group\r
+                               ParseGroup (re, options, null);\r
+                               ResolveReferences ();\r
+\r
+                               re.GroupCount = num_groups;\r
+                               \r
+                               return re;\r
+                       }\r
+                       catch (IndexOutOfRangeException) {\r
+                               // the parsing code indexes the pattern without bounds checks and relies\r
+                               // on this handler to turn an overrun into a parse error\r
+                               throw NewParseException ("Unexpected end of pattern.");\r
+                       }\r
+               }\r
+\r
+               // Builds a new dictionary from group name to group number, with one entry for\r
+               // every element of caps whose Name is not null.\r
+               public IDictionary GetMapping () {\r
+                       Hashtable names = new Hashtable ();\r
+\r
+                       for (int i = 0; i < caps.Count; ++ i) {\r
+                               CapturingGroup cap = (CapturingGroup)caps[i];\r
+                               if (cap.Name == null)\r
+                                       continue;\r
+\r
+                               names.Add (cap.Name, cap.Number);\r
+                       }\r
+\r
+                       return names;\r
+               }\r
+\r
+               // private methods\r
+\r
+               private void ParseGroup (Group group, RegexOptions options, Assertion assertion) {\r
+                       bool is_top_level = group is RegularExpression;\r
+               \r
+                       Alternation alternation = null;\r
+                       string literal = null;\r
+\r
+                       Group current = new Group ();\r
+                       Expression expr = null;\r
+                       bool closed = false;\r
+\r
+                       while (true) {\r
+                               ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
+                               if (ptr >= pattern.Length)\r
+                                       break;\r
+                               \r
+                               // (1) Parse for Expressions\r
+                       \r
+                               char ch = pattern[ptr ++];\r
+                               \r
+                               switch (ch) {\r
+                               case '^': {\r
+                                       Position pos =\r
+                                               IsMultiline (options) ? Position.StartOfLine : Position.Start;\r
+                                       expr = new PositionAssertion (pos);\r
+                                       break;\r
+                               }\r
+\r
+                               case '$': {\r
+                                       Position pos =\r
+                                               IsMultiline (options) ? Position.EndOfLine : Position.End;\r
+                                       expr = new PositionAssertion (pos);\r
+                                       break;\r
+                               }\r
+\r
+                               case '.': {\r
+                                       Category cat =\r
+                                               IsSingleline (options) ? Category.AnySingleline : Category.Any;\r
+                                       expr = new CharacterClass (cat, false);\r
+                                       break;\r
+                               }\r
+\r
+                               case '\\': {\r
+                                       int c = ParseEscape ();\r
+                                       if (c >= 0)\r
+                                               ch = (char)c;\r
+                                       else {\r
+                                               expr = ParseSpecial (options);\r
+\r
+                                               if (expr == null)\r
+                                                       ch = pattern[ptr ++];           // default escape\r
+                                       }\r
+                                       break;\r
+                               }\r
+\r
+                               case '[': {\r
+                                       expr = ParseCharacterClass (options);\r
+                                       break;\r
+                               }\r
+\r
+                               case '(': {\r
+                                       bool ignore = IsIgnoreCase (options);\r
+                                       expr = ParseGroupingConstruct (ref options);\r
+                                       if (expr == null) {\r
+                                               if (literal != null && IsIgnoreCase (options) != ignore) {\r
+                                                       current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));\r
+                                                       literal = null;\r
+                                               }\r
+\r
+                                               continue;\r
+                                       }\r
+                                       break;\r
+                               }\r
+\r
+                               case ')': {\r
+                                       closed = true;\r
+                                       goto EndOfGroup;\r
+                               }\r
+\r
+                               case '|': {\r
+                                       if (literal != null) {\r
+                                               current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));\r
+                                               literal = null;\r
+                                       }\r
+\r
+                                       if (assertion != null) {\r
+                                               if (assertion.TrueExpression == null)\r
+                                                       assertion.TrueExpression = current;\r
+                                               else if (assertion.FalseExpression == null)\r
+                                                       assertion.FalseExpression = current;\r
+                                               else\r
+                                                       throw NewParseException ("Too many | in (?()|).");\r
+                                       }\r
+                                       else {\r
+                                               if (alternation == null)\r
+                                                       alternation = new Alternation ();\r
+\r
+                                               alternation.AddAlternative (current);\r
+                                       }\r
+\r
+                                       current = new Group ();\r
+                                       continue;\r
+                               }\r
+\r
+                               case '*': case '+': case '?': case '{': {\r
+                                       throw NewParseException ("Bad quantifier.");\r
+                               }\r
+\r
+                               default: \r
+                                       break;          // literal character\r
+                               }\r
+\r
+                               ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
+                               \r
+                               // (2) Check for Repetitions\r
+                               \r
+                               if (ptr < pattern.Length) {\r
+                                       char k = pattern[ptr];\r
+\r
+                                       if (k == '?' || k == '*' || k == '+' || k == '{') {\r
+                                               ++ ptr;\r
+\r
+                                               int min = 0, max = 0;\r
+                                               bool lazy = false;\r
+\r
+                                               switch (k) {\r
+                                               case '?': min = 0; max = 1; break;\r
+                                               case '*': min = 0; max = 0xffff; break;\r
+                                               case '+': min = 1; max = 0xffff; break;\r
+                                               case '{': ParseRepetitionBounds (out min, out max, options); break;\r
+                                               }\r
+\r
+                                               ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
+                                               if (ptr < pattern.Length && pattern[ptr] == '?') {\r
+                                                       ++ ptr;\r
+                                                       lazy = true;\r
+                                               }\r
+\r
+                                               Repetition repetition = new Repetition (min, max, lazy);\r
+\r
+                                               if (expr == null)\r
+                                                       repetition.Expression = new Literal (ch.ToString (), IsIgnoreCase (options));\r
+                                               else\r
+                                                       repetition.Expression = expr;\r
+\r
+                                               expr = repetition;\r
+                                       }\r
+                               }\r
+\r
+                               // (3) Append Expression and/or Literal\r
+\r
+                               if (expr == null) {\r
+                                       if (literal == null)\r
+                                               literal = "";\r
+                                       literal += ch;\r
+                               }\r
+                               else {\r
+                                       if (literal != null) {\r
+                                               current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));\r
+                                               literal = null;\r
+                                       }\r
+\r
+                                       current.AppendExpression (expr);\r
+                                       expr = null;\r
+                               }\r
+\r
+                               if (is_top_level && ptr >= pattern.Length)\r
+                                       goto EndOfGroup;\r
+                       }\r
+\r
+               EndOfGroup:\r
+                       if (is_top_level && closed)\r
+                               throw NewParseException ("Too many )'s.");\r
+                       if (!is_top_level && !closed)\r
+                               throw NewParseException ("Not enough )'s.");\r
+                               \r
+               \r
+                       // clean up literals and alternations\r
+\r
+                       if (literal != null)\r
+                               current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));\r
+\r
+                       if (assertion != null) {\r
+                               if (assertion.TrueExpression == null)\r
+                                       assertion.TrueExpression = current;\r
+                               else\r
+                                       assertion.FalseExpression = current;\r
+                               \r
+                               group.AppendExpression (assertion);\r
+                       }\r
+                       else if (alternation != null) {\r
+                               alternation.AddAlternative (current);\r
+                               group.AppendExpression (alternation);\r
+                       }\r
+                       else\r
+                               group.AppendExpression (current);\r
+               }\r
+\r
+               // Parses the grouping construct whose first character (the one that\r
+               // follows the opening parenthesis) is at pattern[ptr].\r
+               //\r
+               // Recognised forms: plain groups, (?:...), (?>...), inline options\r
+               // (?imnsx-imnsx) and (?imnsx-imnsx:...), lookahead/lookbehind (?=...)\r
+               // (?!...) (?<=...) (?<!...), named groups (?<name>...) / (?'name'...),\r
+               // balancing groups (?<name-other>...), conditionals (?(test)...) and\r
+               // comments (?#...).\r
+               //\r
+               // options: options in effect. Only the bare (?imnsx-imnsx) form writes\r
+               //          back to it, changing the options of the enclosing group.\r
+               // Returns the parsed expression, or null when the construct yields no\r
+               // expression (an options change or a comment).\r
+               // Malformed constructs raise the exception built by NewParseException.\r
+               private Expression ParseGroupingConstruct (ref RegexOptions options) {\r
+                       if (pattern[ptr] != '?') {\r
+                               // plain parenthesis: capturing unless ExplicitCapture is on\r
+                               Group group;\r
+\r
+                               if (IsExplicitCapture (options))\r
+                                       group = new Group ();\r
+                               else {\r
+                                       group = new CapturingGroup ();\r
+                                       caps.Add (group);\r
+                               }\r
+\r
+                               ParseGroup (group, options, null);\r
+                               return group;\r
+                       }\r
+                       else\r
+                               ++ ptr;\r
+\r
+                       switch (pattern[ptr]) {\r
+                       case ':': {                                             // non-capturing group\r
+                               ++ ptr;\r
+                               Group group = new Group ();\r
+                               ParseGroup (group, options, null);\r
+\r
+                               return group;\r
+                       }\r
+\r
+                       case '>': {                                             // non-backtracking group\r
+                               ++ ptr;\r
+                               Group group = new NonBacktrackingGroup ();\r
+                               ParseGroup (group, options, null);\r
+\r
+                               return group;\r
+                       }\r
+\r
+                       case 'i': case 'm': case 'n':\r
+                       case 's': case 'x': case '-': {                         // options\r
+                               // work on a copy so that the (?opts:...) form leaves the\r
+                               // enclosing group's options untouched\r
+                               RegexOptions o = options;\r
+                               ParseOptions (ref o, false);\r
+                               if (pattern[ptr] == '-') {\r
+                                       ++ ptr;\r
+                                       ParseOptions (ref o, true);\r
+                               }\r
+\r
+                               if (pattern[ptr] == ':') {                      // pass options to child group\r
+                                       ++ ptr;\r
+                                       Group group = new Group ();\r
+                                       ParseGroup (group, o, null);\r
+                                       return group;\r
+                               }\r
+                               else if (pattern[ptr] == ')') {                 // change options of enclosing group\r
+                                       ++ ptr;\r
+                                       options = o;\r
+                                       return null;\r
+                               }\r
+                               else\r
+                                       throw NewParseException ("Bad options");\r
+                       }\r
+\r
+                       case '<': case '=': case '!': {                         // lookahead/lookbehind\r
+                               ExpressionAssertion asn = new ExpressionAssertion ();\r
+                               if (!ParseAssertionType (asn))\r
+                                       goto case '\'';                         // it's a (?<name> ) construct\r
+\r
+                               Group test = new Group ();\r
+                               ParseGroup (test, options, null);\r
+\r
+                               asn.TestExpression = test;\r
+                               return asn;\r
+                       }\r
+\r
+                       case '\'': {                                            // named/balancing group\r
+                               // the closing delimiter mirrors the opening one\r
+                               char delim;\r
+                               if (pattern[ptr] == '<')\r
+                                       delim = '>';\r
+                               else\r
+                                       delim = '\'';\r
+\r
+                               ++ ptr;\r
+                               string name = ParseName ();\r
+\r
+                               if (pattern[ptr] == delim) {\r
+                                       // capturing group\r
+\r
+                                       if (name == null)\r
+                                               throw NewParseException ("Bad group name.");\r
+\r
+                                       ++ ptr;\r
+                                       CapturingGroup cap = new CapturingGroup ();\r
+                                       cap.Name = name;\r
+                                       caps.Add (cap);\r
+                                       ParseGroup (cap, options, null);\r
+\r
+                                       return cap;\r
+                               }\r
+                               else if (pattern[ptr] == '-') {\r
+                                       // balancing group\r
+\r
+                                       ++ ptr;\r
+                                       string balance_name = ParseName ();\r
+                                       if (balance_name == null || pattern[ptr] != delim)\r
+                                               throw NewParseException ("Bad balancing group name.");\r
+\r
+                                       ++ ptr;\r
+                                       BalancingGroup bal = new BalancingGroup ();\r
+                                       bal.Name = name;\r
+                                       caps.Add (bal);\r
+                                       refs.Add (bal, balance_name);\r
+\r
+                                       // The body of the group still has to be parsed (and its\r
+                                       // closing parenthesis consumed), exactly as for every other\r
+                                       // group form; without this the contents were left unparsed.\r
+                                       ParseGroup (bal, options, null);\r
+\r
+                                       return bal;\r
+                               }\r
+                               else\r
+                                       throw NewParseException ("Bad group name.");\r
+                       }\r
+\r
+                       case '(': {                                             // expression/capture test\r
+                               Assertion asn;\r
+\r
+                               ++ ptr;\r
+                               int p = ptr;                                    // start of the test, for rewinding\r
+                               string name = ParseName ();\r
+                               if (name == null || pattern[ptr] != ')') {      // expression test\r
+                                       // FIXME MS implementation doesn't seem to\r
+                                       // implement this version of (?(x) ...)\r
+\r
+                                       ptr = p;\r
+                                       ExpressionAssertion expr_asn = new ExpressionAssertion ();\r
+\r
+                                       if (pattern[ptr] == '?') {\r
+                                               ++ ptr;\r
+                                               if (!ParseAssertionType (expr_asn))\r
+                                                       throw NewParseException ("Bad conditional.");\r
+                                       }\r
+                                       else {\r
+                                               // bare expression: behaves as a positive lookahead\r
+                                               expr_asn.Negate = false;\r
+                                               expr_asn.Reverse = false;\r
+                                       }\r
+\r
+                                       Group test = new Group ();\r
+                                       ParseGroup (test, options, null);\r
+                                       expr_asn.TestExpression = test;\r
+                                       asn = expr_asn;\r
+                               }\r
+                               else {                                          // capture test\r
+                                       ++ ptr;\r
+                                       asn = new CaptureAssertion ();\r
+                                       refs.Add (asn, name);\r
+                               }\r
+\r
+                               // the yes|no branches are parsed into the assertion by ParseGroup\r
+                               Group group = new Group ();\r
+                               ParseGroup (group, options, asn);\r
+                               return group;\r
+                       }\r
+\r
+                       case '#': {                                             // comment\r
+                               ++ ptr;\r
+                               while (pattern[ptr ++] != ')') {\r
+                                       if (ptr >= pattern.Length)\r
+                                               throw NewParseException ("Unterminated (?#...) comment.");\r
+                               }\r
+                               return null;\r
+                       }\r
+\r
+                       default:                                                // error\r
+                               throw NewParseException ("Bad grouping construct.");\r
+                       }\r
+               }\r
+\r
+               // Recognises the lookaround marker at ptr: "=" / "!" (lookahead) or\r
+               // "<=" / "<!" (lookbehind). On success stores Negate and Reverse on the\r
+               // assertion, moves ptr past the marker and returns true. Otherwise\r
+               // returns false, leaving both ptr and the assertion untouched.\r
+               private bool ParseAssertionType (ExpressionAssertion assertion) {\r
+                       bool behind = pattern[ptr] == '<';\r
+                       int at = behind ? ptr + 1 : ptr;                // index of the '=' or '!'\r
+\r
+                       bool negated;\r
+                       char kind = pattern[at];\r
+                       if (kind == '=')\r
+                               negated = false;\r
+                       else if (kind == '!')\r
+                               negated = true;\r
+                       else\r
+                               return false;\r
+\r
+                       assertion.Negate = negated;\r
+                       assertion.Reverse = behind;\r
+                       ptr = at + 1;\r
+\r
+                       return true;\r
+               }\r
+\r
+               // Consumes a run of inline option letters (i, m, n, s, x) starting at ptr\r
+               // and applies each one to options: the flag is set when negate is false\r
+               // and cleared when negate is true. Stops, without consuming it, at the\r
+               // first character that is not an option letter.\r
+               private void ParseOptions (ref RegexOptions options, bool negate) {\r
+                       for (;;) {\r
+                               RegexOptions flag;\r
+\r
+                               switch (pattern[ptr]) {\r
+                               case 'i': flag = RegexOptions.IgnoreCase; break;\r
+                               case 'm': flag = RegexOptions.Multiline; break;\r
+                               case 'n': flag = RegexOptions.ExplicitCapture; break;\r
+                               case 's': flag = RegexOptions.Singleline; break;\r
+                               case 'x': flag = RegexOptions.IgnorePatternWhitespace; break;\r
+                               default:\r
+                                       return;\r
+                               }\r
+\r
+                               if (negate)\r
+                                       options &= ~flag;\r
+                               else\r
+                                       options |= flag;\r
+\r
+                               ++ ptr;\r
+                       }\r
+               }\r
+\r
+               // Parses the body of a [...] set; pattern[ptr] is the first character\r
+               // after the opening bracket. A leading '^' negates the set and a ']' in\r
+               // first position is taken literally.\r
+               //\r
+               // options: only IgnoreCase is consulted, and passed on to the class.\r
+               // Returns the populated CharacterClass.\r
+               // Raises the NewParseException error for a reversed range or a set that\r
+               // is not closed before the end of the pattern.\r
+               private Expression ParseCharacterClass (RegexOptions options) {\r
+                       bool negate;\r
+                       if (pattern[ptr] == '^') {\r
+                               negate = true;\r
+                               ++ ptr;\r
+                       }\r
+                       else\r
+                               negate = false;\r
+\r
+                       CharacterClass cls = new CharacterClass (negate, IsIgnoreCase (options));\r
+\r
+                       if (pattern[ptr] == ']') {\r
+                               cls.AddCharacter (']');\r
+                               ++ ptr;\r
+                       }\r
+\r
+                       int c = -1;\r
+                       int last = -1;                  // previous literal, usable as a range start; -1 if none\r
+                       bool range = false;             // a '-' has been seen and awaits its end point\r
+                       bool closed = false;\r
+                       while (ptr < pattern.Length) {\r
+                               c = pattern[ptr ++];\r
+\r
+                               if (c == ']') {\r
+                                       closed = true;\r
+                                       break;\r
+                               }\r
+\r
+                               if (c == '-') {\r
+                                       range = true;\r
+                                       continue;\r
+                               }\r
+\r
+                               if (c == '\\') {\r
+                                       c = ParseEscape ();\r
+                                       if (c < 0) {\r
+                                               // didn't recognize escape\r
+\r
+                                               c = pattern[ptr ++];\r
+                                               switch (c) {\r
+                                               case 'b': c = '\b'; break;      // inside a set \b is a backspace\r
+\r
+                                               // The second argument is the negate flag: lower-case\r
+                                               // escapes add the category, upper-case ones its complement.\r
+                                               case 'd': cls.AddCategory (Category.Digit, false); last = -1; continue;\r
+                                               case 'w': cls.AddCategory (Category.Word, false); last = -1; continue;\r
+                                               case 's': cls.AddCategory (Category.WhiteSpace, false); last = -1; continue;\r
+                                               case 'p': cls.AddCategory (ParseUnicodeCategory (), false); last = -1; continue;\r
+                                               case 'D': cls.AddCategory (Category.Digit, true); last = -1; continue;\r
+                                               case 'W': cls.AddCategory (Category.Word, true); last = -1; continue;\r
+                                               case 'S': cls.AddCategory (Category.WhiteSpace, true); last = -1; continue;\r
+                                               case 'P': cls.AddCategory (ParseUnicodeCategory (), true); last = -1; continue;\r
+\r
+                                               default: break;         // add escaped character\r
+                                               }\r
+                                       }\r
+                               }\r
+\r
+                               if (range) {\r
+                                       if (c < last)\r
+                                               throw NewParseException ("[x-y] range in reverse order.");\r
+\r
+                                       if (last >=0 )\r
+                                               cls.AddRange ((char)last, (char)c);\r
+                                       else {\r
+                                               // no usable start point: the '-' was a literal\r
+                                               cls.AddCharacter ((char)c);\r
+                                               cls.AddCharacter ('-');\r
+                                       }\r
+\r
+                                       range = false;\r
+                                       last = -1;\r
+                               }\r
+                               else {\r
+                                       cls.AddCharacter ((char)c);\r
+                                       last = c;\r
+                               }\r
+                       }\r
+\r
+                       if (!closed)\r
+                               throw NewParseException ("Unterminated [] set.");\r
+\r
+                       // a '-' directly before the closing bracket is a literal\r
+                       if (range)\r
+                               cls.AddCharacter ('-');\r
+\r
+                       return cls;\r
+               }\r
+\r
+               // Parses the bounds of a {x}, {x,} or {x,y} quantifier; pattern[ptr] is\r
+               // the first character after the opening brace. Pattern whitespace is\r
+               // skipped between tokens when IgnorePatternWhitespace is set in options.\r
+               //\r
+               // min: receives x.\r
+               // max: receives y, x for the {x} form, or 0xffff (used as "unbounded")\r
+               //      when no upper bound is given.\r
+               // Raises the NewParseException error on bad syntax, on a bound of 65535\r
+               // or more, or when x > y.\r
+               private void ParseRepetitionBounds (out int min, out int max, RegexOptions options) {\r
+                       int n, m;\r
+\r
+                       /* check syntax */\r
+\r
+                       ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
+                       n = ParseNumber (10, 1, 0);\r
+                       if (n < 0)\r
+                               throw NewParseException ("Illegal {x,y} - bad value of x.");\r
+\r
+                       ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
+                       switch (pattern[ptr ++]) {\r
+                       case '}':\r
+                               m = n;\r
+                               break;\r
+                       case ',':\r
+                               ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
+                               m = ParseNumber (10, 1, 0);             // negative when y is omitted\r
+                               ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
+                               if (pattern[ptr ++] != '}')\r
+                                       throw NewParseException ("Illegal {x,y} - bad value of y.");\r
+                               break;\r
+                       default:\r
+                               throw NewParseException ("Illegal {x,y}");\r
+                       }\r
+\r
+                       /* check bounds and ordering */\r
+\r
+                       if (n >= 0xffff || m >= 0xffff)\r
+                               throw NewParseException ("Illegal {x, y} - maximum of 65535.");\r
+                       if (m >= 0 && m < n)\r
+                               throw NewParseException ("Illegal {x, y} with x > y.");\r
+\r
+                       /* assign min and max */\r
+\r
+                       // Only a negative m means "no upper bound". An explicit 0, as in {0}\r
+                       // or {0,0}, is a real bound and must not be widened to unbounded.\r
+                       min = n;\r
+                       if (m >= 0)\r
+                               max = m;\r
+                       else\r
+                               max = 0xffff;\r
+               }\r
+\r
+               // Parses the "{name}" part of a \p or \P escape starting at ptr and maps\r
+               // the name to a Category through CategoryUtils.CategoryFromName.\r
+               // Raises the NewParseException error when a brace or the name is missing,\r
+               // or when the name maps to Category.None.\r
+               private Category ParseUnicodeCategory () {\r
+                       const string incomplete = "Incomplete \\p{X} character escape.";\r
+\r
+                       bool opened = pattern[ptr ++] == '{';\r
+                       if (!opened)\r
+                               throw NewParseException (incomplete);\r
+\r
+                       string name = ParseName (pattern, ref ptr);\r
+                       if (name == null)\r
+                               throw NewParseException (incomplete);\r
+\r
+                       Category category = CategoryUtils.CategoryFromName (name);\r
+                       if (category == Category.None)\r
+                               throw NewParseException ("Unknown property '" + name + "'.");\r
+\r
+                       bool terminated = pattern[ptr ++] == '}';\r
+                       if (!terminated)\r
+                               throw NewParseException (incomplete);\r
+\r
+                       return category;\r
+               }\r
+\r
+               // Tries to read the escape letter at ptr as a non-literal construct: a\r
+               // character category (\d \w \s \p{..} and their upper-case complements),\r
+               // a position assertion (\A \Z \z \G \b \B) or a back-reference (\1..\9...,\r
+               // \k<name>, \k'name').\r
+               //\r
+               // options: only IgnoreCase is consulted, for back-references.\r
+               // Returns the expression, or null with ptr restored to its value on entry\r
+               // when the escape is not one of the above.\r
+               // Raises the NewParseException error for a malformed \k or \p escape.\r
+               private Expression ParseSpecial (RegexOptions options) {\r
+                       int p = ptr;\r
+                       Expression expr = null;\r
+\r
+                       switch (pattern[ptr ++]) {\r
+\r
+                       // categories\r
+                       // (second argument is the negate flag: false for the lower-case\r
+                       // escape, true for its upper-case complement)\r
+\r
+                       case 'd': expr = new CharacterClass (Category.Digit, false); break;\r
+                       case 'w': expr = new CharacterClass (Category.Word, false); break;\r
+                       case 's': expr = new CharacterClass (Category.WhiteSpace, false); break;\r
+                       case 'D': expr = new CharacterClass (Category.Digit, true); break;\r
+                       case 'W': expr = new CharacterClass (Category.Word, true); break;\r
+                       case 'S': expr = new CharacterClass (Category.WhiteSpace, true); break;\r
+                       case 'p': expr = new CharacterClass (ParseUnicodeCategory (), false); break;\r
+                       case 'P': expr = new CharacterClass (ParseUnicodeCategory (), true); break;\r
+\r
+                       // positions\r
+\r
+                       case 'A': expr = new PositionAssertion (Position.StartOfString); break;\r
+                       case 'Z': expr = new PositionAssertion (Position.End); break;\r
+                       case 'z': expr = new PositionAssertion (Position.EndOfString); break;\r
+                       case 'G': expr = new PositionAssertion (Position.StartOfScan); break;\r
+                       case 'b': expr = new PositionAssertion (Position.Boundary); break;\r
+                       case 'B': expr = new PositionAssertion (Position.NonBoundary); break;\r
+\r
+                       // references\r
+\r
+                       case '1': case '2': case '3': case '4': case '5':\r
+                       case '6': case '7': case '8': case '9': {\r
+                               ptr --;                         // re-read the first digit as part of the number\r
+                               int n = ParseNumber (10, 1, 0);\r
+                               if (n < 0) {\r
+                                       ptr = p;\r
+                                       return null;\r
+                               }\r
+\r
+                               // FIXME test if number is within number of assigned groups\r
+                               // this may present a problem for right-to-left matching\r
+\r
+                               Reference reference = new Reference (IsIgnoreCase (options));\r
+                               refs.Add (reference, n.ToString ());\r
+                               expr = reference;\r
+                               break;\r
+                       }\r
+\r
+                       case 'k': {\r
+                               char delim = pattern[ptr ++];\r
+                               if (delim == '<')\r
+                                       delim = '>';\r
+                               else if (delim != '\'')\r
+                                       throw NewParseException ("Malformed \\k<...> named backreference.");\r
+\r
+                               string name = ParseName ();\r
+                               if (name == null || pattern[ptr] != delim)\r
+                                       throw NewParseException ("Malformed \\k<...> named backreference.");\r
+\r
+                               ++ ptr;\r
+                               Reference reference = new Reference (IsIgnoreCase (options));\r
+                               refs.Add (reference, name);\r
+                               expr = reference;\r
+                               break;\r
+                       }\r
+\r
+                       default:\r
+                               expr = null;\r
+                               break;\r
+                       }\r
+\r
+                       // nothing recognised: rewind so the caller can try another interpretation\r
+                       if (expr == null)\r
+                               ptr = p;\r
+\r
+                       return expr;\r
+               }\r
+\r
+               // Parses the character escape whose letter is at pattern[ptr] (the\r
+               // character after the backslash).\r
+               //\r
+               // Handles \a \t \r \v \f \n \e \\, octal \0..., hex \xHH, unicode \uHHHH\r
+               // and control characters \cX.\r
+               // Returns the code of the escaped character, or -1 with ptr restored to\r
+               // its value on entry when the letter is not one of these escapes.\r
+               // Raises the NewParseException error for too few hex digits or an invalid\r
+               // control character.\r
+               private int ParseEscape () {\r
+                       int p = ptr;\r
+                       int c;\r
+\r
+                       switch (pattern[ptr ++]) {\r
+\r
+                       // standard escapes (except \b)\r
+\r
+                       case 'a': return '\u0007';\r
+                       case 't': return '\u0009';\r
+                       case 'r': return '\u000d';\r
+                       case 'v': return '\u000b';\r
+                       case 'f': return '\u000c';\r
+                       case 'n': return '\u000a';\r
+                       case 'e': return '\u001b';\r
+                       case '\\': return '\\';\r
+\r
+                       // character codes\r
+\r
+                       case '0': return ParseOctal (pattern, ref ptr);\r
+\r
+                       case 'x':\r
+                               c = ParseHex (pattern, ref ptr, 2);\r
+                               if (c < 0)\r
+                                       throw NewParseException ("Insufficient hex digits");\r
+\r
+                               return c;\r
+\r
+                       case 'u':\r
+                               c = ParseHex (pattern, ref ptr, 4);\r
+                               if (c < 0)\r
+                                       throw NewParseException ("Insufficient hex digits");\r
+\r
+                               return c;\r
+\r
+                       // control characters\r
+\r
+                       case 'c':\r
+                               // Read (and consume) the character that follows the 'c'. The\r
+                               // saved position p still addresses the 'c' itself, so it must\r
+                               // not be used here.\r
+                               c = pattern[ptr ++];\r
+                               if (c >= 'a' && c <= 'z')\r
+                                       c = c - 'a' + 'A';      // \ca is the same control character as \cA\r
+\r
+                               // '@'..'_' map onto codes 0..31, so \cA is 1 and \cZ is 26\r
+                               if (c >= '@' && c <= '_')\r
+                                       return c - '@';\r
+                               else\r
+                                       throw NewParseException ("Unrecognized control character.");\r
+\r
+                       // unknown escape\r
+\r
+                       default:\r
+                               ptr = p;\r
+                               return -1;\r
+                       }\r
+               }\r
+\r
+               // Instance shorthand for the static Parser.ParseName: reads a name from\r
+               // this parser's pattern at ptr, which is passed by reference. Callers in\r
+               // this class treat a null result as "no valid name here".\r
+               private string ParseName () {\r
+                       string name = Parser.ParseName (pattern, ref ptr);\r
+                       return name;\r
+               }\r
+\r
+               // Decides from the Unicode category of c whether it is a name\r
+               // character: modifier letters are rejected, connector punctuation\r
+               // (such as '_') is accepted, and every other character is accepted\r
+               // only when it is a letter or a digit.\r
+               private static bool IsNameChar (char c) {\r
+                       switch (Char.GetUnicodeCategory (c)) {\r
+                       case UnicodeCategory.ModifierLetter:\r
+                               return false;\r
+\r
+                       case UnicodeCategory.ConnectorPunctuation:\r
+                               return true;\r
+\r
+                       default:\r
+                               return Char.IsLetterOrDigit (c);\r
+                       }\r
+               }\r
+       \r
+               // Instance convenience wrapper: forwards b, min and max to the static\r
+               // Parser.ParseNumber together with this parser's pattern and ptr\r
+               // (ptr by reference, so the callee may move the read position).\r
+               private int ParseNumber (int b, int min, int max) {\r
+                       return Parser.ParseNumber (pattern, ref ptr, b, min, max);\r
+               }\r
+\r
+               // Instance convenience wrapper: forwards to the static\r
+               // Parser.ParseDecimal using this parser's pattern and ptr (ptr by\r
+               // reference, so the callee may move the read position).\r
+               private int ParseDecimal () {\r
+                       return Parser.ParseDecimal (pattern, ref ptr);\r
+               }\r
+\r
+               // Returns the numeric value of digit character c in base b, where b\r
+               // must be 8, 10 or 16. Returns -1 when c is not a digit of that base\r
+               // or when b is any other base. The n parameter is not used.\r
+               private static int ParseDigit (char c, int b, int n) {\r
+                       if (b != 8 && b != 10 && b != 16)\r
+                               return -1;\r
+\r
+                       int value;\r
+                       if (c >= '0' && c <= '9')\r
+                               value = c - '0';\r
+                       else if (c >= 'a' && c <= 'f')\r
+                               value = 10 + c - 'a';\r
+                       else if (c >= 'A' && c <= 'F')\r
+                               value = 10 + c - 'A';\r
+                       else\r
+                               return -1;\r
+\r
+                       // a digit belongs to the base only if it is smaller than the base\r
+                       return value < b ? value : -1;\r
+               }\r
+\r
+               // Advances ptr past everything that carries no meaning at the current\r
+               // position of the pattern:\r
+               //\r
+               //   - inline comments of the form (?#...), always;\r
+               //   - '#' comments running to the end of the line, only when ignore\r
+               //     is true;\r
+               //   - runs of whitespace, only when ignore is true.\r
+               //\r
+               // Stops at the first character that is none of the above or at the\r
+               // end of the pattern. Throws the exception built by NewParseException\r
+               // when an inline comment is opened but never closed.\r
+               private void ConsumeWhitespace (bool ignore) {\r
+                       while (ptr < pattern.Length) {\r
+                               if (pattern[ptr] == '(') {\r
+                                       // need room for "(?#" plus at least one more character\r
+                                       if (ptr + 3 >= pattern.Length)\r
+                                               return;\r
+\r
+                                       if (pattern[ptr + 1] != '?' || pattern[ptr + 2] != '#')\r
+                                               return;\r
+\r
+                                       ptr += 3;\r
+\r
+                                       // scan for the closing ')' without running off the end of\r
+                                       // the pattern; an unterminated comment used to raise an\r
+                                       // IndexOutOfRangeException here\r
+                                       while (ptr < pattern.Length && pattern[ptr] != ')')\r
+                                               ++ ptr;\r
+\r
+                                       if (ptr >= pattern.Length)\r
+                                               throw NewParseException ("Unterminated (?#...) comment.");\r
+\r
+                                       ++ ptr;         // step over ')'\r
+                               }\r
+                               else if (ignore && pattern[ptr] == '#') {\r
+                                       // the terminating '\n', if any, is consumed as well\r
+                                       while (ptr < pattern.Length && pattern[ptr ++] != '\n')\r
+                                               /* ignore */ ;\r
+                               }\r
+                               else if (ignore && Char.IsWhiteSpace (pattern[ptr])) {\r
+                                       while (ptr < pattern.Length && Char.IsWhiteSpace (pattern[ptr]))\r
+                                               ++ ptr;\r
+                               }\r
+                               else\r
+                                       return;\r
+                       }\r
+               }\r
+\r
+               // Resets the parser onto the given pattern and returns a copy of it\r
+               // in which every backslash escape has been replaced by the character\r
+               // code ParseEscape returns for it. All other characters are copied\r
+               // unchanged.\r
+               //\r
+               // Side effect: overwrites this.pattern and this.ptr.\r
+               private string ParseString (string pattern) {\r
+                       this.pattern = pattern;\r
+                       this.ptr = 0;\r
+\r
+                       // a builder avoids reallocating the whole result for each character\r
+                       System.Text.StringBuilder result = new System.Text.StringBuilder (pattern.Length);\r
+                       while (ptr < pattern.Length) {\r
+                               int c = pattern[ptr ++];\r
+                               if (c == '\\') {\r
+                                       // NOTE(review): a negative result from ParseEscape (which it\r
+                                       // appears to use for escapes it does not recognise) is cast\r
+                                       // to char below unchanged - confirm that this is intended.\r
+                                       c = ParseEscape ();\r
+                               }\r
+\r
+                               result.Append ((char)c);\r
+                       }\r
+\r
+                       return result.ToString ();\r
+               }\r
+\r
+               // Assigns a number to every capturing group collected in caps and then\r
+               // binds each pending reference in refs to the group it names.\r
+               //\r
+               // Unnamed groups are numbered first, starting at 1; named groups are\r
+               // numbered afterwards, continuing the same counter. num_groups is\r
+               // incremented once for every distinct number handed out. Throws the\r
+               // exception built by NewParseException when a reference names a group\r
+               // that was never defined.\r
+               private void ResolveReferences () {\r
+                       int gid = 1;\r
+                       // maps a group name (or, for unnamed groups, the decimal string of\r
+                       // its number) to the CapturingGroup registered under it\r
+                       Hashtable dict = new Hashtable ();\r
+\r
+                       // number unnamed groups\r
+\r
+                       foreach (CapturingGroup group in caps) {\r
+                               if (group.Name == null) {\r
+                                       dict.Add (gid.ToString (), group);\r
+                                       group.Number = gid ++;\r
+\r
+                                       ++ num_groups;\r
+                               }\r
+                       }\r
+\r
+                       // number named groups\r
+\r
+                       foreach (CapturingGroup group in caps) {\r
+                               if (group.Name != null) {\r
+                                       if (!dict.Contains (group.Name)) {\r
+                                               dict.Add (group.Name, group);\r
+                                               group.Number = gid ++;\r
+\r
+                                               ++ num_groups;\r
+                                       }\r
+                                       else {\r
+                                               // a name seen before: reuse the number of the group\r
+                                               // already registered under it\r
+                                               CapturingGroup prev = (CapturingGroup)dict[group.Name];\r
+                                               group.Number = prev.Number;\r
+                                       }\r
+                               }\r
+                       }\r
+\r
+                       // resolve references\r
+\r
+                       // refs maps each referring expression to the name of the group it\r
+                       // refers to\r
+                       foreach (Expression expr in refs.Keys) {\r
+                               string name = (string)refs[expr];\r
+                               if (!dict.Contains (name)) {\r
+                                       throw NewParseException ("Reference to undefined group " +\r
+                                               (Char.IsDigit (name[0]) ? "number " : "name ") +\r
+                                               name);\r
+                               }\r
+\r
+                               // store the target group on whichever kind of expression this is\r
+                               CapturingGroup group = (CapturingGroup)dict[name];\r
+                               if (expr is Reference)\r
+                                       ((Reference)expr).CapturingGroup = group;\r
+                               else if (expr is CaptureAssertion)\r
+                                       ((CaptureAssertion)expr).CapturingGroup = group;\r
+                               else if (expr is BalancingGroup)\r
+                                       ((BalancingGroup)expr).Balance = group;\r
+                       }\r
+               }\r
+\r
+               // flag helper functions\r
+\r
+               // True when the IgnoreCase bit is set in options.\r
+               private static bool IsIgnoreCase (RegexOptions options) {\r
+                       return (options & RegexOptions.IgnoreCase) != 0;\r
+               }\r
+\r
+               // True when the Multiline bit is set in options.\r
+               private static bool IsMultiline (RegexOptions options) {\r
+                       return (options & RegexOptions.Multiline) != 0;\r
+               }\r
+\r
+               // True when the ExplicitCapture bit is set in options.\r
+               private static bool IsExplicitCapture (RegexOptions options) {\r
+                       return (options & RegexOptions.ExplicitCapture) != 0;\r
+               }\r
+       \r
+               // True when the Singleline bit is set in options.\r
+               private static bool IsSingleline (RegexOptions options) {\r
+                       return (options & RegexOptions.Singleline) != 0;\r
+               }\r
+\r
+               // True when the IgnorePatternWhitespace bit is set in options.\r
+               private static bool IsIgnorePatternWhitespace (RegexOptions options) {\r
+                       return (options & RegexOptions.IgnorePatternWhitespace) != 0;\r
+               }\r
+\r
+               // True when the RightToLeft bit is set in options.\r
+               private static bool IsRightToLeft (RegexOptions options) {\r
+                       return (options & RegexOptions.RightToLeft) != 0;\r
+               }\r
+\r
+               // exception creation\r
+\r
+               // Builds (but does not throw) the exception used to report a parse\r
+               // error. The message has the form\r
+               //\r
+               //      parsing "<pattern>" - <msg>\r
+               //\r
+               // The second ArgumentException argument is the name of the offending\r
+               // parameter, not its value, so the literal "pattern" is passed; the\r
+               // pattern text itself is already part of the message.\r
+               private ArgumentException NewParseException (string msg) {\r
+                       msg = "parsing \"" + pattern + "\" - " + msg;\r
+                       return new ArgumentException (msg, "pattern");\r
+               }\r
+\r
+               private string pattern;\r
+               private int ptr;\r
+\r
+               private ArrayList caps;\r
+               private Hashtable refs;\r
+               private int num_groups;\r
+       }\r
+}\r
diff --git a/mcs/class/System/System.Text.RegularExpressions/quicksearch.cs b/mcs/class/System/System.Text.RegularExpressions/quicksearch.cs
new file mode 100644 (file)
index 0000000..65665a2
--- /dev/null
@@ -0,0 +1,108 @@
+//\r
+// assembly:   System\r
+// namespace:  System.Text.RegularExpressions\r
+// file:       quicksearch.cs\r
+//\r
+// author:     Dan Lewis (dlewis@gmx.co.uk)\r
+//             (c) 2002\r
+\r
+using System;\r
+\r
+namespace System.Text.RegularExpressions {\r
+\r
+       // TODO use simple test for single character strings\r
+\r
+       // Substring matcher using a simplified Boyer-Moore ("quick search")\r
+       // scheme. The constructor precomputes, for every possible character, how\r
+       // far the pattern may be shifted when that character is the one just past\r
+       // the current alignment; Search uses the table to skip ahead after each\r
+       // failed comparison.\r
+       //\r
+       // With ignore set, the pattern is lower-cased once and every text\r
+       // character is lower-cased as it is compared.\r
+       class QuickSearch {\r
+               // simplified boyer-moore for fast substring matching\r
+\r
+               // one shift entry per possible char value (0x0000 - 0xffff)\r
+               private const int ShiftTableSize = 0x10000;\r
+\r
+               // str:    the substring to search for\r
+               // ignore: true to match without regard to case\r
+               public QuickSearch (string str, bool ignore) {\r
+                       this.str = str;\r
+                       this.len = str.Length;\r
+                       this.ignore = ignore;\r
+\r
+                       Setup ();\r
+               }\r
+\r
+               // The pattern as it is compared: lower-cased when IgnoreCase is true.\r
+               public string String {\r
+                       get { return str; }\r
+               }\r
+\r
+               // Number of characters in the pattern.\r
+               public int Length {\r
+                       get { return len; }\r
+               }\r
+\r
+               public bool IgnoreCase {\r
+                       get { return ignore; }\r
+               }\r
+\r
+               // Returns the smallest index p with start <= p <= end at which the\r
+               // pattern occurs in text, or -1 when there is none. end is clamped so\r
+               // that an occurrence never extends past the end of text.\r
+               public int Search (string text, int start, int end) {\r
+                       if (end > text.Length - len)\r
+                               end = text.Length - len;\r
+\r
+                       // an empty pattern occurs at the first permitted position; the\r
+                       // loops below would otherwise read str[-1]\r
+                       if (len == 0)\r
+                               return start <= end ? start : -1;\r
+\r
+                       int ptr = start;\r
+                       if (!ignore) {\r
+                               while (ptr <= end) {\r
+                                       // compare right to left\r
+                                       int i = len - 1;\r
+                                       while (str[i] == text[ptr + i]) {\r
+                                               if (-- i < 0)\r
+                                                       return ptr;\r
+                                       }\r
+\r
+                                       // shift by the entry for the character just past the\r
+                                       // window; at ptr == end there is no such character\r
+                                       if (ptr < end)\r
+                                               ptr += shift[text[ptr + len]];\r
+                                       else\r
+                                               break;\r
+                               }\r
+                       }\r
+                       else {\r
+                               // ignore case: same as above, but we convert text\r
+                               // to lower case before doing the string compare\r
+\r
+                               while (ptr <= end) {\r
+                                       int i = len - 1;\r
+                                       while (str[i] == Char.ToLower (text[ptr + i])) {\r
+                                               if (-- i < 0)\r
+                                                       return ptr;\r
+                                       }\r
+\r
+                                       if (ptr < end)\r
+                                               ptr += shift[text[ptr + len]];\r
+                                       else\r
+                                               break;\r
+                               }\r
+                       }\r
+\r
+                       return -1;\r
+               }\r
+\r
+               // private\r
+\r
+               // Lower-cases the pattern when ignoring case and builds the shift\r
+               // table.\r
+               private void Setup () {\r
+                       if (ignore)\r
+                               str = str.ToLower ();\r
+\r
+                       // this is a 64k entry shift table. that's 256kb per pattern!\r
+                       // is it worth compressing this by only storing shifts within\r
+                       // a (lo, hi) character range? for most substrings this would\r
+                       // be around 50 bytes...\r
+                       //\r
+                       // the table must cover every char value because Search indexes\r
+                       // it with arbitrary characters of the searched text\r
+\r
+                       shift = new int[ShiftTableSize];\r
+                       for (int i = 0; i < ShiftTableSize; ++ i)\r
+                               shift[i] = len + 1;     // character does not occur in the pattern\r
+\r
+                       // later (rightmost) occurrences overwrite earlier ones\r
+                       for (int i = 0; i < len; ++ i) {\r
+                               char c = str[i];\r
+\r
+                               shift[c] = len - i;\r
+                               if (ignore)\r
+                                       shift[Char.ToUpper (c)] = len - i;\r
+                       }\r
+               }\r
+\r
+               private string str;\r
+               private int len;\r
+               private bool ignore;\r
+\r
+               private int[] shift;\r
+       }\r
+}\r
diff --git a/mcs/class/System/System.Text.RegularExpressions/regex.cs b/mcs/class/System/System.Text.RegularExpressions/regex.cs
new file mode 100644 (file)
index 0000000..df08365
--- /dev/null
@@ -0,0 +1,386 @@
+//\r
+// assembly:   System\r
+// namespace:  System.Text.RegularExpressions\r
+// file:       regex.cs\r
+//\r
+// author:     Dan Lewis (dlewis@gmx.co.uk)\r
+//             (c) 2002\r
+\r
+using System;\r
+using System.Text;\r
+using System.Collections;\r
+using System.Reflection;\r
+using System.Reflection.Emit;\r
+using System.Runtime.Serialization;\r
+\r
+using RegularExpression = System.Text.RegularExpressions.Syntax.RegularExpression;\r
+using Parser = System.Text.RegularExpressions.Syntax.Parser;\r
+\r
+namespace System.Text.RegularExpressions {\r
+       \r
+       // Callback that receives a Match and returns a string; the Regex.Replace\r
+       // overloads taking a MatchEvaluator insert that string in place of the match.\r
+       public delegate string MatchEvaluator (Match match);\r
+\r
+       // Option bits accepted by the Regex constructors and static helpers.\r
+       // Each member is a distinct bit, so values can be combined with bitwise OR.\r
+       [Flags]\r
+       public enum RegexOptions {\r
+               None                            = 0x000,\r
+               IgnoreCase                      = 0x001,\r
+               Multiline                       = 0x002,\r
+               ExplicitCapture                 = 0x004,\r
+               Compiled                        = 0x008,\r
+               Singleline                      = 0x010,\r
+               IgnorePatternWhitespace         = 0x020,\r
+               RightToLeft                     = 0x040,\r
+               ECMAScript                      = 0x100\r
+       }\r
+       \r
+       // A regular expression: parses a pattern, obtains a machine factory for\r
+       // it (shared between instances through a static cache keyed on pattern\r
+       // and options) and offers match, replace and split operations on top of\r
+       // the machines that factory creates.\r
+       public class Regex : ISerializable {\r
+               // Not implemented: always throws NotImplementedException.\r
+               public static void CompileToAssembly\r
+                       (RegexCompilationInfo[] regexes, AssemblyName aname)\r
+               {\r
+                       throw new NotImplementedException ("Not implemented.");\r
+               }\r
+\r
+               // Not implemented: always throws NotImplementedException.\r
+               public static void CompileToAssembly\r
+                       (RegexCompilationInfo[] regexes, AssemblyName aname,\r
+                        CustomAttributeBuilder[] attribs)\r
+               {\r
+                       throw new NotImplementedException ("Not implemented.");\r
+               }\r
+\r
+               // Not implemented: always throws NotImplementedException.\r
+               public static void CompileToAssembly\r
+                       (RegexCompilationInfo[] regexes, AssemblyName aname,\r
+                        CustomAttributeBuilder[] attribs, string resourceFile)\r
+               {\r
+                       throw new NotImplementedException ("Not implemented.");\r
+               }\r
+\r
+               // Forwards to Parser.Escape.\r
+               public static string Escape (string str) {\r
+                       return Parser.Escape (str);\r
+               }\r
+\r
+               // Forwards to Parser.Unescape.\r
+               public static string Unescape (string str) {\r
+                       return Parser.Unescape (str);\r
+               }\r
+\r
+               // Static shortcuts: each builds a Regex from pattern (and options,\r
+               // defaulting to RegexOptions.None) and calls the instance method of\r
+               // the same name on input.\r
+\r
+               public static bool IsMatch (string input, string pattern) {\r
+                       Regex re = new Regex (pattern, RegexOptions.None);\r
+                       return re.IsMatch (input);\r
+               }\r
+\r
+               public static Match Match (string input, string pattern) {\r
+                       return Regex.Match (input, pattern, RegexOptions.None);\r
+               }\r
+\r
+               public static Match Match (string input, string pattern, RegexOptions options) {\r
+                       Regex re = new Regex (pattern, options);\r
+                       return re.Match (input);\r
+               }\r
+\r
+               public static string Replace\r
+                       (string input, string pattern, MatchEvaluator evaluator)\r
+               {\r
+                       return Regex.Replace (input, pattern, evaluator, RegexOptions.None);\r
+               }\r
+\r
+               public static string Replace\r
+                       (string input, string pattern, MatchEvaluator evaluator,\r
+                        RegexOptions options)\r
+               {\r
+                       Regex re = new Regex (pattern, options);\r
+                       return re.Replace (input, evaluator);\r
+               }\r
+\r
+               public static string Replace\r
+                       (string input, string pattern, string replacement)\r
+               {\r
+                       return Regex.Replace (input, pattern, replacement, RegexOptions.None);\r
+               }\r
+\r
+               public static string Replace\r
+                       (string input, string pattern, string replacement,\r
+                        RegexOptions options)\r
+               {\r
+                       Regex re = new Regex (pattern, options);\r
+\r
+                       // substitute the replacement text (the pattern was passed here\r
+                       // by mistake before)\r
+                       return re.Replace (input, replacement);\r
+               }\r
+\r
+               public static string[] Split (string input, string pattern) {\r
+                       return Regex.Split (input, pattern, RegexOptions.None);\r
+               }\r
+\r
+               public static string[] Split (string input, string pattern, RegexOptions options) {\r
+                       // the expression is built from the pattern, not from the input\r
+                       Regex re = new Regex (pattern, options);\r
+                       return re.Split (input);\r
+               }\r
+\r
+               // private\r
+\r
+               private static FactoryCache cache = new FactoryCache (200);     // TODO put some meaningful number here\r
+\r
+               // constructors\r
+\r
+               protected Regex () {\r
+                       // XXX what's this constructor for?\r
+               }\r
+\r
+               public Regex (string pattern) : this (pattern, RegexOptions.None) {\r
+               }\r
+\r
+               // Parses pattern under options and installs a machine factory for\r
+               // it, compiling one only when the static cache has none for this\r
+               // pattern/options pair. Throws NotImplementedException when a\r
+               // factory has to be compiled and RegexOptions.Compiled is requested.\r
+               public Regex (string pattern, RegexOptions options) {\r
+                       this.pattern = pattern;\r
+                       this.options = options;\r
+\r
+                       // parse and install group mapping\r
+                       //\r
+                       // The mapping and the group count live on the instance and are\r
+                       // not held by the factory cache, so the pattern is parsed even\r
+                       // when the factory comes from the cache; skipping this left\r
+                       // mapping null for every instance created after the first.\r
+\r
+                       Parser psr = new Parser ();\r
+                       RegularExpression re = psr.ParseRegularExpression (pattern, options);\r
+                       this.group_count = re.GroupCount;\r
+                       this.mapping = psr.GetMapping ();\r
+\r
+                       this.factory = cache.Lookup (pattern, options);\r
+\r
+                       if (this.factory == null) {\r
+                               // compile\r
+\r
+                               ICompiler cmp;\r
+                               if ((options & RegexOptions.Compiled) != 0)\r
+                                       throw new NotImplementedException ("Not implemented.");\r
+                                       //cmp = new CILCompiler ();\r
+                               else\r
+                                       cmp = new PatternCompiler ();\r
+\r
+                               re.Compile (cmp, RightToLeft);\r
+\r
+                               // install machine factory and add to pattern cache\r
+\r
+                               this.factory = cmp.GetMachineFactory ();\r
+                               cache.Add (pattern, options, this.factory);\r
+                       }\r
+               }\r
+\r
+               // public instance properties\r
+\r
+               public RegexOptions Options {\r
+                       get { return options; }\r
+               }\r
+\r
+               public bool RightToLeft {\r
+                       get { return (options & RegexOptions.RightToLeft) != 0; }\r
+               }\r
+\r
+               // public instance methods\r
+\r
+               // Returns the keys of the group mapping, in the mapping's own order.\r
+               public string[] GetGroupNames () {\r
+                       string[] names = new string[mapping.Count];\r
+                       mapping.Keys.CopyTo (names, 0);\r
+\r
+                       return names;\r
+               }\r
+\r
+               // Returns the values of the group mapping, in the mapping's own order.\r
+               public int[] GetGroupNumbers () {\r
+                       int[] numbers = new int[mapping.Count];\r
+                       mapping.Values.CopyTo (numbers, 0);\r
+\r
+                       return numbers;\r
+               }\r
+\r
+               // Returns the name mapped to group number i, or "" when i is not\r
+               // below the group count or no name maps to it.\r
+               public string GroupNameFromNumber (int i) {\r
+                       if (i >= group_count)\r
+                               return "";\r
+\r
+                       foreach (string name in mapping.Keys) {\r
+                               if ((int)mapping[name] == i)\r
+                                       return name;\r
+                       }\r
+\r
+                       return "";\r
+               }\r
+\r
+               // Returns the number mapped to name, or -1 when the name is unknown.\r
+               public int GroupNumberFromName (string name) {\r
+                       if (mapping.Contains (name))\r
+                               return (int)mapping[name];\r
+\r
+                       return -1;\r
+               }\r
+\r
+               // match methods\r
+\r
+               public bool IsMatch (string input) {\r
+                       return IsMatch (input, 0);\r
+               }\r
+\r
+               public bool IsMatch (string input, int startat) {\r
+                       return Match (input, startat).Success;\r
+               }\r
+\r
+               public Match Match (string input) {\r
+                       return Match (input, 0);\r
+               }\r
+\r
+               // Scans input from startat up to the end of the string.\r
+               public Match Match (string input, int startat) {\r
+                       return CreateMachine ().Scan (this, input, startat, input.Length);\r
+               }\r
+\r
+               // Scans input from startat up to startat + length.\r
+               public Match Match (string input, int startat, int length) {\r
+                       return CreateMachine ().Scan (this, input, startat, startat + length);\r
+               }\r
+\r
+               public MatchCollection Matches (string input) {\r
+                       return Matches (input, 0);\r
+               }\r
+\r
+               // Collects every successive successful match, starting at startat.\r
+               public MatchCollection Matches (string input, int startat) {\r
+                       MatchCollection ms = new MatchCollection ();\r
+                       Match m = Match (input, startat);\r
+                       while (m.Success) {\r
+                               ms.Add (m);\r
+                               m = m.NextMatch ();\r
+                       }\r
+\r
+                       return ms;\r
+               }\r
+\r
+               // replace methods\r
+\r
+               public string Replace (string input, MatchEvaluator evaluator) {\r
+                       return Replace (input, evaluator, Int32.MaxValue, 0);\r
+               }\r
+\r
+               public string Replace (string input, MatchEvaluator evaluator, int count) {\r
+                       return Replace (input, evaluator, count, 0);\r
+               }\r
+\r
+               // Replaces at most count matches found from startat onwards with the\r
+               // string evaluator returns for each of them.\r
+               //\r
+               // NOTE(review): the result starts at startat, so the text before it\r
+               // is not copied into the result; the reference .NET implementation\r
+               // appears to keep that prefix - confirm the intended behaviour.\r
+               public string Replace (string input, MatchEvaluator evaluator, int count, int startat)\r
+               {\r
+                       StringBuilder result = new StringBuilder ();\r
+                       int ptr = startat;\r
+\r
+                       Match m = Match (input, startat);\r
+                       while (m.Success && count -- > 0) {\r
+                               // text between the previous match and this one, then the\r
+                               // replacement for this match\r
+                               result.Append (input.Substring (ptr, m.Index - ptr));\r
+                               result.Append (evaluator (m));\r
+\r
+                               ptr = m.Index + m.Length;\r
+                               m = m.NextMatch ();\r
+                       }\r
+                       result.Append (input.Substring (ptr));\r
+\r
+                       return result.ToString ();\r
+               }\r
+\r
+               public string Replace (string input, string replacement) {\r
+                       return Replace (input, replacement, Int32.MaxValue, 0);\r
+               }\r
+\r
+               public string Replace (string input, string replacement, int count) {\r
+                       return Replace (input, replacement, count, 0);\r
+               }\r
+\r
+               // Wraps the replacement string in a ReplacementEvaluator and\r
+               // delegates to the MatchEvaluator overload.\r
+               public string Replace (string input, string replacement, int count, int startat) {\r
+                       ReplacementEvaluator ev = new ReplacementEvaluator (this, replacement);\r
+                       return Replace (input, new MatchEvaluator (ev.Evaluate), count, startat);\r
+               }\r
+\r
+               // split methods\r
+\r
+               public string[] Split (string input) {\r
+                       return Split (input, Int32.MaxValue, 0);\r
+               }\r
+\r
+               public string[] Split (string input, int count) {\r
+                       return Split (input, count, 0);\r
+               }\r
+\r
+               // Splits input at the matches found from startat onwards.\r
+               //\r
+               // count is the maximum number of strings returned; the last one\r
+               // holds the unsplit remainder. A count of 0 means "no limit".\r
+               // The pieces start at startat.\r
+               public string[] Split (string input, int count, int startat) {\r
+                       ArrayList splits = new ArrayList ();\r
+                       if (count == 0)\r
+                               count = Int32.MaxValue;\r
+\r
+                       int ptr = startat;\r
+                       Match m = Match (input, startat);\r
+\r
+                       // one slot is reserved for the remainder, hence the pre-decrement\r
+                       while (-- count > 0 && m.Success) {\r
+                               splits.Add (input.Substring (ptr, m.Index - ptr));\r
+                               ptr = m.Index + m.Length;\r
+\r
+                               // move on to the next match; without this the loop kept\r
+                               // re-adding the first piece until count ran out\r
+                               m = m.NextMatch ();\r
+                       }\r
+\r
+                       // whatever follows the last split point\r
+                       splits.Add (input.Substring (ptr));\r
+\r
+                       string[] result = new string[splits.Count];\r
+                       splits.CopyTo (result);\r
+                       return result;\r
+               }\r
+\r
+               // object methods\r
+\r
+               // Returns the pattern this instance was constructed with.\r
+               public override string ToString () {\r
+                       return pattern;\r
+               }\r
+\r
+               // ISerializable interface\r
+\r
+               // Not implemented: always throws NotImplementedException.\r
+               public void GetObjectData (SerializationInfo info, StreamingContext context) {\r
+                       throw new NotImplementedException ("Not implemented.");\r
+               }\r
+\r
+               // internal\r
+\r
+               internal int GroupCount {\r
+                       get { return group_count; }\r
+               }\r
+\r
+               // private\r
+\r
+               // Every scan runs on a fresh machine obtained from the factory.\r
+               private IMachine CreateMachine () {\r
+                       return factory.NewInstance ();\r
+               }\r
+\r
+               private string pattern;\r
+               private RegexOptions options;\r
+\r
+               private IMachineFactory factory;\r
+               private IDictionary mapping;\r
+               private int group_count;\r
+       }\r
+\r
+       // Mutable record describing one regular expression handed to\r
+       // Regex.CompileToAssembly: the pattern and its options, plus the name,\r
+       // namespace and visibility stored alongside them. Every constructor\r
+       // argument is stored as given and exposed through a read/write property.\r
+       public class RegexCompilationInfo {\r
+               // backing storage\r
+\r
+               private string pattern;\r
+               private RegexOptions options;\r
+               private string name;\r
+               private string full_namespace;\r
+               private bool is_public;\r
+\r
+               public RegexCompilationInfo (string pattern, RegexOptions options, string name, string full_namespace, bool is_public) {\r
+                       this.is_public = is_public;\r
+                       this.full_namespace = full_namespace;\r
+                       this.name = name;\r
+                       this.options = options;\r
+                       this.pattern = pattern;\r
+               }\r
+\r
+               public string Pattern {\r
+                       get {\r
+                               return this.pattern;\r
+                       }\r
+                       set {\r
+                               this.pattern = value;\r
+                       }\r
+               }\r
+\r
+               public RegexOptions Options {\r
+                       get {\r
+                               return this.options;\r
+                       }\r
+                       set {\r
+                               this.options = value;\r
+                       }\r
+               }\r
+\r
+               public string Name {\r
+                       get {\r
+                               return this.name;\r
+                       }\r
+                       set {\r
+                               this.name = value;\r
+                       }\r
+               }\r
+\r
+               public string Namespace {\r
+                       get {\r
+                               return this.full_namespace;\r
+                       }\r
+                       set {\r
+                               this.full_namespace = value;\r
+                       }\r
+               }\r
+\r
+               public bool IsPublic {\r
+                       get {\r
+                               return this.is_public;\r
+                       }\r
+                       set {\r
+                               this.is_public = value;\r
+                       }\r
+               }\r
+       }\r
+}\r
diff --git a/mcs/class/System/System.Text.RegularExpressions/replace.cs b/mcs/class/System/System.Text.RegularExpressions/replace.cs
new file mode 100644 (file)
index 0000000..88b99f3
--- /dev/null
@@ -0,0 +1,181 @@
+//\r
+// assembly:   System\r
+// namespace:  System.Text.RegularExpressions\r
+// file:       replace.cs\r
+//\r
+// author:     Dan Lewis (dlewis@gmx.co.uk)\r
+//             (c) 2002\r
+\r
+using System;\r
+using System.Text;\r
+using System.Collections;\r
+\r
+using Parser = System.Text.RegularExpressions.Syntax.Parser;\r
+\r
+namespace System.Text.RegularExpressions {\r
+\r
+       // Compiles a replacement pattern into a list of terms once, and expands\r
+       // that list against a match on demand.\r
+       //\r
+       // Substitutions understood by the compiler:\r
+       //   $n  ${name}  $&  $`  $'  $+  $_   and   $$ for a literal dollar sign.\r
+       class ReplacementEvaluator {\r
+               // Convenience wrapper: compiles `replacement' for match.Regex and\r
+               // expands it against `match' in a single step.\r
+               public static string Evaluate (string replacement, Match match) {\r
+                       ReplacementEvaluator ev = new ReplacementEvaluator (match.Regex, replacement);\r
+                       return ev.Evaluate (match);\r
+               }\r
+\r
+               // Compiles `replacement'; `regex' is consulted for group numbers and names.\r
+               // Throws ArgumentException when the replacement pattern is malformed.\r
+               public ReplacementEvaluator (Regex regex, string replacement) {\r
+                       this.regex = regex;\r
+                       terms = new ArrayList ();\r
+                       Compile (replacement);\r
+               }\r
+\r
+               // Concatenates the result of every compiled term for `match'.\r
+               public string Evaluate (Match match) {\r
+                       StringBuilder result = new StringBuilder ();\r
+                       foreach (Term term in terms)\r
+                               result.Append (term.GetResult (match));\r
+\r
+                       return result.ToString ();\r
+               }\r
+\r
+               // private\r
+\r
+               // Splits the (unescaped) replacement text into terms. Each term carries\r
+               // the literal text that precedes it; trailing literal text becomes a\r
+               // final literal-only term.\r
+               private void Compile (string replacement) {\r
+                       replacement = Parser.Unescape (replacement);\r
+                       StringBuilder literal = new StringBuilder ();\r
+\r
+                       int ptr = 0;\r
+                       while (ptr < replacement.Length) {\r
+                               char c = replacement[ptr ++];\r
+\r
+                               // ordinary character, or a '$' with nothing after it:\r
+                               // both are copied through verbatim\r
+\r
+                               if (c != '$' || ptr == replacement.Length) {\r
+                                       literal.Append (c);\r
+                                       continue;\r
+                               }\r
+\r
+                               // "$$" is an escaped dollar sign; keep scanning afterwards\r
+\r
+                               if (replacement[ptr] == '$') {\r
+                                       ++ ptr;\r
+                                       literal.Append ('$');\r
+                                       continue;\r
+                               }\r
+\r
+                               // CompileTerm either returns a term or throws\r
+\r
+                               Term term = CompileTerm (replacement, ref ptr);\r
+                               term.Literal = literal.ToString ();\r
+                               terms.Add (term);\r
+                               literal.Length = 0;\r
+                       }\r
+\r
+                       if (literal.Length > 0)\r
+                               terms.Add (new Term (literal.ToString ()));\r
+               }\r
+\r
+               // Parses the substitution that follows a '$'. On entry `ptr' indexes the\r
+               // character after the '$' (the caller guarantees it exists); on return\r
+               // it indexes the first character after the substitution.\r
+               // Throws ArgumentException for an out-of-range group number, an unknown\r
+               // or unterminated group name, or an unrecognised substitution character.\r
+               private Term CompileTerm (string str, ref int ptr) {\r
+                       char c = str[ptr];\r
+\r
+                       if (Char.IsDigit (c)) {         // numbered group\r
+                               int n = Parser.ParseDecimal (str, ref ptr);\r
+                               if (n < 0 || n > regex.GroupCount)\r
+                                       throw new ArgumentException ("Bad group number.");\r
+\r
+                               return new Term (TermOp.Match, n);\r
+                       }\r
+\r
+                       ++ ptr;\r
+\r
+                       switch (c) {\r
+                       case '{': {                     // named group\r
+                               string name = Parser.ParseName (str, ref ptr);\r
+\r
+                               // the bounds check turns an unterminated "${name" into an\r
+                               // ArgumentException instead of an index error\r
+\r
+                               if (name == null || ptr >= str.Length || str[ptr ++] != '}')\r
+                                       throw new ArgumentException ("Bad group name.");\r
+\r
+                               int n = regex.GroupNumberFromName (name);\r
+\r
+                               if (n < 0)\r
+                                       throw new ArgumentException ("Bad group name.");\r
+\r
+                               return new Term (TermOp.Match, n);\r
+                       }\r
+\r
+                       case '&':                       // entire match\r
+                               return new Term (TermOp.Match, 0);\r
+\r
+                       case '`':                       // text before match\r
+                               return new Term (TermOp.PreMatch, 0);\r
+\r
+                       case '\'':                      // text after match\r
+                               return new Term (TermOp.PostMatch, 0);\r
+\r
+                       case '+':                       // last group\r
+                               // NOTE(review): numbered groups above accept n == GroupCount,\r
+                               // while this uses GroupCount - 1 -- confirm which convention\r
+                               // Regex.GroupCount follows.\r
+                               return new Term (TermOp.Match, regex.GroupCount - 1);\r
+\r
+                       case '_':                       // entire text\r
+                               return new Term (TermOp.All, 0);\r
+\r
+                       default:\r
+                               throw new ArgumentException ("Bad replacement pattern.");\r
+                       }\r
+               }\r
+\r
+               private Regex regex;\r
+               private ArrayList terms;\r
+\r
+               private enum TermOp {\r
+                       None,                           // no action\r
+                       Match,                          // input within group\r
+                       PreMatch,                       // input before group\r
+                       PostMatch,                      // input after group\r
+                       All                             // entire input\r
+               }\r
+\r
+               // One piece of a compiled replacement: a literal prefix followed by an\r
+               // optional substitution selected by `op' and `arg'.\r
+               private class Term {\r
+                       // substitution term; the literal prefix starts out empty\r
+                       public Term (TermOp op, int arg) {\r
+                               this.op = op;\r
+                               this.arg = arg;\r
+                               this.literal = "";\r
+                       }\r
+\r
+                       // literal-only term\r
+                       public Term (string literal) {\r
+                               this.op = TermOp.None;\r
+                               this.arg = 0;\r
+                               this.literal = literal;\r
+                       }\r
+\r
+                       public string Literal {\r
+                               set { literal = value; }\r
+                       }\r
+\r
+                       // Returns the literal prefix followed by the text selected by `op'\r
+                       // from group number `arg' of `match'.\r
+                       public string GetResult (Match match) {\r
+                               Group group = match.Groups[arg];\r
+\r
+                               switch (op) {\r
+                               case TermOp.None:\r
+                                       return literal;\r
+\r
+                               case TermOp.Match:\r
+                                       return literal + group.Value;\r
+\r
+                               case TermOp.PreMatch:\r
+                                       return literal + group.Text.Substring (0, group.Index);\r
+\r
+                               case TermOp.PostMatch:\r
+                                       return literal + group.Text.Substring (group.Index + group.Length);\r
+\r
+                               case TermOp.All:\r
+                                       return literal + group.Text;\r
+                               }\r
+\r
+                               return "";\r
+                       }\r
+\r
+                       public TermOp op;               // term type\r
+                       public int arg;                 // group argument\r
+                       public string literal;          // literal to prepend\r
+\r
+                       public override string ToString () {\r
+                               return op.ToString () + "(" + arg + ") " + literal;\r
+                       }\r
+               }\r
+       }\r
+}\r
diff --git a/mcs/class/System/System.Text.RegularExpressions/syntax.cs b/mcs/class/System/System.Text.RegularExpressions/syntax.cs
new file mode 100644 (file)
index 0000000..f0d0bc4
--- /dev/null
@@ -0,0 +1,976 @@
+//\r
+// assembly:   System\r
+// namespace:  System.Text.RegularExpressions\r
+// file:       syntax.cs\r
+//\r
+// author:     Dan Lewis (dlewis@gmx.co.uk)\r
+//             (c) 2002\r
+\r
+using System;\r
+using System.Collections;\r
+\r
+namespace System.Text.RegularExpressions.Syntax {\r
+       // collection classes\r
+       \r
+       // Typed list of Expression nodes; slots may hold null.\r
+       class ExpressionCollection : CollectionBase {\r
+               // typed access to slot i of the underlying list\r
+               public Expression this[int i] {\r
+                       get {\r
+                               object item = List[i];\r
+                               return (Expression)item;\r
+                       }\r
+                       set {\r
+                               List[i] = value;\r
+                       }\r
+               }\r
+\r
+               // appends e (which may be null) to the end of the list\r
+               public void Add (Expression e) {\r
+                       List.Add (e);\r
+               }\r
+\r
+               // Overridden with an empty body so that null elements are allowed.\r
+               protected override void OnValidate (object o) {\r
+               }\r
+       }\r
+\r
+       // abstract classes\r
+       \r
+       // Common base of every node in the expression tree.\r
+       abstract class Expression {\r
+               // emits this node through the given compiler\r
+               public abstract void Compile (ICompiler cmp, bool reverse);\r
+\r
+               // reports the smallest and largest width of this node\r
+               public abstract void GetWidth (out int min, out int max);\r
+\r
+               // Returns the width when GetWidth reports min == max, otherwise -1.\r
+               public int GetFixedWidth () {\r
+                       int lo, hi;\r
+                       GetWidth (out lo, out hi);\r
+\r
+                       return lo == hi ? lo : -1;\r
+               }\r
+\r
+               // Default anchor: built from this node and its fixed width only.\r
+               public virtual AnchorInfo GetAnchorInfo () {\r
+                       int width = GetFixedWidth ();\r
+                       return new AnchorInfo (this, width);\r
+               }\r
+\r
+               // Nodes are complex unless a subclass overrides this.\r
+               public virtual bool IsComplex () {\r
+                       return true;\r
+               }\r
+       }\r
+\r
+       // composite expressions\r
+       \r
+       // Base class for nodes that own a collection of child expressions.\r
+       abstract class CompositeExpression : Expression {\r
+               private ExpressionCollection expressions;\r
+\r
+               public CompositeExpression () {\r
+                       this.expressions = new ExpressionCollection ();\r
+               }\r
+\r
+               // the child collection; slots may be null\r
+               protected ExpressionCollection Expressions {\r
+                       get {\r
+                               return this.expressions;\r
+                       }\r
+               }\r
+\r
+               // Width range over the first `count' child slots, ignoring null slots:\r
+               // min is the smallest child minimum, max the largest child maximum.\r
+               // Both are 0 when every inspected slot is null.\r
+               protected void GetWidth (out int min, out int max, int count) {\r
+                       int lo = Int32.MaxValue;\r
+                       int hi = 0;\r
+                       int seen = 0;\r
+\r
+                       for (int slot = 0; slot < count; ++ slot) {\r
+                               Expression child = Expressions[slot];\r
+                               if (child != null) {\r
+                                       ++ seen;\r
+\r
+                                       int child_min, child_max;\r
+                                       child.GetWidth (out child_min, out child_max);\r
+                                       lo = Math.Min (lo, child_min);\r
+                                       hi = Math.Max (hi, child_max);\r
+                               }\r
+                       }\r
+\r
+                       if (seen == 0) {\r
+                               lo = 0;\r
+                               hi = 0;\r
+                       }\r
+\r
+                       min = lo;\r
+                       max = hi;\r
+               }\r
+       }\r
+\r
+       // groups\r
+       \r
+       // A sequence of sub-expressions compiled one after another.\r
+       class Group : CompositeExpression {\r
+               public Group () {\r
+               }\r
+\r
+               // slot 0 of the child collection\r
+               public Expression Expression {\r
+                       get {\r
+                               return Expressions[0];\r
+                       }\r
+                       set {\r
+                               Expressions[0] = value;\r
+                       }\r
+               }\r
+\r
+               // adds e to the end of the sequence\r
+               public void AppendExpression (Expression e) {\r
+                       Expressions.Add (e);\r
+               }\r
+\r
+               // Compiles every child, walking the sequence back to front when\r
+               // `reverse' is set.\r
+               public override void Compile (ICompiler cmp, bool reverse) {\r
+                       int total = Expressions.Count;\r
+                       for (int step = 0; step < total; ++ step) {\r
+                               int index = reverse ? total - step - 1 : step;\r
+                               Expression child = Expressions[index];\r
+                               child.Compile (cmp, reverse);\r
+                       }\r
+               }\r
+\r
+               // Sums the child widths; the maximum sticks at Int32.MaxValue as soon\r
+               // as any child reports Int32.MaxValue.\r
+               public override void GetWidth (out int min, out int max) {\r
+                       int lo = 0;\r
+                       int hi = 0;\r
+\r
+                       foreach (Expression child in Expressions) {\r
+                               int child_min, child_max;\r
+                               child.GetWidth (out child_min, out child_max);\r
+\r
+                               lo += child_min;\r
+                               if (hi != Int32.MaxValue && child_max != Int32.MaxValue)\r
+                                       hi += child_max;\r
+                               else\r
+                                       hi = Int32.MaxValue;\r
+                       }\r
+\r
+                       min = lo;\r
+                       max = hi;\r
+               }\r
+\r
+               // Derives an anchor for the whole sequence from the anchors of its\r
+               // children: the first position anchor wins outright; otherwise the\r
+               // substrings forming the longest normalized segment are chained.\r
+               public override AnchorInfo GetAnchorInfo () {\r
+                       int width = GetFixedWidth ();\r
+\r
+                       ArrayList child_infos = new ArrayList ();\r
+                       IntervalCollection spans = new IntervalCollection ();\r
+\r
+                       // pass 1: collect child anchors and substring intervals, stopping\r
+                       // at the first child of unknown width\r
+\r
+                       int offset = 0;\r
+                       foreach (Expression child in Expressions) {\r
+                               AnchorInfo child_info = child.GetAnchorInfo ();\r
+                               child_infos.Add (child_info);\r
+\r
+                               if (child_info.IsPosition)\r
+                                       return new AnchorInfo (this, offset + child_info.Offset, width, child_info.Position);\r
+\r
+                               if (child_info.IsSubstring)\r
+                                       spans.Add (child_info.GetInterval (offset));\r
+\r
+                               if (child_info.IsUnknownWidth)\r
+                                       break;\r
+\r
+                               offset += child_info.Width;\r
+                       }\r
+\r
+                       // pick the longest interval after normalization\r
+\r
+                       spans.Normalize ();\r
+\r
+                       Interval best = Interval.Empty;\r
+                       foreach (Interval span in spans) {\r
+                               if (span.Size > best.Size)\r
+                                       best = span;\r
+                       }\r
+\r
+                       if (best.IsEmpty)\r
+                               return new AnchorInfo (this, width);\r
+\r
+                       // pass 2: chain the substrings that lie inside the chosen interval\r
+\r
+                       string text = "";\r
+                       bool ignore_case = false;\r
+\r
+                       offset = 0;\r
+                       foreach (AnchorInfo child_info in child_infos) {\r
+                               if (child_info.IsSubstring && best.Contains (child_info.GetInterval (offset))) {\r
+                                       text = text + child_info.Substring;     // TODO mark subexpressions\r
+                                       if (child_info.IgnoreCase)\r
+                                               ignore_case = true;\r
+                               }\r
+\r
+                               if (child_info.IsUnknownWidth)\r
+                                       break;\r
+\r
+                               offset += child_info.Width;\r
+                       }\r
+\r
+                       return new AnchorInfo (this, best.low, width, text, ignore_case);\r
+               }\r
+\r
+               // Complex when any child is complex or the fixed width is not positive.\r
+               public override bool IsComplex () {\r
+                       bool any_complex = false;\r
+                       foreach (Expression child in Expressions) {\r
+                               if (child.IsComplex ())\r
+                                       any_complex = true;\r
+                       }\r
+\r
+                       bool no_positive_width = GetFixedWidth () <= 0;\r
+                       return any_complex || no_positive_width;\r
+               }\r
+       }\r
+\r
+       // Root of the expression tree: a Group that also records the group count\r
+       // and emits the info block and anchoring expression ahead of the pattern.\r
+       class RegularExpression : Group {\r
+               private int group_count;\r
+\r
+               public RegularExpression () {\r
+                       this.group_count = 0;\r
+               }\r
+\r
+               // value handed to EmitInfo when compiling\r
+               public int GroupCount {\r
+                       get {\r
+                               return this.group_count;\r
+                       }\r
+                       set {\r
+                               this.group_count = value;\r
+                       }\r
+               }\r
+\r
+               // Emits, in order: the info block, the anchoring expression\r
+               // terminated by True, and the pattern itself terminated by True.\r
+               public override void Compile (ICompiler cmp, bool reverse) {\r
+                       // info block\r
+\r
+                       int lo, hi;\r
+                       GetWidth (out lo, out hi);\r
+                       cmp.EmitInfo (group_count, lo, hi);\r
+\r
+                       // anchoring expression; when compiling in reverse the computed\r
+                       // anchor is replaced by a plain fixed-width one\r
+\r
+                       AnchorInfo anchor = GetAnchorInfo ();\r
+                       if (reverse) {\r
+                               anchor = new AnchorInfo (this, GetFixedWidth ());       // FIXME\r
+                       }\r
+\r
+                       LinkRef body = cmp.NewLink ();\r
+                       cmp.EmitAnchor (anchor.Offset, body);\r
+\r
+                       if (!anchor.IsPosition) {\r
+                               if (anchor.IsSubstring)\r
+                                       cmp.EmitString (anchor.Substring, anchor.IgnoreCase, reverse);\r
+                       }\r
+                       else {\r
+                               cmp.EmitPosition (anchor.Position);\r
+                       }\r
+\r
+                       cmp.EmitTrue ();\r
+\r
+                       // pattern\r
+\r
+                       cmp.ResolveLink (body);\r
+                       base.Compile (cmp, reverse);\r
+                       cmp.EmitTrue ();\r
+               }\r
+       }\r
+\r
+       // A Group whose compiled form is bracketed by Open/Close for its number.\r
+       class CapturingGroup : Group {\r
+               private int gid;\r
+               private string name;\r
+\r
+               // starts out as group 0 with no name\r
+               public CapturingGroup () {\r
+                       gid = 0;\r
+                       name = null;\r
+               }\r
+\r
+               // group number passed to EmitOpen / EmitClose\r
+               public int Number {\r
+                       get {\r
+                               return this.gid;\r
+                       }\r
+                       set {\r
+                               this.gid = value;\r
+                       }\r
+               }\r
+\r
+               // group name, or null for an unnamed group\r
+               public string Name {\r
+                       get {\r
+                               return this.name;\r
+                       }\r
+                       set {\r
+                               this.name = value;\r
+                       }\r
+               }\r
+\r
+               // true once a name has been assigned\r
+               public bool IsNamed {\r
+                       get {\r
+                               return !(this.name == null);\r
+                       }\r
+               }\r
+\r
+               // Open, then the group contents, then Close.\r
+               public override void Compile (ICompiler cmp, bool reverse) {\r
+                       int id = gid;\r
+\r
+                       cmp.EmitOpen (id);\r
+                       base.Compile (cmp, reverse);\r
+                       cmp.EmitClose (id);\r
+               }\r
+\r
+               // capturing groups are always complex\r
+               public override bool IsComplex () {\r
+                       return true;\r
+               }\r
+       }\r
+\r
+       // A capturing group that is paired with another capturing group and\r
+       // emits a Balance operation after its contents.\r
+       class BalancingGroup : CapturingGroup {\r
+               private CapturingGroup balance;\r
+\r
+               public BalancingGroup () {\r
+                       balance = null;\r
+               }\r
+\r
+               // the group this one balances against\r
+               public CapturingGroup Balance {\r
+                       get {\r
+                               return this.balance;\r
+                       }\r
+                       set {\r
+                               this.balance = value;\r
+                       }\r
+               }\r
+\r
+               // Compiles the children the same way Group.Compile does, then emits\r
+               // Balance. The loop is repeated here because base.Compile would run\r
+               // CapturingGroup.Compile, which wraps the contents in Open/Close.\r
+               public override void Compile (ICompiler cmp, bool reverse) {\r
+                       int total = Expressions.Count;\r
+                       for (int step = 0; step < total; ++ step) {\r
+                               int index = reverse ? total - step - 1 : step;\r
+                               Expression child = Expressions[index];\r
+                               child.Compile (cmp, reverse);\r
+                       }\r
+\r
+                       cmp.EmitBalance (this.Number, balance.Number);\r
+               }\r
+       }\r
+\r
+       // A Group compiled as a Sub block: Sub, the contents, True, then the\r
+       // resolved tail link.\r
+       class NonBacktrackingGroup : Group {\r
+               public NonBacktrackingGroup () {\r
+               }\r
+\r
+               public override void Compile (ICompiler cmp, bool reverse) {\r
+                       LinkRef after = cmp.NewLink ();\r
+                       cmp.EmitSub (after);\r
+\r
+                       base.Compile (cmp, reverse);\r
+                       cmp.EmitTrue ();\r
+\r
+                       cmp.ResolveLink (after);\r
+               }\r
+\r
+               // always complex, whatever the contents\r
+               public override bool IsComplex () {\r
+                       return true;\r
+               }\r
+       }\r
+\r
+       // repetition\r
+\r
+       // Repeats a single sub-expression between Minimum and Maximum times.\r
+       // A Maximum of 0xffff is treated as unbounded when computing widths.\r
+       class Repetition : CompositeExpression {\r
+               private int min, max;\r
+               private bool lazy;\r
+\r
+               public Repetition (int min, int max, bool lazy) {\r
+                       this.min = min;\r
+                       this.max = max;\r
+                       this.lazy = lazy;\r
+\r
+                       // reserve slot 0 for the repeated expression\r
+                       Expressions.Add (null);\r
+               }\r
+\r
+               // the repeated expression (slot 0)\r
+               public Expression Expression {\r
+                       get {\r
+                               return Expressions[0];\r
+                       }\r
+                       set {\r
+                               Expressions[0] = value;\r
+                       }\r
+               }\r
+\r
+               public int Minimum {\r
+                       get {\r
+                               return this.min;\r
+                       }\r
+                       set {\r
+                               this.min = value;\r
+                       }\r
+               }\r
+\r
+               public int Maximum {\r
+                       get {\r
+                               return this.max;\r
+                       }\r
+                       set {\r
+                               this.max = value;\r
+                       }\r
+               }\r
+\r
+               public bool Lazy {\r
+                       get {\r
+                               return this.lazy;\r
+                       }\r
+                       set {\r
+                               this.lazy = value;\r
+                       }\r
+               }\r
+\r
+               // Complex bodies are wrapped in Repeat ... Until; simple ones use\r
+               // FastRepeat, the body, True and a resolved tail link.\r
+               public override void Compile (ICompiler cmp, bool reverse) {\r
+                       if (!Expression.IsComplex ()) {\r
+                               LinkRef tail = cmp.NewLink ();\r
+\r
+                               cmp.EmitFastRepeat (min, max, lazy, tail);\r
+                               Expression.Compile (cmp, reverse);\r
+                               cmp.EmitTrue ();\r
+                               cmp.ResolveLink (tail);\r
+                               return;\r
+                       }\r
+\r
+                       LinkRef until = cmp.NewLink ();\r
+\r
+                       cmp.EmitRepeat (min, max, lazy, until);\r
+                       Expression.Compile (cmp, reverse);\r
+                       cmp.EmitUntil (until);\r
+               }\r
+\r
+               // Scales the body's width range by the repeat counts.\r
+               // NOTE(review): the products are plain int multiplications and are\r
+               // not checked for overflow.\r
+               public override void GetWidth (out int min, out int max) {\r
+                       int body_min, body_max;\r
+                       Expression.GetWidth (out body_min, out body_max);\r
+\r
+                       bool unbounded = body_max == Int32.MaxValue || this.max == 0xffff;\r
+\r
+                       min = body_min * this.min;\r
+                       max = unbounded ? Int32.MaxValue : body_max * this.max;\r
+               }\r
+\r
+               // With a Minimum of 0 only the fixed width is reported. Otherwise the\r
+               // body's anchor is reused; a complete substring is repeated Minimum\r
+               // times.\r
+               public override AnchorInfo GetAnchorInfo () {\r
+                       int width = GetFixedWidth ();\r
+                       if (Minimum == 0)\r
+                               return new AnchorInfo (this, width);\r
+\r
+                       AnchorInfo body = Expression.GetAnchorInfo ();\r
+                       if (body.IsPosition)\r
+                               return new AnchorInfo (this, body.Offset, width, body.Position);\r
+\r
+                       if (!body.IsSubstring)\r
+                               return new AnchorInfo (this, width);\r
+\r
+                       if (!body.IsComplete)\r
+                               return new AnchorInfo (this, body.Offset, width, body.Substring, body.IgnoreCase);\r
+\r
+                       string repeated = "";\r
+                       for (int n = 0; n < Minimum; ++ n)\r
+                               repeated = repeated + body.Substring;\r
+\r
+                       return new AnchorInfo (this, 0, width, repeated, body.IgnoreCase);\r
+               }\r
+       }\r
+\r
+       // assertions\r
+\r
+       // Base for conditional nodes: slot 0 holds the expression used when the\r
+       // condition holds, slot 1 the one used when it does not. Either may be null.\r
+       abstract class Assertion : CompositeExpression {\r
+               public Assertion () {\r
+                       // slot 0: true expression, slot 1: false expression\r
+                       for (int slot = 0; slot < 2; ++ slot)\r
+                               Expressions.Add (null);\r
+               }\r
+\r
+               public Expression TrueExpression {\r
+                       get {\r
+                               return Expressions[0];\r
+                       }\r
+                       set {\r
+                               Expressions[0] = value;\r
+                       }\r
+               }\r
+\r
+               public Expression FalseExpression {\r
+                       get {\r
+                               return Expressions[1];\r
+                       }\r
+                       set {\r
+                               Expressions[1] = value;\r
+                       }\r
+               }\r
+\r
+               // Width range over the two branches; the minimum is 0 whenever one\r
+               // of the branches is missing.\r
+               public override void GetWidth (out int min, out int max) {\r
+                       GetWidth (out min, out max, 2);\r
+\r
+                       bool both_present = TrueExpression != null && FalseExpression != null;\r
+                       if (!both_present)\r
+                               min = 0;\r
+               }\r
+       }\r
+\r
+       // Conditional that branches on a capturing group via IfDefined.\r
+       class CaptureAssertion : Assertion {\r
+               private CapturingGroup group;\r
+\r
+               public CaptureAssertion () {\r
+               }\r
+\r
+               // the group whose number is passed to EmitIfDefined\r
+               public CapturingGroup CapturingGroup {\r
+                       get {\r
+                               return this.group;\r
+                       }\r
+                       set {\r
+                               this.group = value;\r
+                       }\r
+               }\r
+\r
+               public override void Compile (ICompiler cmp, bool reverse) {\r
+                       int gid = group.Number;\r
+                       LinkRef tail = cmp.NewLink ();\r
+\r
+                       if (FalseExpression != null) {\r
+                               // two branches:\r
+                               //\r
+                               //    IfDefined :1\r
+                               //      <yes_expr>\r
+                               //      Jump :2\r
+                               // 1:   <no_expr>\r
+                               // 2: <tail>\r
+\r
+                               LinkRef no_branch = cmp.NewLink ();\r
+                               cmp.EmitIfDefined (gid, no_branch);\r
+                               TrueExpression.Compile (cmp, reverse);\r
+                               cmp.EmitJump (tail);\r
+                               cmp.ResolveLink (no_branch);\r
+                               FalseExpression.Compile (cmp, reverse);\r
+                       }\r
+                       else {\r
+                               // single branch:\r
+                               //\r
+                               //    IfDefined :1\r
+                               //      <yes_expr>\r
+                               // 1: <tail>\r
+\r
+                               cmp.EmitIfDefined (gid, tail);\r
+                               TrueExpression.Compile (cmp, reverse);\r
+                       }\r
+\r
+                       cmp.ResolveLink (tail);\r
+               }\r
+\r
+               // Complex when a present branch is complex or the fixed width is not\r
+               // positive.\r
+               public override bool IsComplex () {\r
+                       bool any_complex = false;\r
+\r
+                       if (TrueExpression != null && TrueExpression.IsComplex ())\r
+                               any_complex = true;\r
+                       if (FalseExpression != null && FalseExpression.IsComplex ())\r
+                               any_complex = true;\r
+\r
+                       bool no_positive_width = GetFixedWidth () <= 0;\r
+                       return any_complex || no_positive_width;\r
+               }\r
+       }\r
+\r
+       // Conditional that branches on the outcome of a test expression (slot 2).\r
+       // Reverse flips the direction the test is compiled in; Negate swaps the\r
+       // two targets handed to EmitTest.\r
+       class ExpressionAssertion : Assertion {\r
+               private bool reverse, negate;\r
+\r
+               public ExpressionAssertion () {\r
+                       // slot 2: test expression\r
+                       Expressions.Add (null);\r
+               }\r
+\r
+               public bool Reverse {\r
+                       get {\r
+                               return this.reverse;\r
+                       }\r
+                       set {\r
+                               this.reverse = value;\r
+                       }\r
+               }\r
+\r
+               public bool Negate {\r
+                       get {\r
+                               return this.negate;\r
+                       }\r
+                       set {\r
+                               this.negate = value;\r
+                       }\r
+               }\r
+\r
+               public Expression TestExpression {\r
+                       get {\r
+                               return Expressions[2];\r
+                       }\r
+                       set {\r
+                               Expressions[2] = value;\r
+                       }\r
+               }\r
+\r
+               public override void Compile (ICompiler cmp, bool reverse) {\r
+                       LinkRef yes = cmp.NewLink ();\r
+                       LinkRef no = cmp.NewLink ();\r
+\r
+                       // test op: negation swaps the two targets\r
+\r
+                       if (negate)\r
+                               cmp.EmitTest (no, yes);\r
+                       else\r
+                               cmp.EmitTest (yes, no);\r
+\r
+                       // test expression, compiled in the opposite direction when\r
+                       // exactly one of the two reverse flags is set\r
+\r
+                       bool test_reverse = reverse != this.reverse;\r
+                       TestExpression.Compile (cmp, test_reverse);\r
+                       cmp.EmitTrue ();\r
+\r
+                       // no target expressions: (?= ...)\r
+                       //\r
+                       //    Test :1, :2\r
+                       //      <test_expr>\r
+                       // :2   False\r
+                       // :1   <tail>\r
+\r
+                       if (TrueExpression == null) {\r
+                               cmp.ResolveLink (no);\r
+                               cmp.EmitFalse ();\r
+                               cmp.ResolveLink (yes);\r
+                               return;\r
+                       }\r
+\r
+                       cmp.ResolveLink (yes);\r
+                       TrueExpression.Compile (cmp, reverse);\r
+\r
+                       // yes branch only: (?(...) ...)\r
+                       //\r
+                       //    Test :1, :2\r
+                       //      <test_expr>\r
+                       // :1   <yes_expr>\r
+                       // :2   <tail>\r
+\r
+                       if (FalseExpression == null) {\r
+                               cmp.ResolveLink (no);\r
+                               return;\r
+                       }\r
+\r
+                       // both branches: (?(...) ... | ...)\r
+                       //\r
+                       //    Test :1, :2\r
+                       //      <test_expr>\r
+                       // :1   <yes_expr>\r
+                       //      Jump :3\r
+                       // :2   <no_expr>\r
+                       // :3   <tail>\r
+\r
+                       LinkRef tail = cmp.NewLink ();\r
+\r
+                       cmp.EmitJump (tail);\r
+                       cmp.ResolveLink (no);\r
+                       FalseExpression.Compile (cmp, reverse);\r
+                       cmp.ResolveLink (tail);\r
+               }\r
+       }\r
+\r
+       // alternation\r
+\r
+       // A choice between alternatives, each compiled as Branch / body / Jump.\r
+       class Alternation : CompositeExpression {\r
+               public Alternation () {\r
+               }\r
+\r
+               // the alternatives, in the order they were added\r
+               public ExpressionCollection Alternatives {\r
+                       get {\r
+                               return Expressions;\r
+                       }\r
+               }\r
+\r
+               public void AddAlternative (Expression e) {\r
+                       Alternatives.Add (e);\r
+               }\r
+\r
+               // For every alternative: Branch to `next', the body, Jump to `tail',\r
+               // then resolve `next'. A closing False follows the last alternative\r
+               // before `tail' is resolved.\r
+               public override void Compile (ICompiler cmp, bool reverse) {\r
+                       LinkRef next = cmp.NewLink ();\r
+                       LinkRef tail = cmp.NewLink ();\r
+\r
+                       foreach (Expression choice in Alternatives) {\r
+                               cmp.EmitBranch (next);\r
+\r
+                               choice.Compile (cmp, reverse);\r
+                               cmp.EmitJump (tail);\r
+\r
+                               cmp.ResolveLink (next);\r
+                       }\r
+\r
+                       cmp.EmitFalse ();\r
+                       cmp.ResolveLink (tail);\r
+               }\r
+\r
+               // width range across all alternatives\r
+               public override void GetWidth (out int min, out int max) {\r
+                       int total = Alternatives.Count;\r
+                       GetWidth (out min, out max, total);\r
+               }\r
+\r
+               // Complex when any alternative is complex or the fixed width is not\r
+               // positive.\r
+               public override bool IsComplex () {\r
+                       bool any_complex = false;\r
+                       foreach (Expression choice in Alternatives) {\r
+                               if (choice.IsComplex ())\r
+                                       any_complex = true;\r
+                       }\r
+\r
+                       bool no_positive_width = GetFixedWidth () <= 0;\r
+                       return any_complex || no_positive_width;\r
+               }\r
+       }\r
+\r
+       // terminal expressions\r
+\r
+       class Literal : Expression {\r
+               // A literal run of characters, optionally matched case-insensitively.\r
+\r
+               private string str;\r
+               private bool ignore;\r
+\r
+               // str: the text to match; ignore: the ignore-case flag handed to the\r
+               // compiler and to the anchor info.\r
+               public Literal (string str, bool ignore) {\r
+                       this.ignore = ignore;\r
+                       this.str = str;\r
+               }\r
+\r
+               // The literal text.\r
+               public string String {\r
+                       get { return str; }\r
+                       set { str = value; }\r
+               }\r
+\r
+               // The ignore-case flag.\r
+               public bool IgnoreCase {\r
+                       get { return ignore; }\r
+                       set { ignore = value; }\r
+               }\r
+\r
+               // Emits nothing for an empty literal, a single Character op for a\r
+               // one-character literal, and a String op otherwise.\r
+               public override void Compile (ICompiler cmp, bool reverse) {\r
+                       switch (str.Length) {\r
+                       case 0:\r
+                               break;\r
+\r
+                       case 1:\r
+                               // the second argument is the negate flag: never negated here\r
+                               cmp.EmitCharacter (str[0], false, ignore, reverse);\r
+                               break;\r
+\r
+                       default:\r
+                               cmp.EmitString (str, ignore, reverse);\r
+                               break;\r
+                       }\r
+               }\r
+\r
+               // Minimum and maximum width are both the length of the text.\r
+               public override void GetWidth (out int min, out int max) {\r
+                       int length = str.Length;\r
+                       min = length;\r
+                       max = length;\r
+               }\r
+\r
+               // Substring anchor at offset 0 whose width is the length of the text.\r
+               public override AnchorInfo GetAnchorInfo () {\r
+                       int length = str.Length;\r
+                       return new AnchorInfo (this, 0, length, str, ignore);\r
+               }\r
+\r
+               public override bool IsComplex () {\r
+                       return false;\r
+               }\r
+       }\r
+\r
+       class PositionAssertion : Expression {\r
+               // Zero-width expression that emits a single Position op.\r
+\r
+               private Position pos;\r
+\r
+               public PositionAssertion (Position pos) {\r
+                       this.pos = pos;\r
+               }\r
+\r
+               // The position being asserted.\r
+               public Position Position {\r
+                       get { return pos; }\r
+                       set { pos = value; }\r
+               }\r
+\r
+               // The reverse flag is not used: the Position op takes only the position.\r
+               public override void Compile (ICompiler cmp, bool reverse) {\r
+                       cmp.EmitPosition (pos);\r
+               }\r
+\r
+               // An assertion always has width zero.\r
+               public override void GetWidth (out int min, out int max) {\r
+                       min = 0;\r
+                       max = 0;\r
+               }\r
+\r
+               public override bool IsComplex () {\r
+                       return false;\r
+               }\r
+\r
+               // StartOfString, StartOfLine and StartOfScan produce a position anchor\r
+               // (offset 0, width 0); every other position produces a plain zero-width\r
+               // anchor that carries no position.\r
+               public override AnchorInfo GetAnchorInfo () {\r
+                       bool is_start =\r
+                               pos == Position.StartOfString ||\r
+                               pos == Position.StartOfLine ||\r
+                               pos == Position.StartOfScan;\r
+\r
+                       if (is_start)\r
+                               return new AnchorInfo (this, 0, 0, pos);\r
+\r
+                       return new AnchorInfo (this, 0);\r
+               }\r
+       }\r
+\r
+       class Reference : Expression {\r
+               // Reference to a capturing group, compiled to a Reference op that\r
+               // carries the group's number.\r
+\r
+               private CapturingGroup group;\r
+               private bool ignore;\r
+\r
+               // The referenced group is not a constructor argument; it is assigned\r
+               // afterwards through the CapturingGroup property.\r
+               public Reference (bool ignore) {\r
+                       this.ignore = ignore;\r
+               }\r
+\r
+               // The group this expression refers to.\r
+               public CapturingGroup CapturingGroup {\r
+                       get { return group; }\r
+                       set { group = value; }\r
+               }\r
+\r
+               // The ignore-case flag passed to the Reference op.\r
+               public bool IgnoreCase {\r
+                       get { return ignore; }\r
+                       set { ignore = value; }\r
+               }\r
+\r
+               // Dereferences the group, so CapturingGroup must have been assigned\r
+               // before this is called.\r
+               public override void Compile (ICompiler cmp, bool reverse) {\r
+                       int number = group.Number;\r
+                       cmp.EmitReference (number, ignore, reverse);\r
+               }\r
+\r
+               // Reports the widest possible bounds, 0 .. Int32.MaxValue.\r
+               public override void GetWidth (out int min, out int max) {\r
+                       //group.GetWidth (out min, out max);\r
+                       // TODO set width to referenced group for non-cyclical references\r
+                       max = Int32.MaxValue;\r
+                       min = 0;\r
+               }\r
+\r
+               // Always reported as complex.\r
+               public override bool IsComplex () {\r
+                       return true;    // FIXME incorporate cyclic check\r
+               }\r
+       }\r
+\r
+       class CharacterClass : Expression {\r
+               // A set of characters described by explicit characters/ranges (kept as\r
+               // intervals) plus categories that are listed either positively or\r
+               // negatively. The whole class can itself be negated.\r
+\r
+               private bool negate, ignore;\r
+               private bool[] pos_cats, neg_cats;\r
+               private IntervalCollection intervals;\r
+\r
+               public CharacterClass (bool negate, bool ignore) {\r
+                       this.negate = negate;\r
+                       this.ignore = ignore;\r
+\r
+                       intervals = new IntervalCollection ();\r
+\r
+                       // The category tables are indexed by (int)Category, so they get one\r
+                       // slot more than the last value reported by Enum.GetValues. Newly\r
+                       // allocated bool arrays are already all false.\r
+\r
+                       Array values = Enum.GetValues (typeof (Category));\r
+                       Category last = (Category)values.GetValue (values.Length - 1);\r
+                       int slots = (int)last + 1;\r
+\r
+                       pos_cats = new bool[slots];\r
+                       neg_cats = new bool[slots];\r
+               }\r
+\r
+               // Builds a non-negated, case-sensitive class holding the single\r
+               // category cat, listed negatively when negate is true.\r
+               public CharacterClass (Category cat, bool negate) : this (false, false) {\r
+                       this.AddCategory (cat, negate);\r
+               }\r
+\r
+               // Whether the class as a whole is negated.\r
+               public bool Negate {\r
+                       get { return negate; }\r
+                       set { negate = value; }\r
+               }\r
+\r
+               // The ignore-case flag passed to the Character, Range and Set ops.\r
+               public bool IgnoreCase {\r
+                       get { return ignore; }\r
+                       set { ignore = value; }\r
+               }\r
+\r
+               // Lists cat positively (negate false) or negatively (negate true).\r
+               // A category is only ever in one of the two tables: the latest call\r
+               // for a given category wins.\r
+               public void AddCategory (Category cat, bool negate) {\r
+                       int slot = (int)cat;\r
+\r
+                       pos_cats[slot] = !negate;\r
+                       neg_cats[slot] = negate;\r
+               }\r
+\r
+               // Adds the single character c.\r
+               public void AddCharacter (char c) {\r
+                       intervals.Add (new Interval (c, c));\r
+               }\r
+\r
+               // Adds the range lo .. hi.\r
+               public void AddRange (char lo, char hi) {\r
+                       intervals.Add (new Interval (lo, hi));\r
+               }\r
+\r
+               // Emits one op per listed category and one Character, Range or Set op\r
+               // per interval of the meta-collection. With more than one op the\r
+               // sequence is wrapped: an In op first, then a closing True (negated\r
+               // class) or False op, after which the In op's link is resolved.\r
+               // Nothing at all is emitted for a class without categories or\r
+               // intervals.\r
+               public override void Compile (ICompiler cmp, bool reverse) {\r
+                       // group the intervals using op length as the cost metric\r
+\r
+                       IntervalCollection.CostDelegate cost =\r
+                               new IntervalCollection.CostDelegate (GetIntervalCost);\r
+                       IntervalCollection meta = intervals.GetMetaCollection (cost);\r
+\r
+                       // total number of ops: meta intervals plus listed categories\r
+\r
+                       int ops = meta.Count;\r
+                       for (int slot = 0; slot < pos_cats.Length; ++ slot) {\r
+                               if (pos_cats[slot])\r
+                                       ++ ops;\r
+                               if (neg_cats[slot])\r
+                                       ++ ops;\r
+                       }\r
+\r
+                       if (ops == 0)\r
+                               return;\r
+\r
+                       // the link is always created, but only used when wrapping\r
+\r
+                       LinkRef tail = cmp.NewLink ();\r
+                       bool wrapped = ops > 1;\r
+                       if (wrapped)\r
+                               cmp.EmitIn (tail);\r
+\r
+                       // categories: a positively listed category takes the class's negate\r
+                       // flag, a negatively listed one takes its inverse\r
+\r
+                       for (int slot = 0; slot < pos_cats.Length; ++ slot) {\r
+                               bool cat_negate;\r
+                               if (pos_cats[slot])\r
+                                       cat_negate = negate;\r
+                               else if (neg_cats[slot])\r
+                                       cat_negate = !negate;\r
+                               else\r
+                                       continue;\r
+\r
+                               cmp.EmitCategory ((Category)slot, cat_negate, reverse);\r
+                       }\r
+\r
+                       // characters, ranges and sets from the meta-collection\r
+\r
+                       foreach (Interval span in meta) {\r
+                               if (span.IsDiscontiguous) {\r
+                                       BitArray bits = GetSetBits (span);\r
+                                       cmp.EmitSet ((char)span.low, bits, negate, ignore, reverse);\r
+                               }\r
+                               else if (span.IsSingleton) {\r
+                                       cmp.EmitCharacter ((char)span.low, negate, ignore, reverse);\r
+                               }\r
+                               else {\r
+                                       cmp.EmitRange ((char)span.low, (char)span.high, negate, ignore, reverse);\r
+                               }\r
+                       }\r
+\r
+                       // close the wrapper\r
+\r
+                       if (!wrapped)\r
+                               return;\r
+\r
+                       if (negate)\r
+                               cmp.EmitTrue ();\r
+                       else\r
+                               cmp.EmitFalse ();\r
+\r
+                       cmp.ResolveLink (tail);\r
+               }\r
+\r
+               // A character class always has width one.\r
+               public override void GetWidth (out int min, out int max) {\r
+                       min = 1;\r
+                       max = 1;\r
+               }\r
+\r
+               public override bool IsComplex () {\r
+                       return false;\r
+               }\r
+\r
+               // private\r
+\r
+               // Bitmap for a discontiguous meta-interval: bit k is set when the value\r
+               // span.low + k lies inside one of the added intervals that span contains.\r
+               private BitArray GetSetBits (Interval span) {\r
+                       BitArray bits = new BitArray (span.Size);\r
+\r
+                       foreach (Interval part in intervals) {\r
+                               if (!span.Contains (part))\r
+                                       continue;\r
+\r
+                               for (int c = part.low; c <= part.high; ++ c)\r
+                                       bits[c - span.low] = true;\r
+                       }\r
+\r
+                       return bits;\r
+               }\r
+\r
+               // Cost callback for GetMetaCollection.\r
+               private static double GetIntervalCost (Interval span) {\r
+                       // use op length as cost metric (=> optimize for space)\r
+\r
+                       if (span.IsDiscontiguous)\r
+                               return 3 + ((span.Size + 0xf) >> 4);            // Set\r
+\r
+                       if (span.IsSingleton)\r
+                               return 2;                                       // Character\r
+\r
+                       return 3;                                               // Range\r
+               }\r
+       }\r
+\r
+       class AnchorInfo {\r
+               // Immutable description of an anchor attached to an expression. An\r
+               // anchor carries either a substring (str != null), a position\r
+               // (pos != Position.Any), or neither, together with an offset and width.\r
+\r
+               private Expression expr;\r
+\r
+               private Position pos;\r
+               private int offset;\r
+\r
+               private string str;\r
+               private int width;\r
+               private bool ignore;\r
+\r
+               // Plain anchor: offset 0, no substring, Position.Any.\r
+               public AnchorInfo (Expression expr, int width) {\r
+                       Setup (expr, 0, width, null, false, Position.Any);\r
+               }\r
+\r
+               // Substring anchor. When ignore is set the text is stored lower-cased\r
+               // via String.ToLower ().\r
+               public AnchorInfo (Expression expr, int offset, int width, string str, bool ignore) {\r
+                       string text = str;\r
+                       if (ignore)\r
+                               text = str.ToLower ();\r
+\r
+                       Setup (expr, offset, width, text, ignore, Position.Any);\r
+               }\r
+\r
+               // Position anchor: no substring, case flag cleared.\r
+               public AnchorInfo (Expression expr, int offset, int width, Position pos) {\r
+                       Setup (expr, offset, width, null, false, pos);\r
+               }\r
+\r
+               // Shared field initialisation for the three constructors.\r
+               private void Setup (Expression expr, int offset, int width, string str, bool ignore, Position pos) {\r
+                       this.expr = expr;\r
+                       this.offset = offset;\r
+                       this.width = width;\r
+\r
+                       this.str = str;\r
+                       this.ignore = ignore;\r
+                       this.pos = pos;\r
+               }\r
+\r
+               // The expression this anchor was created for.\r
+               public Expression Expression {\r
+                       get { return expr; }\r
+               }\r
+\r
+               public int Offset {\r
+                       get { return offset; }\r
+               }\r
+\r
+               public int Width {\r
+                       get { return width; }\r
+               }\r
+\r
+               // Length of the stored substring, or 0 when there is none.\r
+               public int Length {\r
+                       get {\r
+                               if (str == null)\r
+                                       return 0;\r
+\r
+                               return str.Length;\r
+                       }\r
+               }\r
+\r
+               // A negative width marks the width as unknown.\r
+               public bool IsUnknownWidth {\r
+                       get { return width < 0; }\r
+               }\r
+\r
+               // True when the substring length equals the width.\r
+               public bool IsComplete {\r
+                       get { return Length == width; }\r
+               }\r
+\r
+               // The stored substring (already lower-cased for ignore-case anchors),\r
+               // or null.\r
+               public string Substring {\r
+                       get { return str; }\r
+               }\r
+\r
+               public bool IgnoreCase {\r
+                       get { return ignore; }\r
+               }\r
+\r
+               public Position Position {\r
+                       get { return pos; }\r
+               }\r
+\r
+               public bool IsSubstring {\r
+                       get { return str != null; }\r
+               }\r
+\r
+               public bool IsPosition {\r
+                       get { return pos != Position.Any; }\r
+               }\r
+\r
+               // Same as GetInterval (0).\r
+               public Interval GetInterval () {\r
+                       return GetInterval (0);\r
+               }\r
+\r
+               // Interval covered by the substring when the expression begins at\r
+               // start: from start + offset to the substring's last character,\r
+               // inclusive. Interval.Empty when there is no substring.\r
+               public Interval GetInterval (int start) {\r
+                       if (str == null)\r
+                               return Interval.Empty;\r
+\r
+                       int first = start + offset;\r
+                       return new Interval (first, first + str.Length - 1);\r
+               }\r
+       }\r
+}\r