Bug 10670 fix.
[mono.git] / mcs / class / System / System.Text.RegularExpressions / parser.cs
index 3543b4b09312acd87bd8b5960e28ed03c31a0aa0..f3ddf196730bb8879cbdc0dd8fc5a82829c91486 100644 (file)
-//\r
-// assembly:   System\r
-// namespace:  System.Text.RegularExpressions\r
-// file:       parser.cs\r
-//\r
-// author:     Dan Lewis (dlewis@gmx.co.uk)\r
-//             (c) 2002\r
-\r
-using System;\r
-using System.Collections;\r
-using System.Globalization;\r
-\r
-namespace System.Text.RegularExpressions.Syntax {\r
-\r
-       class Parser {\r
-               public static int ParseDecimal (string str, ref int ptr) {\r
-                       return ParseNumber (str, ref ptr, 10, 1, Int32.MaxValue);\r
-               }\r
-\r
-               public static int ParseOctal (string str, ref int ptr) {\r
-                       return ParseNumber (str, ref ptr, 8, 1, 3);\r
-               }\r
-\r
-               public static int ParseHex (string str, ref int ptr, int digits) {\r
-                       return ParseNumber (str, ref ptr, 16, digits, digits);\r
-               }\r
-\r
-               public static int ParseNumber (string str, ref int ptr, int b, int min, int max) {\r
-                       int p = ptr, n = 0, digits = 0, d;\r
-                       if (max < min)\r
-                               max = Int32.MaxValue;\r
-\r
-                       while (digits < max && p < str.Length) {\r
-                               d = ParseDigit (str[p ++], b, digits);\r
-                               if (d < 0) {\r
-                                       -- p;\r
-                                       break;\r
-                               }\r
-\r
-                               n = n * b + d;\r
-                               ++ digits;\r
-                       }\r
-\r
-                       if (digits < min)\r
-                               return -1;\r
-\r
-                       ptr = p;\r
-                       return n;\r
-               }\r
-\r
-               public static string ParseName (string str, ref int ptr) {\r
-                       if (Char.IsDigit (str[ptr])) {\r
-                               int gid = ParseNumber (str, ref ptr, 10, 1, 0);\r
-                               if (gid > 0)\r
-                                       return gid.ToString ();\r
-                               \r
-                               return null;\r
-                       }\r
-\r
-                       int start = ptr;\r
-                       for (;;) {\r
-                               if (!IsNameChar (str[ptr]))\r
-                                       break;\r
-                               ++ ptr;\r
-                       }\r
-\r
-                       if (ptr - start > 0)\r
-                               return str.Substring (start, ptr - start);\r
-\r
-                       return null;\r
-               }\r
-\r
-               public static string Escape (string str) {\r
-                       string result = "";\r
-                       for (int i = 0; i < str.Length; ++ i) {\r
-                               char c = str[i];\r
-                               switch (c) {\r
-                               case '\\': case '*': case '+': case '?': case '|':\r
-                               case '{': case '[': case '(': case ')': case '^':\r
-                               case '$': case '.': case '#': case ' ':\r
-                                       result += "\\" + c;\r
-                                       break;\r
-\r
-                               case '\t': result += "\\t"; break;\r
-                               case '\n': result += "\\n"; break;\r
-                               case '\r': result += "\\r"; break;\r
-                               case '\f': result += "\\f"; break;\r
-\r
-                               default: result += c; break;\r
-                               }\r
-                       }\r
-\r
-                       return result;\r
-               }\r
-\r
-               public static string Unescape (string str) {\r
-                       return new Parser ().ParseString (str);\r
-               }\r
-\r
-               // public instance\r
-\r
-               public Parser () {\r
-                       this.caps = new ArrayList ();\r
-                       this.refs = new Hashtable ();\r
-               }\r
-\r
-               public RegularExpression ParseRegularExpression (string pattern, RegexOptions options) {\r
-                       this.pattern = pattern;\r
-                       this.ptr = 0;\r
-\r
-                       caps.Clear ();\r
-                       refs.Clear ();\r
-                       this.num_groups = 0;\r
-\r
-                       try {\r
-                               RegularExpression re = new RegularExpression ();\r
-                               ParseGroup (re, options, null);\r
-                               ResolveReferences ();\r
-\r
-                               re.GroupCount = num_groups;\r
-                               \r
-                               return re;\r
-                       }\r
-                       catch (IndexOutOfRangeException) {\r
-                               throw NewParseException ("Unexpected end of pattern.");\r
-                       }\r
-               }\r
-\r
-               public IDictionary GetMapping () {\r
-                       Hashtable mapping = new Hashtable ();\r
-                       int end = caps.Count;\r
-                       mapping.Add ("0", 0);\r
-                       for (int i = 0; i < end;) {\r
-                               CapturingGroup group = (CapturingGroup) caps [i];\r
-                               i++;\r
-                               if (group.Name != null && !mapping.Contains (group.Name))\r
-                                       mapping.Add (group.Name, group.Number);\r
-                               else\r
-                                       mapping.Add (i.ToString (), i);\r
-                       }\r
-\r
-                       return mapping;\r
-               }\r
-\r
-               // private methods\r
-\r
-               private void ParseGroup (Group group, RegexOptions options, Assertion assertion) {\r
-                       bool is_top_level = group is RegularExpression;\r
-               \r
-                       Alternation alternation = null;\r
-                       string literal = null;\r
-\r
-                       Group current = new Group ();\r
-                       Expression expr = null;\r
-                       bool closed = false;\r
-\r
-                       while (true) {\r
-                               ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
-                               if (ptr >= pattern.Length)\r
-                                       break;\r
-                               \r
-                               // (1) Parse for Expressions\r
-                       \r
-                               char ch = pattern[ptr ++];\r
-                               \r
-                               switch (ch) {\r
-                               case '^': {\r
-                                       Position pos =\r
-                                               IsMultiline (options) ? Position.StartOfLine : Position.Start;\r
-                                       expr = new PositionAssertion (pos);\r
-                                       break;\r
-                               }\r
-\r
-                               case '$': {\r
-                                       Position pos =\r
-                                               IsMultiline (options) ? Position.EndOfLine : Position.End;\r
-                                       expr = new PositionAssertion (pos);\r
-                                       break;\r
-                               }\r
-\r
-                               case '.': {\r
-                                       Category cat =\r
-                                               IsSingleline (options) ? Category.AnySingleline : Category.Any;\r
-                                       expr = new CharacterClass (cat, false);\r
-                                       break;\r
-                               }\r
-\r
-                               case '\\': {\r
-                                       int c = ParseEscape ();\r
-                                       if (c >= 0)\r
-                                               ch = (char)c;\r
-                                       else {\r
-                                               expr = ParseSpecial (options);\r
-\r
-                                               if (expr == null)\r
-                                                       ch = pattern[ptr ++];           // default escape\r
-                                       }\r
-                                       break;\r
-                               }\r
-\r
-                               case '[': {\r
-                                       expr = ParseCharacterClass (options);\r
-                                       break;\r
-                               }\r
-\r
-                               case '(': {\r
-                                       bool ignore = IsIgnoreCase (options);\r
-                                       expr = ParseGroupingConstruct (ref options);\r
-                                       if (expr == null) {\r
-                                               if (literal != null && IsIgnoreCase (options) != ignore) {\r
-                                                       current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));\r
-                                                       literal = null;\r
-                                               }\r
-\r
-                                               continue;\r
-                                       }\r
-                                       break;\r
-                               }\r
-\r
-                               case ')': {\r
-                                       closed = true;\r
-                                       goto EndOfGroup;\r
-                               }\r
-\r
-                               case '|': {\r
-                                       if (literal != null) {\r
-                                               current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));\r
-                                               literal = null;\r
-                                       }\r
-\r
-                                       if (assertion != null) {\r
-                                               if (assertion.TrueExpression == null)\r
-                                                       assertion.TrueExpression = current;\r
-                                               else if (assertion.FalseExpression == null)\r
-                                                       assertion.FalseExpression = current;\r
-                                               else\r
-                                                       throw NewParseException ("Too many | in (?()|).");\r
-                                       }\r
-                                       else {\r
-                                               if (alternation == null)\r
-                                                       alternation = new Alternation ();\r
-\r
-                                               alternation.AddAlternative (current);\r
-                                       }\r
-\r
-                                       current = new Group ();\r
-                                       continue;\r
-                               }\r
-\r
-                               case '*': case '+': case '?': {\r
-                                       throw NewParseException ("Bad quantifier.");\r
-                               }\r
-\r
-                               default: \r
-                                       break;          // literal character\r
-                               }\r
-\r
-                               ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
-                               \r
-                               // (2) Check for Repetitions\r
-                               \r
-                               if (ptr < pattern.Length) {\r
-                                       char k = pattern[ptr];\r
-\r
-                                       if (k == '?' || k == '*' || k == '+' || k == '{') {\r
-                                               ++ ptr;\r
-\r
-                                               int min = 0, max = 0;\r
-                                               bool lazy = false;\r
-\r
-                                               switch (k) {\r
-                                               case '?': min = 0; max = 1; break;\r
-                                               case '*': min = 0; max = 0xffff; break;\r
-                                               case '+': min = 1; max = 0xffff; break;\r
-                                               case '{': ParseRepetitionBounds (out min, out max, options); break;\r
-                                               }\r
-\r
-                                               ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
-                                               if (ptr < pattern.Length && pattern[ptr] == '?') {\r
-                                                       ++ ptr;\r
-                                                       lazy = true;\r
-                                               }\r
-\r
-                                               Repetition repetition = new Repetition (min, max, lazy);\r
-\r
-                                               if (expr == null)\r
-                                                       repetition.Expression = new Literal (ch.ToString (), IsIgnoreCase (options));\r
-                                               else\r
-                                                       repetition.Expression = expr;\r
-\r
-                                               expr = repetition;\r
-                                       }\r
-                               }\r
-\r
-                               // (3) Append Expression and/or Literal\r
-\r
-                               if (expr == null) {\r
-                                       if (literal == null)\r
-                                               literal = "";\r
-                                       literal += ch;\r
-                               }\r
-                               else {\r
-                                       if (literal != null) {\r
-                                               current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));\r
-                                               literal = null;\r
-                                       }\r
-\r
-                                       current.AppendExpression (expr);\r
-                                       expr = null;\r
-                               }\r
-\r
-                               if (is_top_level && ptr >= pattern.Length)\r
-                                       goto EndOfGroup;\r
-                       }\r
-\r
-               EndOfGroup:\r
-                       if (is_top_level && closed)\r
-                               throw NewParseException ("Too many )'s.");\r
-                       if (!is_top_level && !closed)\r
-                               throw NewParseException ("Not enough )'s.");\r
-                               \r
-               \r
-                       // clean up literals and alternations\r
-\r
-                       if (literal != null)\r
-                               current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));\r
-\r
-                       if (assertion != null) {\r
-                               if (assertion.TrueExpression == null)\r
-                                       assertion.TrueExpression = current;\r
-                               else\r
-                                       assertion.FalseExpression = current;\r
-                               \r
-                               group.AppendExpression (assertion);\r
-                       }\r
-                       else if (alternation != null) {\r
-                               alternation.AddAlternative (current);\r
-                               group.AppendExpression (alternation);\r
-                       }\r
-                       else\r
-                               group.AppendExpression (current);\r
-               }\r
-\r
-               private Expression ParseGroupingConstruct (ref RegexOptions options) {\r
-                       if (pattern[ptr] != '?') {\r
-                               Group group;\r
-\r
-                               if (IsExplicitCapture (options))\r
-                                       group = new Group ();\r
-                               else {\r
-                                       group = new CapturingGroup ();\r
-                                       caps.Add (group);\r
-                               }\r
-\r
-                               ParseGroup (group, options, null);\r
-                               return group;\r
-                       }\r
-                       else\r
-                               ++ ptr;\r
-\r
-                       switch (pattern[ptr]) {\r
-                       case ':': {                                             // non-capturing group\r
-                               ++ ptr;\r
-                               Group group = new Group ();\r
-                               ParseGroup (group, options, null);\r
-\r
-                               return group;\r
-                       }\r
-\r
-                       case '>': {                                             // non-backtracking group\r
-                               ++ ptr;\r
-                               Group group = new NonBacktrackingGroup ();\r
-                               ParseGroup (group, options, null);\r
-                               \r
-                               return group;\r
-                       }\r
-\r
-                       case 'i': case 'm': case 'n':\r
-                       case 's': case 'x': case '-': {                         // options\r
-                               RegexOptions o = options;\r
-                               ParseOptions (ref o, false);\r
-                               if (pattern[ptr] == '-') {\r
-                                       ++ ptr;\r
-                                       ParseOptions (ref o, true);\r
-                               }\r
-\r
-                               if (pattern[ptr] == ':') {                      // pass options to child group\r
-                                       ++ ptr;\r
-                                       Group group = new Group ();\r
-                                       ParseGroup (group, o, null);\r
-                                       return group;\r
-                               }\r
-                               else if (pattern[ptr] == ')') {                 // change options of enclosing group\r
-                                       ++ ptr;\r
-                                       options = o;\r
-                                       return null;\r
-                               }\r
-                               else\r
-                                       throw NewParseException ("Bad options");\r
-                       }\r
-\r
-                       case '<': case '=': case '!': {                         // lookahead/lookbehind\r
-                               ExpressionAssertion asn = new ExpressionAssertion ();\r
-                               if (!ParseAssertionType (asn))\r
-                                       goto case '\'';                         // it's a (?<name> ) construct\r
-\r
-                               Group test = new Group ();\r
-                               ParseGroup (test, options, null);\r
-\r
-                               asn.TestExpression = test;\r
-                               return asn;\r
-                       }\r
-\r
-                       case '\'': {                                            // named/balancing group\r
-                               char delim;\r
-                               if (pattern[ptr] == '<')\r
-                                       delim = '>';\r
-                               else\r
-                                       delim = '\'';\r
-\r
-                               ++ ptr;\r
-                               string name = ParseName ();\r
-\r
-                               if (pattern[ptr] == delim) {\r
-                                       // capturing group\r
-\r
-                                       if (name == null)\r
-                                               throw NewParseException ("Bad group name.");\r
-\r
-                                       ++ ptr;\r
-                                       CapturingGroup cap = new CapturingGroup ();\r
-                                       cap.Name = name;\r
-                                       caps.Add (cap);\r
-                                       ParseGroup (cap, options, null);\r
-\r
-                                       return cap;\r
-                               }\r
-                               else if (pattern[ptr] == '-') {\r
-                                       // balancing group\r
-\r
-                                       ++ ptr;\r
-                                       string balance_name = ParseName ();\r
-                                       if (balance_name == null || pattern[ptr] != delim)\r
-                                               throw NewParseException ("Bad balancing group name.");\r
-\r
-                                       ++ ptr;\r
-                                       BalancingGroup bal = new BalancingGroup ();\r
-                                       bal.Name = name;\r
-                                       caps.Add (bal);\r
-                                       refs.Add (bal, balance_name);\r
-\r
-                                       return bal;\r
-                               }\r
-                               else\r
-                                       throw NewParseException ("Bad group name.");\r
-                       }\r
-\r
-                       case '(': {                                             // expression/capture test\r
-                               Assertion asn;\r
-                       \r
-                               ++ ptr;\r
-                               int p = ptr;\r
-                               string name = ParseName ();\r
-                               if (name == null || pattern[ptr] != ')') {      // expression test\r
-                                       // FIXME MS implementation doesn't seem to\r
-                                       // implement this version of (?(x) ...)\r
-\r
-                                       ptr = p;\r
-                                       ExpressionAssertion expr_asn = new ExpressionAssertion ();\r
-\r
-                                       if (pattern[ptr] == '?') {\r
-                                               ++ ptr;\r
-                                               if (!ParseAssertionType (expr_asn))\r
-                                                       throw NewParseException ("Bad conditional.");\r
-                                       }\r
-                                       else {\r
-                                               expr_asn.Negate = false;\r
-                                               expr_asn.Reverse = false;\r
-                                       }\r
-\r
-                                       Group test = new Group ();\r
-                                       ParseGroup (test, options, null);\r
-                                       expr_asn.TestExpression = test;\r
-                                       asn = expr_asn;\r
-                               }\r
-                               else {                                          // capture test\r
-                                       ++ ptr;\r
-                                       asn = new CaptureAssertion ();\r
-                                       refs.Add (asn, name);\r
-                               }\r
-\r
-                               Group group = new Group ();\r
-                               ParseGroup (group, options, asn);\r
-                               return group;\r
-                       }\r
-\r
-                       case '#': {                                             // comment\r
-                               ++ ptr;\r
-                               while (pattern[ptr ++] != ')') {\r
-                                       if (ptr >= pattern.Length)\r
-                                               throw NewParseException ("Unterminated (?#...) comment.");\r
-                               }\r
-                               return null;\r
-                       }\r
-\r
-                       default:                                                // error\r
-                               throw NewParseException ("Bad grouping construct.");\r
-                       }\r
-               }\r
-\r
-               private bool ParseAssertionType (ExpressionAssertion assertion) {\r
-                       if (pattern[ptr] == '<') {\r
-                               switch (pattern[ptr + 1]) {\r
-                               case '=':\r
-                                       assertion.Negate = false;\r
-                                       break;\r
-                               case '!':\r
-                                       assertion.Negate = true;\r
-                                       break;\r
-                               default:\r
-                                       return false;\r
-                               }\r
-\r
-                               assertion.Reverse = true;\r
-                               ptr += 2;\r
-                       }\r
-                       else {\r
-                               switch (pattern[ptr]) {\r
-                               case '=':\r
-                                       assertion.Negate = false;\r
-                                       break;\r
-                               case '!':\r
-                                       assertion.Negate = true;\r
-                                       break;\r
-                               default:\r
-                                       return false;\r
-                               }\r
-\r
-                               assertion.Reverse = false;\r
-                               ptr += 1;\r
-                       }\r
-\r
-                       return true;\r
-               }\r
-\r
-               private void ParseOptions (ref RegexOptions options, bool negate) {\r
-                       for (;;) {\r
-                               switch (pattern[ptr]) {\r
-                               case 'i':\r
-                                       if (negate)\r
-                                               options &= ~RegexOptions.IgnoreCase;\r
-                                       else\r
-                                               options |= RegexOptions.IgnoreCase;\r
-                                       break;\r
-\r
-                               case 'm':\r
-                                       if (negate)\r
-                                               options &= ~RegexOptions.Multiline;\r
-                                       else\r
-                                               options |= RegexOptions.Multiline;\r
-                                       break;\r
-                                       \r
-                               case 'n':\r
-                                       if (negate)\r
-                                               options &= ~RegexOptions.ExplicitCapture;\r
-                                       else\r
-                                               options |= RegexOptions.ExplicitCapture;\r
-                                       break;\r
-                                       \r
-                               case 's':\r
-                                       if (negate)\r
-                                               options &= ~RegexOptions.Singleline;\r
-                                       else\r
-                                               options |= RegexOptions.Singleline;\r
-                                       break;\r
-                                       \r
-                               case 'x':\r
-                                       if (negate)\r
-                                               options &= ~RegexOptions.IgnorePatternWhitespace;\r
-                                       else\r
-                                               options |= RegexOptions.IgnorePatternWhitespace;\r
-                                       break;\r
-\r
-                               default:\r
-                                       return;\r
-                               }\r
-\r
-                               ++ ptr;\r
-                       }\r
-               }\r
-\r
-               private Expression ParseCharacterClass (RegexOptions options) {\r
-                       bool negate, ecma;\r
-                       if (pattern[ptr] == '^') {\r
-                               negate = true;\r
-                               ++ ptr;\r
-                       }\r
-                       else\r
-                               negate = false;\r
-                       \r
-                       ecma = IsECMAScript (options);\r
-                       CharacterClass cls = new CharacterClass (negate, IsIgnoreCase (options));\r
-\r
-                       if (pattern[ptr] == ']') {\r
-                               cls.AddCharacter (']');\r
-                               ++ ptr;\r
-                       }\r
-\r
-                       int c = -1;\r
-                       int last = -1;\r
-                       bool range = false;\r
-                       bool closed = false;\r
-                       while (ptr < pattern.Length) {\r
-                               c = pattern[ptr ++];\r
-\r
-                               if (c == ']') {\r
-                                       closed = true;\r
-                                       break;\r
-                               }\r
-                               \r
-                               if (c == '-') {\r
-                                       range = true;\r
-                                       continue;\r
-                               }\r
-\r
-                               if (c == '\\') {\r
-                                       c = ParseEscape ();\r
-                                       if (c < 0) {\r
-                                               // didn't recognize escape\r
-\r
-                                               c = pattern[ptr ++];\r
-                                               switch (c) {\r
-                                               case 'b': c = '\b'; break;\r
-\r
-                                               case 'd':\r
-                                                       cls.AddCategory (ecma ? Category.EcmaDigit : Category.Digit, false);\r
-                                                       last = -1;\r
-                                                       continue;\r
-                                                       \r
-                                               case 'w':\r
-                                                       cls.AddCategory (ecma ? Category.EcmaWord : Category.Word, false);\r
-                                                       last = -1;\r
-                                                       continue;\r
-                                                       \r
-                                               case 's':\r
-                                                       cls.AddCategory (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, false);\r
-                                                       last = -1;\r
-                                                       continue;\r
-                                                       \r
-                                               case 'p':\r
-                                                       cls.AddCategory (ParseUnicodeCategory (), false);       // ignore ecma\r
-                                                       last = -1;\r
-                                                       continue;\r
-                                                       \r
-                                               case 'D':\r
-                                                       cls.AddCategory (ecma ? Category.EcmaDigit : Category.Digit, true);\r
-                                                       last = -1;\r
-                                                       continue;\r
-                                                       \r
-                                               case 'W':\r
-                                                       cls.AddCategory (ecma ? Category.EcmaWord : Category.Word, true);\r
-                                                       last = -1;\r
-                                                       continue;\r
-                                                       \r
-                                               case 'S':\r
-                                                       cls.AddCategory (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, true);\r
-                                                       last = -1;\r
-                                                       continue;\r
-                                                       \r
-                                               case 'P':\r
-                                                       cls.AddCategory (ParseUnicodeCategory (), true);\r
-                                                       last = -1;\r
-                                                       continue;\r
-\r
-                                               default: break;         // add escaped character\r
-                                               }\r
-                                       }\r
-                               }\r
-\r
-                               if (range) {\r
-                                       if (c < last)\r
-                                               throw NewParseException ("[x-y] range in reverse order.");\r
-\r
-                                       if (last >=0 )\r
-                                               cls.AddRange ((char)last, (char)c);\r
-                                       else {\r
-                                               cls.AddCharacter ((char)c);\r
-                                               cls.AddCharacter ('-');\r
-                                       }\r
-\r
-                                       range = false;\r
-                                       last = -1;\r
-                               }\r
-                               else {\r
-                                       cls.AddCharacter ((char)c);\r
-                                       last = c;\r
-                               }\r
-                       }\r
-\r
-                       if (!closed)\r
-                               throw NewParseException ("Unterminated [] set.");\r
-\r
-                       if (range)\r
-                               cls.AddCharacter ('-');\r
-\r
-                       return cls;\r
-               }\r
-\r
-               private void ParseRepetitionBounds (out int min, out int max, RegexOptions options) {\r
-                       int n, m;\r
-\r
-                       /* check syntax */\r
-\r
-                       ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
-                       n = ParseNumber (10, 1, 0);\r
-                       if (n < 0)\r
-                               throw NewParseException ("Illegal {x,y} - bad value of x.");\r
-\r
-                       ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
-                       switch (pattern[ptr ++]) {\r
-                       case '}':\r
-                               m = n;\r
-                               break;\r
-                       case ',':\r
-                               ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
-                               m = ParseNumber (10, 1, 0);\r
-                               ConsumeWhitespace (IsIgnorePatternWhitespace (options));\r
-                               if (pattern[ptr ++] != '}')\r
-                                       throw NewParseException ("Illegal {x,y} - bad value of y.");\r
-                               break;\r
-                       default:\r
-                               throw NewParseException ("Illegal {x,y}");\r
-                       }\r
-\r
-                       /* check bounds and ordering */\r
-\r
-                       if (n >= 0xffff || m >= 0xffff)\r
-                               throw NewParseException ("Illegal {x, y} - maximum of 65535.");\r
-                       if (m >= 0 && m < n)\r
-                               throw NewParseException ("Illegal {x, y} with x > y.");\r
-\r
-                       /* assign min and max */\r
-                       \r
-                       min = n;\r
-                       if (m > 0)\r
-                               max = m;\r
-                       else\r
-                               max = 0xffff;\r
-               }\r
-\r
-               private Category ParseUnicodeCategory () {\r
-                       if (pattern[ptr ++] != '{')\r
-                               throw NewParseException ("Incomplete \\p{X} character escape.");\r
-\r
-                       string name = ParseName (pattern, ref ptr);\r
-                       if (name == null)\r
-                               throw NewParseException ("Incomplete \\p{X} character escape.");\r
-\r
-                       Category cat = CategoryUtils.CategoryFromName (name);\r
-                       if (cat == Category.None)\r
-                               throw NewParseException ("Unknown property '" + name + "'.");\r
-\r
-                       if (pattern[ptr ++] != '}')\r
-                               throw NewParseException ("Incomplete \\p{X} character escape.");\r
-\r
-                       return cat;\r
-               }\r
-\r
-               private Expression ParseSpecial (RegexOptions options) {\r
-                       int p = ptr;\r
-                       bool ecma = IsECMAScript (options);\r
-                       Expression expr = null;\r
-                       \r
-                       switch (pattern[ptr ++]) {\r
-\r
-                       // categories\r
-\r
-                       case 'd':\r
-                               expr = new CharacterClass (ecma ? Category.EcmaDigit : Category.Digit, false);\r
-                               break;\r
-                               \r
-                       case 'w':\r
-                               expr = new CharacterClass (ecma ? Category.EcmaWord : Category.Word, false);\r
-                               break;\r
-                               \r
-                       case 's':\r
-                               expr = new CharacterClass (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, false);\r
-                               break;\r
-                               \r
-                       case 'p':\r
-                               // this is odd - ECMAScript isn't supposed to support Unicode,\r
-                               // yet \p{..} compiles and runs under the MS implementation\r
-                               // identically to canonical mode. That's why I'm ignoring the\r
-                               // value of ecma here.\r
-                       \r
-                               expr = new CharacterClass (ParseUnicodeCategory (), false);\r
-                               break;\r
-                               \r
-                       case 'D':\r
-                               expr = new CharacterClass (ecma ? Category.EcmaDigit : Category.Digit, true);\r
-                               break;\r
-                               \r
-                       case 'W':\r
-                               expr = new CharacterClass (ecma ? Category.EcmaWord : Category.Word, true);\r
-                               break;\r
-                               \r
-                       case 'S':\r
-                               expr = new CharacterClass (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, true);\r
-                               break;\r
-                               \r
-                       case 'P':\r
-                               expr = new CharacterClass (ParseUnicodeCategory (), true);\r
-                               break;\r
-\r
-                       // positions\r
-\r
-                       case 'A': expr = new PositionAssertion (Position.StartOfString); break;\r
-                       case 'Z': expr = new PositionAssertion (Position.End); break;\r
-                       case 'z': expr = new PositionAssertion (Position.EndOfString); break;\r
-                       case 'G': expr = new PositionAssertion (Position.StartOfScan); break;\r
-                       case 'b': expr = new PositionAssertion (Position.Boundary); break;\r
-                       case 'B': expr = new PositionAssertion (Position.NonBoundary); break;\r
-                       \r
-                       // references\r
-\r
-                       case '1': case '2': case '3': case '4': case '5':\r
-                       case '6': case '7': case '8': case '9': {\r
-                               ptr --;\r
-                               int n = ParseNumber (10, 1, 0);\r
-                               if (n < 0) {\r
-                                       ptr = p;\r
-                                       return null;\r
-                               }\r
-\r
-                               // FIXME test if number is within number of assigned groups\r
-                               // this may present a problem for right-to-left matching\r
-\r
-                               Reference reference = new Reference (IsIgnoreCase (options));\r
-                               refs.Add (reference, n.ToString ());\r
-                               expr = reference;\r
-                               break;\r
-                       }\r
-\r
-                       case 'k': {\r
-                               char delim = pattern[ptr ++];\r
-                               if (delim == '<')\r
-                                       delim = '>';\r
-                               else if (delim != '\'')\r
-                                       throw NewParseException ("Malformed \\k<...> named backreference.");\r
-\r
-                               string name = ParseName ();\r
-                               if (name == null || pattern[ptr] != delim)\r
-                                       throw NewParseException ("Malformed \\k<...> named backreference.");\r
-\r
-                               ++ ptr;\r
-                               Reference reference = new Reference (IsIgnoreCase (options));\r
-                               refs.Add (reference, name);\r
-                               expr = reference;\r
-                               break;\r
-                       }\r
-\r
-                       default:\r
-                               expr = null;\r
-                               break;\r
-                       }\r
-\r
-                       if (expr == null)\r
-                               ptr = p;\r
-\r
-                       return expr;\r
-               }\r
-\r
-               private int ParseEscape () {\r
-                       int p = ptr;\r
-                       int c;\r
-\r
-                       if (p >= pattern.Length)\r
-                               throw new ArgumentException (\r
-                                               String.Format ("Parsing \"{0}\" - Illegal \\ at end of " + \r
-                                                               "pattern.", pattern), pattern);\r
-                       \r
-                       switch (pattern[ptr ++]) {\r
-       \r
-                       // standard escapes (except \b)\r
-\r
-                       case 'a': return '\u0007';\r
-                       case 't': return '\u0009';\r
-                       case 'r': return '\u000d';\r
-                       case 'v': return '\u000b';\r
-                       case 'f': return '\u000c';\r
-                       case 'n': return '\u000a';\r
-                       case 'e': return '\u001b';\r
-                       case '\\': return '\\';\r
-\r
-                       // character codes\r
-\r
-                       case '0': return ParseOctal (pattern, ref ptr);\r
-\r
-                       case 'x':\r
-                               c = ParseHex (pattern, ref ptr, 2);\r
-                               if (c < 0)\r
-                                       throw NewParseException ("Insufficient hex digits");\r
-\r
-                               return c;\r
-\r
-                       case 'u':\r
-                               c = ParseHex (pattern, ref ptr, 4);\r
-                               if (c < 0)\r
-                                       throw NewParseException ("Insufficient hex digits");\r
-                               \r
-                               return c;\r
-\r
-                       // control characters\r
-\r
-                       case 'c':\r
-                               c = pattern[p ++];\r
-                               if (c >= 'A' && c <= 'Z')\r
-                                       return c - 'A';\r
-                               else if (c >= '@' && c <= '_')\r
-                                       return c - '@';\r
-                               else\r
-                                       throw NewParseException ("Unrecognized control character.");\r
-\r
-                       // unknown escape\r
-\r
-                       default:\r
-                               ptr = p;\r
-                               return -1;\r
-                       }\r
-               }\r
-\r
-               private string ParseName () {\r
-                       return Parser.ParseName (pattern, ref ptr);\r
-               }\r
-\r
-               private static bool IsNameChar (char c) {\r
-                       UnicodeCategory cat = Char.GetUnicodeCategory (c);\r
-                       if (cat == UnicodeCategory.ModifierLetter)\r
-                               return false;\r
-                       if (cat == UnicodeCategory.ConnectorPunctuation)\r
-                               return true;\r
-                       return Char.IsLetterOrDigit (c);\r
-               }\r
-       \r
-               private int ParseNumber (int b, int min, int max) {\r
-                       return Parser.ParseNumber (pattern, ref ptr, b, min, max);\r
-               }\r
-\r
-               private int ParseDecimal () {\r
-                       return Parser.ParseDecimal (pattern, ref ptr);\r
-               }\r
-\r
-               private static int ParseDigit (char c, int b, int n) {\r
-                       switch (b) {\r
-                       case 8:\r
-                               if (c >= '0' && c <= '7')\r
-                                       return c - '0';\r
-                               else\r
-                                       return -1;\r
-                       case 10:\r
-                               if (c >= '0' && c <= '9')\r
-                                       return c - '0';\r
-                               else\r
-                                       return -1;\r
-                       case 16:\r
-                               if (c >= '0' && c <= '9')\r
-                                       return c - '0';\r
-                               else if (c >= 'a' && c <= 'f')\r
-                                       return 10 + c - 'a';\r
-                               else if (c >= 'A' && c <= 'F')\r
-                                       return 10 + c - 'A';\r
-                               else\r
-                                       return -1;\r
-                       default:\r
-                               return -1;\r
-                       }\r
-               }\r
-\r
-               private void ConsumeWhitespace (bool ignore) {\r
-                       while (true) {\r
-                               if (ptr >= pattern.Length)\r
-                                       break;\r
-                       \r
-                               if (pattern[ptr] == '(') {\r
-                                       if (ptr + 3 >= pattern.Length)\r
-                                               return;\r
-\r
-                                       if (pattern[ptr + 1] != '?' || pattern[ptr + 2] != '#')\r
-                                               return;\r
-\r
-                                       ptr += 3;\r
-                                       while (pattern[ptr ++] != ')')\r
-                                               /* ignore */ ;\r
-                               }\r
-                               else if (ignore && pattern[ptr] == '#') {\r
-                                       while (ptr < pattern.Length && pattern[ptr ++] != '\n')\r
-                                               /* ignore */ ;\r
-                               }\r
-                               else if (ignore && Char.IsWhiteSpace (pattern[ptr])) {\r
-                                       while (ptr < pattern.Length && Char.IsWhiteSpace (pattern[ptr]))\r
-                                               ++ ptr;\r
-                               }\r
-                               else\r
-                                       return;\r
-                       }\r
-               }\r
-\r
-               private string ParseString (string pattern) {\r
-                       this.pattern = pattern;\r
-                       this.ptr = 0;\r
-\r
-                       string result = "";\r
-                       while (ptr < pattern.Length) {\r
-                               int c = pattern[ptr];\r
-                               if (c == '\\')\r
-                                       c = ParseEscape ();\r
-                               ptr ++; \r
-                               result += (char)c;\r
-                       }\r
-\r
-                       return result;\r
-               }\r
-\r
-               private void ResolveReferences () {\r
-                       int gid = 1;\r
-                       Hashtable dict = new Hashtable ();\r
-\r
-                       // number unnamed groups\r
-\r
-                       foreach (CapturingGroup group in caps) {\r
-                               if (group.Name == null) {\r
-                                       dict.Add (gid.ToString (), group);\r
-                                       group.Number = gid ++;\r
-\r
-                                       ++ num_groups;\r
-                               }\r
-                       }\r
-\r
-                       // number named groups\r
-\r
-                       foreach (CapturingGroup group in caps) {\r
-                               if (group.Name != null) {\r
-                                       if (!dict.Contains (group.Name)) {\r
-                                               dict.Add (group.Name, group);\r
-                                               group.Number = gid ++;\r
-\r
-                                               ++ num_groups;\r
-                                       }\r
-                                       else {\r
-                                               CapturingGroup prev = (CapturingGroup)dict[group.Name];\r
-                                               group.Number = prev.Number;\r
-                                       }\r
-                               }\r
-                       }\r
-\r
-                       // resolve references\r
-\r
-                       foreach (Expression expr in refs.Keys) {\r
-                               string name = (string)refs[expr];\r
-                               if (!dict.Contains (name)) {\r
-                                       throw NewParseException ("Reference to undefined group " +\r
-                                               (Char.IsDigit (name[0]) ? "number " : "name ") +\r
-                                               name);\r
-                               }\r
-\r
-                               CapturingGroup group = (CapturingGroup)dict[name];\r
-                               if (expr is Reference)\r
-                                       ((Reference)expr).CapturingGroup = group;\r
-                               else if (expr is CaptureAssertion)\r
-                                       ((CaptureAssertion)expr).CapturingGroup = group;\r
-                               else if (expr is BalancingGroup)\r
-                                       ((BalancingGroup)expr).Balance = group;\r
-                       }\r
-               }\r
-\r
-               // flag helper functions\r
-\r
-               private static bool IsIgnoreCase (RegexOptions options) {\r
-                       return (options & RegexOptions.IgnoreCase) != 0;\r
-               }\r
-\r
-               private static bool IsMultiline (RegexOptions options) {\r
-                       return (options & RegexOptions.Multiline) != 0;\r
-               }\r
-\r
-               private static bool IsExplicitCapture (RegexOptions options) {\r
-                       return (options & RegexOptions.ExplicitCapture) != 0;\r
-               }\r
-       \r
-               private static bool IsSingleline (RegexOptions options) {\r
-                       return (options & RegexOptions.Singleline) != 0;\r
-               }\r
-\r
-               private static bool IsIgnorePatternWhitespace (RegexOptions options) {\r
-                       return (options & RegexOptions.IgnorePatternWhitespace) != 0;\r
-               }\r
-\r
-               private static bool IsRightToLeft (RegexOptions options) {\r
-                       return (options & RegexOptions.RightToLeft) != 0;\r
-               }\r
-\r
-               private static bool IsECMAScript (RegexOptions options) {\r
-                       return (options & RegexOptions.ECMAScript) != 0;\r
-               }\r
-\r
-               // exception creation\r
-\r
-               private ArgumentException NewParseException (string msg) {\r
-                       msg = "parsing \"" + pattern + "\" - " + msg;\r
-                       return new ArgumentException (msg, pattern);\r
-               }\r
-\r
-               private string pattern;\r
-               private int ptr;\r
-\r
-               private ArrayList caps;\r
-               private Hashtable refs;\r
-               private int num_groups;\r
-       }\r
-}\r
+//
+// assembly:   System
+// namespace:  System.Text.RegularExpressions
+// file:       parser.cs
+//
+// author:     Dan Lewis (dlewis@gmx.co.uk)
+//             (c) 2002
+
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the
+// "Software"), to deal in the Software without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to
+// the following conditions:
+// 
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+using System;
+using System.Collections;
+using System.Globalization;
+
+namespace System.Text.RegularExpressions.Syntax {
+
+       class Parser {
+               public static int ParseDecimal (string str, ref int ptr) {
+                       return ParseNumber (str, ref ptr, 10, 1, Int32.MaxValue);
+               }
+
+               public static int ParseOctal (string str, ref int ptr) {
+                       return ParseNumber (str, ref ptr, 8, 1, 3);
+               }
+
+               public static int ParseHex (string str, ref int ptr, int digits) {
+                       return ParseNumber (str, ref ptr, 16, digits, digits);
+               }
+
+               public static int ParseNumber (string str, ref int ptr, int b, int min, int max) {
+                       int p = ptr, n = 0, digits = 0, d;
+                       if (max < min)
+                               max = Int32.MaxValue;
+
+                       while (digits < max && p < str.Length) {
+                               d = ParseDigit (str[p ++], b, digits);
+                               if (d < 0) {
+                                       -- p;
+                                       break;
+                               }
+
+                               n = n * b + d;
+                               ++ digits;
+                       }
+
+                       if (digits < min)
+                               return -1;
+
+                       ptr = p;
+                       return n;
+               }
+
+               public static string ParseName (string str, ref int ptr) {
+                       if (Char.IsDigit (str[ptr])) {
+                               int gid = ParseNumber (str, ref ptr, 10, 1, 0);
+                               if (gid > 0)
+                                       return gid.ToString ();
+                               
+                               return null;
+                       }
+
+                       int start = ptr;
+                       for (;;) {
+                               if (!IsNameChar (str[ptr]))
+                                       break;
+                               ++ ptr;
+                       }
+
+                       if (ptr - start > 0)
+                               return str.Substring (start, ptr - start);
+
+                       return null;
+               }
+
+               public static string Escape (string str) {
+                       string result = "";
+                       for (int i = 0; i < str.Length; ++ i) {
+                               char c = str[i];
+                               switch (c) {
+                               case '\\': case '*': case '+': case '?': case '|':
+                               case '{': case '[': case '(': case ')': case '^':
+                               case '$': case '.': case '#': case ' ':
+                                       result += "\\" + c;
+                                       break;
+
+                               case '\t': result += "\\t"; break;
+                               case '\n': result += "\\n"; break;
+                               case '\r': result += "\\r"; break;
+                               case '\f': result += "\\f"; break;
+
+                               default: result += c; break;
+                               }
+                       }
+
+                       return result;
+               }
+
+               public static string Unescape (string str) {
+                       if (str.IndexOf ('\\') == -1)
+                               return str;
+                       return new Parser ().ParseString (str);
+               }
+
+               // public instance
+
+               public Parser () {
+                       this.caps = new ArrayList ();
+                       this.refs = new Hashtable ();
+               }
+
+               public RegularExpression ParseRegularExpression (string pattern, RegexOptions options) {
+                       this.pattern = pattern;
+                       this.ptr = 0;
+
+                       caps.Clear ();
+                       refs.Clear ();
+                       this.num_groups = 0;
+
+                       try {
+                               RegularExpression re = new RegularExpression ();
+                               ParseGroup (re, options, null);
+                               ResolveReferences ();
+
+                               re.GroupCount = num_groups;
+                               
+                               return re;
+                       }
+                       catch (IndexOutOfRangeException) {
+                               throw NewParseException ("Unexpected end of pattern.");
+                       }
+               }
+
+               public IDictionary GetMapping ()
+               {
+                       Hashtable mapping = new Hashtable ();
+                       int end = caps.Count;
+                       mapping.Add ("0", 0);
+                       for (int i = 0; i < end; i++) {
+                               CapturingGroup group = (CapturingGroup) caps [i];
+                               if (group.Name != null) {
+                                       if (mapping.Contains (group.Name)) {
+                                               if ((int) mapping [group.Name] != group.Number)
+                                                       throw new SystemException ("invalid state");
+                                               continue;
+                                       }
+                                       mapping.Add (group.Name, group.Number);
+                               } else {
+                                       mapping.Add (group.Number.ToString (), group.Number);
+                               }
+                       }
+
+                       return mapping;
+               }
+
+               // private methods
+
+               private void ParseGroup (Group group, RegexOptions options, Assertion assertion) {
+                       bool is_top_level = group is RegularExpression;
+               
+                       Alternation alternation = null;
+                       string literal = null;
+
+                       Group current = new Group ();
+                       Expression expr = null;
+                       bool closed = false;
+
+                       while (true) {
+                               ConsumeWhitespace (IsIgnorePatternWhitespace (options));
+                               if (ptr >= pattern.Length)
+                                       break;
+                               
+                               // (1) Parse for Expressions
+                       
+                               char ch = pattern[ptr ++];
+                               
+                               switch (ch) {
+                               case '^': {
+                                       Position pos =
+                                               IsMultiline (options) ? Position.StartOfLine : Position.Start;
+                                       expr = new PositionAssertion (pos);
+                                       break;
+                               }
+
+                               case '$': {
+                                       Position pos =
+                                               IsMultiline (options) ? Position.EndOfLine : Position.End;
+                                       expr = new PositionAssertion (pos);
+                                       break;
+                               }
+
+                               case '.': {
+                                       Category cat =
+                                               IsSingleline (options) ? Category.AnySingleline : Category.Any;
+                                       expr = new CharacterClass (cat, false);
+                                       break;
+                               }
+
+                               case '\\': {
+                                       int c = ParseEscape ();
+                                       if (c >= 0)
+                                               ch = (char)c;
+                                       else {
+                                               expr = ParseSpecial (options);
+
+                                               if (expr == null)
+                                                       ch = pattern[ptr ++];           // default escape
+                                       }
+                                       break;
+                               }
+
+                               case '[': {
+                                       expr = ParseCharacterClass (options);
+                                       break;
+                               }
+
+                               case '(': {
+                                       bool ignore = IsIgnoreCase (options);
+                                       expr = ParseGroupingConstruct (ref options);
+                                       if (expr == null) {
+                                               if (literal != null && IsIgnoreCase (options) != ignore) {
+                                                       current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));
+                                                       literal = null;
+                                               }
+
+                                               continue;
+                                       }
+                                       break;
+                               }
+
+                               case ')': {
+                                       closed = true;
+                                       goto EndOfGroup;
+                               }
+
+                               case '|': {
+                                       if (literal != null) {
+                                               current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));
+                                               literal = null;
+                                       }
+
+                                       if (assertion != null) {
+                                               if (assertion.TrueExpression == null)
+                                                       assertion.TrueExpression = current;
+                                               else if (assertion.FalseExpression == null)
+                                                       assertion.FalseExpression = current;
+                                               else
+                                                       throw NewParseException ("Too many | in (?()|).");
+                                       }
+                                       else {
+                                               if (alternation == null)
+                                                       alternation = new Alternation ();
+
+                                               alternation.AddAlternative (current);
+                                       }
+
+                                       current = new Group ();
+                                       continue;
+                               }
+
+                               case '*': case '+': case '?': {
+                                       throw NewParseException ("Bad quantifier.");
+                               }
+
+                               default: 
+                                       break;          // literal character
+                               }
+
+                               ConsumeWhitespace (IsIgnorePatternWhitespace (options));
+                               
+                               // (2) Check for Repetitions
+                               
+                               if (ptr < pattern.Length) {
+                                       char k = pattern[ptr];
+                                       int min = 0, max = 0;
+                                       bool lazy = false;
+                                       bool haveRep = false;
+
+
+                                       if (k == '?' || k == '*' || k == '+') {
+                                               ++ ptr;
+                                               haveRep = true;
+
+                                               switch (k) {
+                                               case '?': min = 0; max = 1; break;
+                                               case '*': min = 0; max = 0x7fffffff; break;
+                                               case '+': min = 1; max = 0x7fffffff; break;
+                                               }
+                                       } else if (k == '{' && ptr + 1 < pattern.Length) {
+                                               int saved_ptr = ptr;
+                                               ++ptr;
+                                               haveRep = ParseRepetitionBounds (out min, out max, options);
+                                               if (!haveRep)
+                                                       ptr = saved_ptr;
+                                       }
+
+                                       if (haveRep) {
+                                               ConsumeWhitespace (IsIgnorePatternWhitespace (options));
+                                               if (ptr < pattern.Length && pattern[ptr] == '?') {
+                                                       ++ ptr;
+                                                       lazy = true;
+                                               }
+
+                                               Repetition repetition = new Repetition (min, max, lazy);
+
+                                               if (expr == null)
+                                                       repetition.Expression = new Literal (ch.ToString (), IsIgnoreCase (options));
+                                               else
+                                                       repetition.Expression = expr;
+
+                                               expr = repetition;
+                                       }
+                               }
+
+                               // (3) Append Expression and/or Literal
+
+                               if (expr == null) {
+                                       if (literal == null)
+                                               literal = "";
+                                       literal += ch;
+                               }
+                               else {
+                                       if (literal != null) {
+                                               current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));
+                                               literal = null;
+                                       }
+
+                                       current.AppendExpression (expr);
+                                       expr = null;
+                               }
+
+                               if (is_top_level && ptr >= pattern.Length)
+                                       goto EndOfGroup;
+                       }
+
+               EndOfGroup:
+                       if (is_top_level && closed)
+                               throw NewParseException ("Too many )'s.");
+                       if (!is_top_level && !closed)
+                               throw NewParseException ("Not enough )'s.");
+                               
+               
+                       // clean up literals and alternations
+
+                       if (literal != null)
+                               current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));
+
+                       if (assertion != null) {
+                               if (assertion.TrueExpression == null)
+                                       assertion.TrueExpression = current;
+                               else
+                                       assertion.FalseExpression = current;
+                               
+                               group.AppendExpression (assertion);
+                       }
+                       else if (alternation != null) {
+                               alternation.AddAlternative (current);
+                               group.AppendExpression (alternation);
+                       }
+                       else
+                               group.AppendExpression (current);
+               }
+
+               private Expression ParseGroupingConstruct (ref RegexOptions options) {
+                       if (pattern[ptr] != '?') {
+                               Group group;
+
+                               if (IsExplicitCapture (options))
+                                       group = new Group ();
+                               else {
+                                       group = new CapturingGroup ();
+                                       caps.Add (group);
+                               }
+
+                               ParseGroup (group, options, null);
+                               return group;
+                       }
+                       else
+                               ++ ptr;
+
+                       switch (pattern[ptr]) {
+                       case ':': {                                             // non-capturing group
+                               ++ ptr;
+                               Group group = new Group ();
+                               ParseGroup (group, options, null);
+
+                               return group;
+                       }
+
+                       case '>': {                                             // non-backtracking group
+                               ++ ptr;
+                               Group group = new NonBacktrackingGroup ();
+                               ParseGroup (group, options, null);
+                               
+                               return group;
+                       }
+
+                       case 'i': case 'm': case 'n':
+                       case 's': case 'x': case '-': {                         // options
+                               RegexOptions o = options;
+                               ParseOptions (ref o, false);
+                               if (pattern[ptr] == '-') {
+                                       ++ ptr;
+                                       ParseOptions (ref o, true);
+                               }
+
+                               if (pattern[ptr] == ':') {                      // pass options to child group
+                                       ++ ptr;
+                                       Group group = new Group ();
+                                       ParseGroup (group, o, null);
+                                       return group;
+                               }
+                               else if (pattern[ptr] == ')') {                 // change options of enclosing group
+                                       ++ ptr;
+                                       options = o;
+                                       return null;
+                               }
+                               else
+                                       throw NewParseException ("Bad options");
+                       }
+
+                       case '<': case '=': case '!': {                         // lookahead/lookbehind
+                               ExpressionAssertion asn = new ExpressionAssertion ();
+                               if (!ParseAssertionType (asn))
+                                       goto case '\'';                         // it's a (?<name> ) construct
+
+                               Group test = new Group ();
+                               ParseGroup (test, options, null);
+
+                               asn.TestExpression = test;
+                               return asn;
+                       }
+
+                       case '\'': {                                            // named/balancing group
+                               char delim;
+                               if (pattern[ptr] == '<')
+                                       delim = '>';
+                               else
+                                       delim = '\'';
+
+                               ++ ptr;
+                               string name = ParseName ();
+
+                               if (pattern[ptr] == delim) {
+                                       // capturing group
+
+                                       if (name == null)
+                                               throw NewParseException ("Bad group name.");
+
+                                       ++ ptr;
+                                       CapturingGroup cap = new CapturingGroup ();
+                                       cap.Name = name;
+                                       caps.Add (cap);
+                                       ParseGroup (cap, options, null);
+
+                                       return cap;
+                               }
+                               else if (pattern[ptr] == '-') {
+                                       // balancing group
+
+                                       ++ ptr;
+                                       string balance_name = ParseName ();
+                                       if (balance_name == null || pattern[ptr] != delim)
+                                               throw NewParseException ("Bad balancing group name.");
+
+                                       ++ ptr;
+                                       BalancingGroup bal = new BalancingGroup ();
+                                       bal.Name = name;
+                                       
+                                       if(bal.IsNamed) {
+                                               caps.Add (bal);
+                                       }
+
+                                       refs.Add (bal, balance_name);
+
+                                       ParseGroup (bal, options, null);
+
+                                       return bal;
+                               }
+                               else
+                                       throw NewParseException ("Bad group name.");
+                       }
+
+                       case '(': {                                             // expression/capture test
+                               Assertion asn;
+                       
+                               ++ ptr;
+                               int p = ptr;
+                               string name = ParseName ();
+                               if (name == null || pattern[ptr] != ')') {      // expression test
+                                       // FIXME MS implementation doesn't seem to
+                                       // implement this version of (?(x) ...)
+
+                                       ptr = p;
+                                       ExpressionAssertion expr_asn = new ExpressionAssertion ();
+
+                                       if (pattern[ptr] == '?') {
+                                               ++ ptr;
+                                               if (!ParseAssertionType (expr_asn))
+                                                       throw NewParseException ("Bad conditional.");
+                                       }
+                                       else {
+                                               expr_asn.Negate = false;
+                                               expr_asn.Reverse = false;
+                                       }
+
+                                       Group test = new Group ();
+                                       ParseGroup (test, options, null);
+                                       expr_asn.TestExpression = test;
+                                       asn = expr_asn;
+                               }
+                               else {                                          // capture test
+                                       ++ ptr;
+                                       asn = new CaptureAssertion (new Literal (name, IsIgnoreCase (options)));
+                                       refs.Add (asn, name);
+                               }
+
+                               Group group = new Group ();
+                               ParseGroup (group, options, asn);
+                               return group;
+                       }
+
+                       case '#': {                                             // comment
+                               ++ ptr;
+                               while (pattern[ptr ++] != ')') {
+                                       if (ptr >= pattern.Length)
+                                               throw NewParseException ("Unterminated (?#...) comment.");
+                               }
+                               return null;
+                       }
+
+                       default:                                                // error
+                               throw NewParseException ("Bad grouping construct.");
+                       }
+               }
+
+               private bool ParseAssertionType (ExpressionAssertion assertion) {
+                       if (pattern[ptr] == '<') {
+                               switch (pattern[ptr + 1]) {
+                               case '=':
+                                       assertion.Negate = false;
+                                       break;
+                               case '!':
+                                       assertion.Negate = true;
+                                       break;
+                               default:
+                                       return false;
+                               }
+
+                               assertion.Reverse = true;
+                               ptr += 2;
+                       }
+                       else {
+                               switch (pattern[ptr]) {
+                               case '=':
+                                       assertion.Negate = false;
+                                       break;
+                               case '!':
+                                       assertion.Negate = true;
+                                       break;
+                               default:
+                                       return false;
+                               }
+
+                               assertion.Reverse = false;
+                               ptr += 1;
+                       }
+
+                       return true;
+               }
+
+               private void ParseOptions (ref RegexOptions options, bool negate) {
+                       for (;;) {
+                               switch (pattern[ptr]) {
+                               case 'i':
+                                       if (negate)
+                                               options &= ~RegexOptions.IgnoreCase;
+                                       else
+                                               options |= RegexOptions.IgnoreCase;
+                                       break;
+
+                               case 'm':
+                                       if (negate)
+                                               options &= ~RegexOptions.Multiline;
+                                       else
+                                               options |= RegexOptions.Multiline;
+                                       break;
+                                       
+                               case 'n':
+                                       if (negate)
+                                               options &= ~RegexOptions.ExplicitCapture;
+                                       else
+                                               options |= RegexOptions.ExplicitCapture;
+                                       break;
+                                       
+                               case 's':
+                                       if (negate)
+                                               options &= ~RegexOptions.Singleline;
+                                       else
+                                               options |= RegexOptions.Singleline;
+                                       break;
+                                       
+                               case 'x':
+                                       if (negate)
+                                               options &= ~RegexOptions.IgnorePatternWhitespace;
+                                       else
+                                               options |= RegexOptions.IgnorePatternWhitespace;
+                                       break;
+
+                               default:
+                                       return;
+                               }
+
+                               ++ ptr;
+                       }
+               }
+
+               private Expression ParseCharacterClass (RegexOptions options) {
+                       bool negate = false;
+                       if (pattern[ptr] == '^') {
+                               negate = true;
+                               ++ ptr;
+                       }
+                       
+                       bool ecma = IsECMAScript (options);
+                       CharacterClass cls = new CharacterClass (negate, IsIgnoreCase (options));
+
+                       if (pattern[ptr] == ']') {
+                               cls.AddCharacter (']');
+                               ++ ptr;
+                       }
+
+                       int c = -1;
+                       int last = -1;
+                       bool range = false;
+                       bool closed = false;
+                       while (ptr < pattern.Length) {
+                               c = pattern[ptr ++];
+
+                               if (c == ']') {
+                                       closed = true;
+                                       break;
+                               }
+
+                               if (c == '-' && last >= 0 && !range) {
+                                       range = true;
+                                       continue;
+                               }
+
+                               if (c == '\\') {
+                                       c = ParseEscape ();
+                                       if (c >= 0)
+                                               goto char_recognized;
+
+                                       // didn't recognize escape
+                                       c = pattern [ptr ++];
+                                       switch (c) {
+                                       case 'b':
+                                               c = '\b';
+                                               goto char_recognized;
+
+                                       case 'd': case 'D':
+                                               cls.AddCategory (ecma ? Category.EcmaDigit : Category.Digit, c == 'D');
+                                               break;
+                                               
+                                       case 'w': case 'W':
+                                               cls.AddCategory (ecma ? Category.EcmaWord : Category.Word, c == 'W');
+                                               break;
+                                               
+                                       case 's': case 'S':
+                                               cls.AddCategory (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, c == 'S');
+                                               break;
+                                               
+                                       case 'p': case 'P':
+                                               cls.AddCategory (ParseUnicodeCategory (), c == 'P');    // ignore ecma
+                                               break;
+
+                                       default:                // add escaped character
+                                               goto char_recognized;
+                                       }
+
+                                       // if the pattern looks like [a-\s] ...
+                                       if (range)
+                                               throw NewParseException ("character range cannot have category \\" + c);
+
+                                       last = -1;
+                                       continue;
+                               }
+
+                       char_recognized:
+                               if (range) {
+                                       // if 'range' is true, we know that 'last >= 0'
+                                       if (c < last)
+                                               throw NewParseException ("[" + last + "-" + c + "] range in reverse order.");
+                                       cls.AddRange ((char)last, (char)c);
+                                       last = -1;
+                                       range = false;
+                                       continue;
+                               }
+
+                               cls.AddCharacter ((char)c);
+                               last = c;
+                       }
+
+                       if (!closed)
+                               throw NewParseException ("Unterminated [] set.");
+
+                       if (range)
+                               cls.AddCharacter ('-');
+
+                       return cls;
+               }
+
+               private bool ParseRepetitionBounds (out int min, out int max, RegexOptions options) {
+                       int n, m;
+                       min = max = 0;
+
+                       /* check syntax */
+
+                       ConsumeWhitespace (IsIgnorePatternWhitespace (options));
+                   
+                       if (pattern[ptr] == ',') {
+                                n = -1;
+                       } else {
+                                n = ParseNumber (10, 1, 0);
+                                ConsumeWhitespace (IsIgnorePatternWhitespace (options));
+                       }
+                       
+                       switch (pattern[ptr ++]) {
+                       case '}':
+                               m = n;
+                               break;
+                       case ',':
+                               ConsumeWhitespace (IsIgnorePatternWhitespace (options));
+                               m = ParseNumber (10, 1, 0);
+                               ConsumeWhitespace (IsIgnorePatternWhitespace (options));
+                               if (pattern[ptr ++] != '}')
+                                       return false;
+                               break;
+                       default:
+                               return false;
+                       }
+
+                       /* check bounds and ordering */
+
+                       if (n > 0x7fffffff || m > 0x7fffffff)
+                               throw NewParseException ("Illegal {x, y} - maximum of 2147483647.");
+                       if (m >= 0 && m < n)
+                               throw NewParseException ("Illegal {x, y} with x > y.");
+
+                       /* assign min and max */
+                       
+                       min = n;
+                       if (m > 0)
+                               max = m;
+                       else
+                               max = 0x7fffffff;
+
+                       return true;
+               }
+
+               private Category ParseUnicodeCategory () {
+                       if (pattern[ptr ++] != '{')
+                               throw NewParseException ("Incomplete \\p{X} character escape.");
+
+                       string name = ParseName (pattern, ref ptr);
+                       if (name == null)
+                               throw NewParseException ("Incomplete \\p{X} character escape.");
+
+                       Category cat = CategoryUtils.CategoryFromName (name);
+                       if (cat == Category.None)
+                               throw NewParseException ("Unknown property '" + name + "'.");
+
+                       if (pattern[ptr ++] != '}')
+                               throw NewParseException ("Incomplete \\p{X} character escape.");
+
+                       return cat;
+               }
+
+               private Expression ParseSpecial (RegexOptions options) {
+                       int p = ptr;
+                       bool ecma = IsECMAScript (options);
+                       Expression expr = null;
+                       
+                       switch (pattern[ptr ++]) {
+
+                       // categories
+
+                       case 'd':
+                               expr = new CharacterClass (ecma ? Category.EcmaDigit : Category.Digit, false);
+                               break;
+                               
+                       case 'w':
+                               expr = new CharacterClass (ecma ? Category.EcmaWord : Category.Word, false);
+                               break;
+                               
+                       case 's':
+                               expr = new CharacterClass (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, false);
+                               break;
+                               
+                       case 'p':
+                               // this is odd - ECMAScript isn't supposed to support Unicode,
+                               // yet \p{..} compiles and runs under the MS implementation
+                               // identically to canonical mode. That's why I'm ignoring the
+                               // value of ecma here.
+                       
+                               expr = new CharacterClass (ParseUnicodeCategory (), false);
+                               break;
+                               
+                       case 'D':
+                               expr = new CharacterClass (ecma ? Category.EcmaDigit : Category.Digit, true);
+                               break;
+                               
+                       case 'W':
+                               expr = new CharacterClass (ecma ? Category.EcmaWord : Category.Word, true);
+                               break;
+                               
+                       case 'S':
+                               expr = new CharacterClass (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, true);
+                               break;
+                               
+                       case 'P':
+                               expr = new CharacterClass (ParseUnicodeCategory (), true);
+                               break;
+
+                       // positions
+
+                       case 'A': expr = new PositionAssertion (Position.StartOfString); break;
+                       case 'Z': expr = new PositionAssertion (Position.End); break;
+                       case 'z': expr = new PositionAssertion (Position.EndOfString); break;
+                       case 'G': expr = new PositionAssertion (Position.StartOfScan); break;
+                       case 'b': expr = new PositionAssertion (Position.Boundary); break;
+                       case 'B': expr = new PositionAssertion (Position.NonBoundary); break;
+                       
+                       // references
+
+                       case '1': case '2': case '3': case '4': case '5':
+                       case '6': case '7': case '8': case '9': {
+                               ptr --;
+                               int n = ParseNumber (10, 1, 0);
+                               if (n < 0) {
+                                       ptr = p;
+                                       return null;
+                               }
+
+                               // FIXME test if number is within number of assigned groups
+                               // this may present a problem for right-to-left matching
+
+                               Reference reference = new Reference (IsIgnoreCase (options));
+                               refs.Add (reference, n.ToString ());
+                               expr = reference;
+                               break;
+                       }
+
+                       case 'k': {
+                               char delim = pattern[ptr ++];
+                               if (delim == '<')
+                                       delim = '>';
+                               else if (delim != '\'')
+                                       throw NewParseException ("Malformed \\k<...> named backreference.");
+
+                               string name = ParseName ();
+                               if (name == null || pattern[ptr] != delim)
+                                       throw NewParseException ("Malformed \\k<...> named backreference.");
+
+                               ++ ptr;
+                               Reference reference = new Reference (IsIgnoreCase (options));
+                               refs.Add (reference, name);
+                               expr = reference;
+                               break;
+                       }
+
+                       default:
+                               expr = null;
+                               break;
+                       }
+
+                       if (expr == null)
+                               ptr = p;
+
+                       return expr;
+               }
+
+               private int ParseEscape () {
+                       int p = ptr;
+                       int c;
+
+                       if (p >= pattern.Length)
+                               throw new ArgumentException (
+                                               String.Format ("Parsing \"{0}\" - Illegal \\ at end of " + 
+                                                               "pattern.", pattern), pattern);
+                       
+                       switch (pattern[ptr ++]) {
+       
+                       // standard escapes (except \b)
+
+                       case 'a': return '\u0007';
+                       case 't': return '\u0009';
+                       case 'r': return '\u000d';
+                       case 'v': return '\u000b';
+                       case 'f': return '\u000c';
+                       case 'n': return '\u000a';
+                       case 'e': return '\u001b';
+                       case '\\': return '\\';
+
+                       // character codes
+
+                       case '0':
+                               //
+                               // Turns out that octal values can be specified
+                               // without a leading zero.   But also the limit
+                               // of three character should include this first
+                               // one.  
+                               //
+                               ptr--;
+                               int prevptr = ptr;
+                               int result = ParseOctal (pattern, ref ptr);
+                               if (result == -1 && prevptr == ptr)
+                                       return 0;
+
+                               return result;
+
+                       case 'x':
+                               c = ParseHex (pattern, ref ptr, 2);
+                               if (c < 0)
+                                       throw NewParseException ("Insufficient hex digits");
+
+                               return c;
+
+                       case 'u':
+                               c = ParseHex (pattern, ref ptr, 4);
+                               if (c < 0)
+                                       throw NewParseException ("Insufficient hex digits");
+                               
+                               return c;
+
+                       // control characters
+
+                       case 'c':
+                               c = pattern[ptr ++];
+                               if (c >= '@' && c <= '_')
+                                       return c - '@';
+                               else
+                                       throw NewParseException ("Unrecognized control character.");
+
+                       // unknown escape
+
+                       default:
+                               ptr = p;
+                               return -1;
+                       }
+               }
+
+               private string ParseName () {
+                       return Parser.ParseName (pattern, ref ptr);
+               }
+
+               private static bool IsNameChar (char c) {
+                       UnicodeCategory cat = Char.GetUnicodeCategory (c);
+                       if (cat == UnicodeCategory.ModifierLetter)
+                               return false;
+                       if (cat == UnicodeCategory.ConnectorPunctuation)
+                               return true;
+                       return Char.IsLetterOrDigit (c);
+               }
+       
+               private int ParseNumber (int b, int min, int max) {
+                       return Parser.ParseNumber (pattern, ref ptr, b, min, max);
+               }
+
+               private static int ParseDigit (char c, int b, int n) {
+                       switch (b) {
+                       case 8:
+                               if (c >= '0' && c <= '7')
+                                       return c - '0';
+                               else
+                                       return -1;
+                       case 10:
+                               if (c >= '0' && c <= '9')
+                                       return c - '0';
+                               else
+                                       return -1;
+                       case 16:
+                               if (c >= '0' && c <= '9')
+                                       return c - '0';
+                               else if (c >= 'a' && c <= 'f')
+                                       return 10 + c - 'a';
+                               else if (c >= 'A' && c <= 'F')
+                                       return 10 + c - 'A';
+                               else
+                                       return -1;
+                       default:
+                               return -1;
+                       }
+               }
+
+               private void ConsumeWhitespace (bool ignore) {
+                       while (ptr < pattern.Length) {
+                               if (pattern[ptr] == '(') {
+                                       if (ptr + 3 >= pattern.Length)
+                                               return;
+
+                                       if (pattern[ptr + 1] != '?' || pattern[ptr + 2] != '#')
+                                               return;
+
+                                       ptr += 3;
+                                       while (ptr < pattern.Length && pattern[ptr ++] != ')')
+                                               /* ignore */ ;
+                               }
+                               else if (ignore && pattern[ptr] == '#') {
+                                       while (ptr < pattern.Length && pattern[ptr ++] != '\n')
+                                               /* ignore */ ;
+                               }
+                               else if (ignore && Char.IsWhiteSpace (pattern[ptr])) {
+                                       while (ptr < pattern.Length && Char.IsWhiteSpace (pattern[ptr]))
+                                               ++ ptr;
+                               }
+                               else
+                                       return;
+                       }
+               }
+
+               private string ParseString (string pattern) {
+                       this.pattern = pattern;
+                       this.ptr = 0;
+
+                       StringBuilder result = new StringBuilder (pattern.Length);
+                       while (ptr < pattern.Length) {
+                               int c = pattern[ptr ++];
+                               if (c == '\\') {
+                                       c = ParseEscape ();
+
+                                       if(c < 0) {
+                                               c = pattern[ptr ++];
+                                               if(c == 'b')
+                                                       c = '\b';
+                                       }
+                               }
+                               result.Append ((char) c);
+                       }
+
+                       return result.ToString ();
+               }
+
+               private void ResolveReferences () {
+                       int gid = 1;
+                       Hashtable dict = new Hashtable ();
+
+                       // number unnamed groups
+
+                       foreach (CapturingGroup group in caps) {
+                               if (group.Name == null) {
+                                       dict.Add (gid.ToString (), group);
+                                       group.Number = gid ++;
+
+                                       ++ num_groups;
+                               }
+                       }
+
+                       // number named groups
+
+                       foreach (CapturingGroup group in caps) {
+                               if (group.Name != null) {
+                                       if (!dict.Contains (group.Name)) {
+                                               dict.Add (group.Name, group);
+                                               group.Number = gid ++;
+
+                                               ++ num_groups;
+                                       }
+                                       else {
+                                               CapturingGroup prev = (CapturingGroup)dict[group.Name];
+                                               group.Number = prev.Number;
+                                       }
+                               }
+                       }
+
+                       // resolve references
+
+                       foreach (Expression expr in refs.Keys) {
+                               string name = (string)refs[expr];
+                               if (!dict.Contains (name)) {
+                                       if (expr is CaptureAssertion && !Char.IsDigit (name [0]))
+                                               continue;
+                                       throw NewParseException ("Reference to undefined group " +
+                                               (Char.IsDigit (name[0]) ? "number " : "name ") +
+                                               name);
+                               }
+
+                               CapturingGroup group = (CapturingGroup)dict[name];
+                               if (expr is Reference)
+                                       ((Reference)expr).CapturingGroup = group;
+                               else if (expr is CaptureAssertion)
+                                       ((CaptureAssertion)expr).CapturingGroup = group;
+                               else if (expr is BalancingGroup)
+                                       ((BalancingGroup)expr).Balance = group;
+                       }
+               }
+
+               // flag helper functions
+
+               private static bool IsIgnoreCase (RegexOptions options) {
+                       return (options & RegexOptions.IgnoreCase) != 0;
+               }
+
+               private static bool IsMultiline (RegexOptions options) {
+                       return (options & RegexOptions.Multiline) != 0;
+               }
+
+               private static bool IsExplicitCapture (RegexOptions options) {
+                       return (options & RegexOptions.ExplicitCapture) != 0;
+               }
+       
+               private static bool IsSingleline (RegexOptions options) {
+                       return (options & RegexOptions.Singleline) != 0;
+               }
+
+               private static bool IsIgnorePatternWhitespace (RegexOptions options) {
+                       return (options & RegexOptions.IgnorePatternWhitespace) != 0;
+               }
+
+               private static bool IsECMAScript (RegexOptions options) {
+                       return (options & RegexOptions.ECMAScript) != 0;
+               }
+
+               // exception creation
+
+               private ArgumentException NewParseException (string msg) {
+                       msg = "parsing \"" + pattern + "\" - " + msg;
+                       return new ArgumentException (msg, pattern);
+               }
+
+               private string pattern;
+               private int ptr;
+
+               private ArrayList caps;
+               private Hashtable refs;
+               private int num_groups;
+       }
+}