Bug 10670 fix.
[mono.git] / mcs / class / System / System.Text.RegularExpressions / parser.cs
index ef3e782c4c47bc2186c778ec3f47859fb9d33f58..f3ddf196730bb8879cbdc0dd8fc5a82829c91486 100644 (file)
@@ -115,6 +115,8 @@ namespace System.Text.RegularExpressions.Syntax {
                }
 
                public static string Unescape (string str) {
+                       if (str.IndexOf ('\\') == -1)   // fast path: no backslash in the input, so there is nothing to unescape
+                               return str;             // return the input as-is, before any Parser is constructed
                        return new Parser ().ParseString (str);
                }
 
@@ -147,24 +149,25 @@ namespace System.Text.RegularExpressions.Syntax {
                        }
                }
 
-               public IDictionary GetMapping () {
+               public IDictionary GetMapping ()        // builds the table: group name (or decimal number string) -> group number
+               {
                        Hashtable mapping = new Hashtable ();
-                       Hashtable numbers = new Hashtable ();
                        int end = caps.Count;
                        mapping.Add ("0", 0);
                        for (int i = 0; i < end; i++) {
                                CapturingGroup group = (CapturingGroup) caps [i];
-                               if (group.Name != null && !mapping.Contains (group.Name)) {
+                               if (group.Name != null) {
+                                       if (mapping.Contains (group.Name)) {    // name already recorded: it must map to the same number
+                                               if ((int) mapping [group.Name] != group.Number)
+                                                       throw new InvalidOperationException ("invalid state");  // derives from SystemException, so existing catch clauses still match
+                                               continue;
+                                       }
                                        mapping.Add (group.Name, group.Number);
-                                       numbers.Add (group.Number, group.Number);
+                               } else {
+                                       mapping.Add (group.Number.ToString (), group.Number);   // unnamed group: keyed by its number rendered as a string
                                }
                        }
 
-                       for (int i = 1; i < end; i++) {
-                               if (numbers [i] == null)
-                                       mapping.Add (i.ToString (), i);
-                       }
-
                        return mapping;
                }
 
@@ -298,8 +301,8 @@ namespace System.Text.RegularExpressions.Syntax {
 
                                                switch (k) {
                                                case '?': min = 0; max = 1; break;
-                                               case '*': min = 0; max = 0xffff; break;
-                                               case '+': min = 1; max = 0xffff; break;
+                                               case '*': min = 0; max = 0x7fffffff; break;
+                                               case '+': min = 1; max = 0x7fffffff; break;
                                                }
                                        } else if (k == '{' && ptr + 1 < pattern.Length) {
                                                int saved_ptr = ptr;
@@ -526,7 +529,7 @@ namespace System.Text.RegularExpressions.Syntax {
                                }
                                else {                                          // capture test
                                        ++ ptr;
-                                       asn = new CaptureAssertion ();
+                                       asn = new CaptureAssertion (new Literal (name, IsIgnoreCase (options)));
                                        refs.Add (asn, name);
                                }
 
@@ -631,15 +634,13 @@ namespace System.Text.RegularExpressions.Syntax {
                }
 
                private Expression ParseCharacterClass (RegexOptions options) {
-                       bool negate, ecma;
+                       bool negate = false;
                        if (pattern[ptr] == '^') {
                                negate = true;
                                ++ ptr;
                        }
-                       else
-                               negate = false;
                        
-                       ecma = IsECMAScript (options);
+                       bool ecma = IsECMAScript (options);
                        CharacterClass cls = new CharacterClass (negate, IsIgnoreCase (options));
 
                        if (pattern[ptr] == ']') {
@@ -658,84 +659,65 @@ namespace System.Text.RegularExpressions.Syntax {
                                        closed = true;
                                        break;
                                }
-                               
-                               if (c == '-') {
+
+                               if (c == '-' && last >= 0 && !range) {
                                        range = true;
                                        continue;
                                }
 
                                if (c == '\\') {
                                        c = ParseEscape ();
-                                       if (c < 0) {
-                                               // didn't recognize escape
-
-                                               c = pattern[ptr ++];
-                                               switch (c) {
-                                               case 'b': c = '\b'; break;
-
-                                               case 'd':
-                                                       cls.AddCategory (ecma ? Category.EcmaDigit : Category.Digit, false);
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 'w':
-                                                       cls.AddCategory (ecma ? Category.EcmaWord : Category.Word, false);
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 's':
-                                                       cls.AddCategory (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, false);
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 'p':
-                                                       cls.AddCategory (ParseUnicodeCategory (), false);       // ignore ecma
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 'D':
-                                                       cls.AddCategory (ecma ? Category.EcmaDigit : Category.Digit, true);
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 'W':
-                                                       cls.AddCategory (ecma ? Category.EcmaWord : Category.Word, true);
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 'S':
-                                                       cls.AddCategory (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, true);
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 'P':
-                                                       cls.AddCategory (ParseUnicodeCategory (), true);
-                                                       last = -1;
-                                                       continue;
-
-                                               default: break;         // add escaped character
-                                               }
+                                       if (c >= 0)
+                                               goto char_recognized;
+
+                                       // didn't recognize escape
+                                       c = pattern [ptr ++];
+                                       switch (c) {
+                                       case 'b':
+                                               c = '\b';
+                                               goto char_recognized;
+
+                                       case 'd': case 'D':
+                                               cls.AddCategory (ecma ? Category.EcmaDigit : Category.Digit, c == 'D');
+                                               break;
+                                               
+                                       case 'w': case 'W':
+                                               cls.AddCategory (ecma ? Category.EcmaWord : Category.Word, c == 'W');
+                                               break;
+                                               
+                                       case 's': case 'S':
+                                               cls.AddCategory (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, c == 'S');
+                                               break;
+                                               
+                                       case 'p': case 'P':
+                                               cls.AddCategory (ParseUnicodeCategory (), c == 'P');    // ignore ecma
+                                               break;
+
+                                       default:                // add escaped character
+                                               goto char_recognized;
                                        }
+
+                                       // if the pattern looks like [a-\s] ... a category escape cannot be the end of a range
+                                       if (range)
+                                               throw NewParseException ("character range cannot have category \\" + (char) c);        // c is an int here; cast so the message shows the letter, not its code
+
+                                       last = -1;
+                                       continue;
                                }
 
+                       char_recognized:
                                if (range) {
+                                       // if 'range' is true, we know that 'last >= 0'
                                        if (c < last)
-                                               throw NewParseException ("[x-y] range in reverse order.");
-
-                                       if (last >=0 )
-                                               cls.AddRange ((char)last, (char)c);
-                                       else {
-                                               cls.AddCharacter ((char)c);
-                                               cls.AddCharacter ('-');
-                                       }
-
-                                       range = false;
+                                               throw NewParseException ("[" + (char) last + "-" + (char) c + "] range in reverse order.");    // show the offending characters rather than their numeric codes
+                                       cls.AddRange ((char)last, (char)c);
                                        last = -1;
+                                       range = false;
+                                       continue;
                                }
-                               else {
-                                       cls.AddCharacter ((char)c);
-                                       last = c;
-                               }
+
+                               cls.AddCharacter ((char)c);
+                               last = c;
                        }
 
                        if (!closed)
@@ -779,8 +761,8 @@ namespace System.Text.RegularExpressions.Syntax {
 
                        /* check bounds and ordering */
 
-                       if (n >= 0xffff || m >= 0xffff)
-                               throw NewParseException ("Illegal {x, y} - maximum of 65535.");
+                       if (n > 0x7fffffff || m > 0x7fffffff)
+                               throw NewParseException ("Illegal {x, y} - maximum of 2147483647.");
                        if (m >= 0 && m < n)
                                throw NewParseException ("Illegal {x, y} with x > y.");
 
@@ -790,7 +772,7 @@ namespace System.Text.RegularExpressions.Syntax {
                        if (m > 0)
                                max = m;
                        else
-                               max = 0xffff;
+                               max = 0x7fffffff;
 
                        return true;
                }
@@ -1004,10 +986,6 @@ namespace System.Text.RegularExpressions.Syntax {
                        return Parser.ParseNumber (pattern, ref ptr, b, min, max);
                }
 
-               private int ParseDecimal () {
-                       return Parser.ParseDecimal (pattern, ref ptr);
-               }
-
                private static int ParseDigit (char c, int b, int n) {
                        switch (b) {
                        case 8:
@@ -1035,10 +1013,7 @@ namespace System.Text.RegularExpressions.Syntax {
                }
 
                private void ConsumeWhitespace (bool ignore) {
-                       while (true) {
-                               if (ptr >= pattern.Length)
-                                       break;
-                       
+                       while (ptr < pattern.Length) {
                                if (pattern[ptr] == '(') {
                                        if (ptr + 3 >= pattern.Length)
                                                return;
@@ -1047,7 +1022,7 @@ namespace System.Text.RegularExpressions.Syntax {
                                                return;
 
                                        ptr += 3;
-                                       while (pattern[ptr ++] != ')')
+                                       while (ptr < pattern.Length && pattern[ptr ++] != ')')
                                                /* ignore */ ;
                                }
                                else if (ignore && pattern[ptr] == '#') {
@@ -1122,6 +1097,8 @@ namespace System.Text.RegularExpressions.Syntax {
                        foreach (Expression expr in refs.Keys) {
                                string name = (string)refs[expr];
                                if (!dict.Contains (name)) {
+                                       if (expr is CaptureAssertion && !Char.IsDigit (name [0]))
+                                               continue;
                                        throw NewParseException ("Reference to undefined group " +
                                                (Char.IsDigit (name[0]) ? "number " : "name ") +
                                                name);
@@ -1159,10 +1136,6 @@ namespace System.Text.RegularExpressions.Syntax {
                        return (options & RegexOptions.IgnorePatternWhitespace) != 0;
                }
 
-               private static bool IsRightToLeft (RegexOptions options) {
-                       return (options & RegexOptions.RightToLeft) != 0;
-               }
-
                private static bool IsECMAScript (RegexOptions options) {
                        return (options & RegexOptions.ECMAScript) != 0;
                }