Bug 10670 fix.
[mono.git] / mcs / class / System / System.Text.RegularExpressions / parser.cs
index 80ce31eb301bcdddfd638653f56d2507cfd67d2b..f3ddf196730bb8879cbdc0dd8fc5a82829c91486 100644 (file)
@@ -115,6 +115,8 @@ namespace System.Text.RegularExpressions.Syntax {
                }
 
                public static string Unescape (string str) {
+                       if (str.IndexOf ('\\') == -1)
+                               return str;
                        return new Parser ().ParseString (str);
                }
 
@@ -147,24 +149,25 @@ namespace System.Text.RegularExpressions.Syntax {
                        }
                }
 
-               public IDictionary GetMapping () {
+               public IDictionary GetMapping ()
+               {
                        Hashtable mapping = new Hashtable ();
-                       Hashtable numbers = new Hashtable ();
                        int end = caps.Count;
                        mapping.Add ("0", 0);
                        for (int i = 0; i < end; i++) {
                                CapturingGroup group = (CapturingGroup) caps [i];
-                               if (group.Name != null && !mapping.Contains (group.Name)) {
+                               if (group.Name != null) {
+                                       if (mapping.Contains (group.Name)) {
+                                               if ((int) mapping [group.Name] != group.Number)
+                                                       throw new SystemException ("invalid state");
+                                               continue;
+                                       }
                                        mapping.Add (group.Name, group.Number);
-                                       numbers.Add (group.Number, group.Number);
+                               } else {
+                                       mapping.Add (group.Number.ToString (), group.Number);
                                }
                        }
 
-                       for (int i = 1; i < end; i++) {
-                               if (numbers [i] == null)
-                                       mapping.Add (i.ToString (), i);
-                       }
-
                        return mapping;
                }
 
@@ -287,20 +290,29 @@ namespace System.Text.RegularExpressions.Syntax {
                                
                                if (ptr < pattern.Length) {
                                        char k = pattern[ptr];
+                                       int min = 0, max = 0;
+                                       bool lazy = false;
+                                       bool haveRep = false;
 
-                                       if (k == '?' || k == '*' || k == '+' || k == '{') {
-                                               ++ ptr;
 
-                                               int min = 0, max = 0;
-                                               bool lazy = false;
+                                       if (k == '?' || k == '*' || k == '+') {
+                                               ++ ptr;
+                                               haveRep = true;
 
                                                switch (k) {
                                                case '?': min = 0; max = 1; break;
-                                               case '*': min = 0; max = 0xffff; break;
-                                               case '+': min = 1; max = 0xffff; break;
-                                               case '{': ParseRepetitionBounds (out min, out max, options); break;
+                                               case '*': min = 0; max = 0x7fffffff; break;
+                                               case '+': min = 1; max = 0x7fffffff; break;
                                                }
+                                       } else if (k == '{' && ptr + 1 < pattern.Length) {
+                                               int saved_ptr = ptr;
+                                               ++ptr;
+                                               haveRep = ParseRepetitionBounds (out min, out max, options);
+                                               if (!haveRep)
+                                                       ptr = saved_ptr;
+                                       }
 
+                                       if (haveRep) {
                                                ConsumeWhitespace (IsIgnorePatternWhitespace (options));
                                                if (ptr < pattern.Length && pattern[ptr] == '?') {
                                                        ++ ptr;
@@ -517,7 +529,7 @@ namespace System.Text.RegularExpressions.Syntax {
                                }
                                else {                                          // capture test
                                        ++ ptr;
-                                       asn = new CaptureAssertion ();
+                                       asn = new CaptureAssertion (new Literal (name, IsIgnoreCase (options)));
                                        refs.Add (asn, name);
                                }
 
@@ -622,15 +634,13 @@ namespace System.Text.RegularExpressions.Syntax {
                }
 
                private Expression ParseCharacterClass (RegexOptions options) {
-                       bool negate, ecma;
+                       bool negate = false;
                        if (pattern[ptr] == '^') {
                                negate = true;
                                ++ ptr;
                        }
-                       else
-                               negate = false;
                        
-                       ecma = IsECMAScript (options);
+                       bool ecma = IsECMAScript (options);
                        CharacterClass cls = new CharacterClass (negate, IsIgnoreCase (options));
 
                        if (pattern[ptr] == ']') {
@@ -649,84 +659,65 @@ namespace System.Text.RegularExpressions.Syntax {
                                        closed = true;
                                        break;
                                }
-                               
-                               if (c == '-') {
+
+                               if (c == '-' && last >= 0 && !range) {
                                        range = true;
                                        continue;
                                }
 
                                if (c == '\\') {
                                        c = ParseEscape ();
-                                       if (c < 0) {
-                                               // didn't recognize escape
-
-                                               c = pattern[ptr ++];
-                                               switch (c) {
-                                               case 'b': c = '\b'; break;
-
-                                               case 'd':
-                                                       cls.AddCategory (ecma ? Category.EcmaDigit : Category.Digit, false);
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 'w':
-                                                       cls.AddCategory (ecma ? Category.EcmaWord : Category.Word, false);
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 's':
-                                                       cls.AddCategory (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, false);
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 'p':
-                                                       cls.AddCategory (ParseUnicodeCategory (), false);       // ignore ecma
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 'D':
-                                                       cls.AddCategory (ecma ? Category.EcmaDigit : Category.Digit, true);
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 'W':
-                                                       cls.AddCategory (ecma ? Category.EcmaWord : Category.Word, true);
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 'S':
-                                                       cls.AddCategory (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, true);
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 'P':
-                                                       cls.AddCategory (ParseUnicodeCategory (), true);
-                                                       last = -1;
-                                                       continue;
-
-                                               default: break;         // add escaped character
-                                               }
+                                       if (c >= 0)
+                                               goto char_recognized;
+
+                                       // didn't recognize escape
+                                       c = pattern [ptr ++];
+                                       switch (c) {
+                                       case 'b':
+                                               c = '\b';
+                                               goto char_recognized;
+
+                                       case 'd': case 'D':
+                                               cls.AddCategory (ecma ? Category.EcmaDigit : Category.Digit, c == 'D');
+                                               break;
+                                               
+                                       case 'w': case 'W':
+                                               cls.AddCategory (ecma ? Category.EcmaWord : Category.Word, c == 'W');
+                                               break;
+                                               
+                                       case 's': case 'S':
+                                               cls.AddCategory (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, c == 'S');
+                                               break;
+                                               
+                                       case 'p': case 'P':
+                                               cls.AddCategory (ParseUnicodeCategory (), c == 'P');    // ignore ecma
+                                               break;
+
+                                       default:                // add escaped character
+                                               goto char_recognized;
                                        }
+
+                                       // if the pattern looks like [a-\s] ...
+                                       if (range)
+                                               throw NewParseException ("character range cannot have category \\" + c);
+
+                                       last = -1;
+                                       continue;
                                }
 
+                       char_recognized:
                                if (range) {
+                                       // if 'range' is true, we know that 'last >= 0'
                                        if (c < last)
-                                               throw NewParseException ("[x-y] range in reverse order.");
-
-                                       if (last >=0 )
-                                               cls.AddRange ((char)last, (char)c);
-                                       else {
-                                               cls.AddCharacter ((char)c);
-                                               cls.AddCharacter ('-');
-                                       }
-
-                                       range = false;
+                                               throw NewParseException ("[" + last + "-" + c + "] range in reverse order.");
+                                       cls.AddRange ((char)last, (char)c);
                                        last = -1;
+                                       range = false;
+                                       continue;
                                }
-                               else {
-                                       cls.AddCharacter ((char)c);
-                                       last = c;
-                               }
+
+                               cls.AddCharacter ((char)c);
+                               last = c;
                        }
 
                        if (!closed)
@@ -738,8 +729,9 @@ namespace System.Text.RegularExpressions.Syntax {
                        return cls;
                }
 
-               private void ParseRepetitionBounds (out int min, out int max, RegexOptions options) {
+               private bool ParseRepetitionBounds (out int min, out int max, RegexOptions options) {
                        int n, m;
+                       min = max = 0;
 
                        /* check syntax */
 
@@ -761,16 +753,16 @@ namespace System.Text.RegularExpressions.Syntax {
                                m = ParseNumber (10, 1, 0);
                                ConsumeWhitespace (IsIgnorePatternWhitespace (options));
                                if (pattern[ptr ++] != '}')
-                                       throw NewParseException ("Illegal {x,y} - bad value of y.");
+                                       return false;
                                break;
                        default:
-                               throw NewParseException ("Illegal {x,y}");
+                               return false;
                        }
 
                        /* check bounds and ordering */
 
-                       if (n >= 0xffff || m >= 0xffff)
-                               throw NewParseException ("Illegal {x, y} - maximum of 65535.");
+                       if (n > 0x7fffffff || m > 0x7fffffff)
+                               throw NewParseException ("Illegal {x, y} - maximum of 2147483647.");
                        if (m >= 0 && m < n)
                                throw NewParseException ("Illegal {x, y} with x > y.");
 
@@ -780,7 +772,9 @@ namespace System.Text.RegularExpressions.Syntax {
                        if (m > 0)
                                max = m;
                        else
-                               max = 0xffff;
+                               max = 0x7fffffff;
+
+                       return true;
                }
 
                private Category ParseUnicodeCategory () {
@@ -930,6 +924,13 @@ namespace System.Text.RegularExpressions.Syntax {
                        // character codes
 
                        case '0':
+                               //
+                               // Octal values can also be specified without a
+                               // leading zero, and the limit of three characters
+                               // must include this first digit, so step back and
+                               // let ParseOctal re-read it.
+                               //
+                               ptr--;
                                int prevptr = ptr;
                                int result = ParseOctal (pattern, ref ptr);
                                if (result == -1 && prevptr == ptr)
@@ -985,10 +986,6 @@ namespace System.Text.RegularExpressions.Syntax {
                        return Parser.ParseNumber (pattern, ref ptr, b, min, max);
                }
 
-               private int ParseDecimal () {
-                       return Parser.ParseDecimal (pattern, ref ptr);
-               }
-
                private static int ParseDigit (char c, int b, int n) {
                        switch (b) {
                        case 8:
@@ -1016,10 +1013,7 @@ namespace System.Text.RegularExpressions.Syntax {
                }
 
                private void ConsumeWhitespace (bool ignore) {
-                       while (true) {
-                               if (ptr >= pattern.Length)
-                                       break;
-                       
+                       while (ptr < pattern.Length) {
                                if (pattern[ptr] == '(') {
                                        if (ptr + 3 >= pattern.Length)
                                                return;
@@ -1028,7 +1022,7 @@ namespace System.Text.RegularExpressions.Syntax {
                                                return;
 
                                        ptr += 3;
-                                       while (pattern[ptr ++] != ')')
+                                       while (ptr < pattern.Length && pattern[ptr ++] != ')')
                                                /* ignore */ ;
                                }
                                else if (ignore && pattern[ptr] == '#') {
@@ -1103,6 +1097,8 @@ namespace System.Text.RegularExpressions.Syntax {
                        foreach (Expression expr in refs.Keys) {
                                string name = (string)refs[expr];
                                if (!dict.Contains (name)) {
+                                       if (expr is CaptureAssertion && !Char.IsDigit (name [0]))
+                                               continue;
                                        throw NewParseException ("Reference to undefined group " +
                                                (Char.IsDigit (name[0]) ? "number " : "name ") +
                                                name);
@@ -1140,10 +1136,6 @@ namespace System.Text.RegularExpressions.Syntax {
                        return (options & RegexOptions.IgnorePatternWhitespace) != 0;
                }
 
-               private static bool IsRightToLeft (RegexOptions options) {
-                       return (options & RegexOptions.RightToLeft) != 0;
-               }
-
                private static bool IsECMAScript (RegexOptions options) {
                        return (options & RegexOptions.ECMAScript) != 0;
                }