Bug 10670 fix.
[mono.git] / mcs / class / System / System.Text.RegularExpressions / parser.cs
index 74d718494c20a0be6dea0210f100eee2adbaa2be..f3ddf196730bb8879cbdc0dd8fc5a82829c91486 100644 (file)
@@ -149,24 +149,25 @@ namespace System.Text.RegularExpressions.Syntax {
                        }
                }
 
-               public IDictionary GetMapping () {
+               public IDictionary GetMapping ()        // maps "0", each group name, and each unnamed group's number (as a string) to the group number
+               {
                        Hashtable mapping = new Hashtable ();
-                       Hashtable numbers = new Hashtable ();
                        int end = caps.Count;
                        mapping.Add ("0", 0);
                        for (int i = 0; i < end; i++) {
                                CapturingGroup group = (CapturingGroup) caps [i];
-                               if (group.Name != null && !mapping.Contains (group.Name)) {
+                               if (group.Name != null) {
+                                       if (mapping.Contains (group.Name)) {    // name already registered: it must denote the same group number
+                                               if ((int) mapping [group.Name] != group.Number)
+                                                       throw new InvalidOperationException ("invalid state");  // derives from SystemException, so existing catch blocks still match
+                                               continue;
+                                       }
                                        mapping.Add (group.Name, group.Number);
-                                       numbers.Add (group.Number, group.Number);
+                               } else {
+                                       mapping.Add (group.Number.ToString (), group.Number);   // unnamed group: keyed by its number
                                }
                        }
 
-                       for (int i = 1; i < end; i++) {
-                               if (numbers [i] == null)
-                                       mapping.Add (i.ToString (), i);
-                       }
-
                        return mapping;
                }
 
@@ -300,8 +301,8 @@ namespace System.Text.RegularExpressions.Syntax {
 
                                                switch (k) {
                                                case '?': min = 0; max = 1; break;
-                                               case '*': min = 0; max = 0xffff; break;
-                                               case '+': min = 1; max = 0xffff; break;
+                                               case '*': min = 0; max = 0x7fffffff; break;     // 0x7fffffff == int.MaxValue; presumably means "no upper bound" — confirm
+                                               case '+': min = 1; max = 0x7fffffff; break;     // same ceiling as '*', but at least one repetition
                                                }
                                        } else if (k == '{' && ptr + 1 < pattern.Length) {
                                                int saved_ptr = ptr;
@@ -528,7 +529,7 @@ namespace System.Text.RegularExpressions.Syntax {
                                }
                                else {                                          // capture test
                                        ++ ptr;
-                                       asn = new CaptureAssertion ();
+                                       asn = new CaptureAssertion (new Literal (name, IsIgnoreCase (options)));        // NOTE(review): the name is also passed as a Literal — presumably a fallback for when it is not a group name; confirm
                                        refs.Add (asn, name);
                                }
 
@@ -633,15 +634,13 @@ namespace System.Text.RegularExpressions.Syntax {
                }
 
                private Expression ParseCharacterClass (RegexOptions options) {
-                       bool negate, ecma;
+                       bool negate = false;    // set to true just below when the class opens with '^'
                        if (pattern[ptr] == '^') {
                                negate = true;
                                ++ ptr;
                        }
-                       else
-                               negate = false;
                        
-                       ecma = IsECMAScript (options);
+                       bool ecma = IsECMAScript (options);     // chooses between the Ecma* and the regular categories for \d, \w and \s
                        CharacterClass cls = new CharacterClass (negate, IsIgnoreCase (options));
 
                        if (pattern[ptr] == ']') {
@@ -660,83 +659,65 @@ namespace System.Text.RegularExpressions.Syntax {
                                        closed = true;
                                        break;
                                }
-                               
-                               if (c == '-') {
+
+                               if (c == '-' && last >= 0 && !range) {
                                        range = true;
                                        continue;
                                }
 
                                if (c == '\\') {
                                        c = ParseEscape ();
-                                       if (c < 0) {
-                                               // didn't recognize escape
-
-                                               c = pattern[ptr ++];
-                                               switch (c) {
-                                               case 'b': c = '\b'; break;
-
-                                               case 'd':
-                                                       cls.AddCategory (ecma ? Category.EcmaDigit : Category.Digit, false);
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 'w':
-                                                       cls.AddCategory (ecma ? Category.EcmaWord : Category.Word, false);
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 's':
-                                                       cls.AddCategory (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, false);
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 'p':
-                                                       cls.AddCategory (ParseUnicodeCategory (), false);       // ignore ecma
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 'D':
-                                                       cls.AddCategory (ecma ? Category.EcmaDigit : Category.Digit, true);
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 'W':
-                                                       cls.AddCategory (ecma ? Category.EcmaWord : Category.Word, true);
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 'S':
-                                                       cls.AddCategory (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, true);
-                                                       last = -1;
-                                                       continue;
-                                                       
-                                               case 'P':
-                                                       cls.AddCategory (ParseUnicodeCategory (), true);
-                                                       last = -1;
-                                                       continue;
-
-                                               default: break;         // add escaped character
-                                               }
+                                       if (c >= 0)
+                                               goto char_recognized;
+
+                                       // didn't recognize escape
+                                       c = pattern [ptr ++];
+                                       switch (c) {
+                                       case 'b':
+                                               c = '\b';
+                                               goto char_recognized;
+
+                                       case 'd': case 'D':
+                                               cls.AddCategory (ecma ? Category.EcmaDigit : Category.Digit, c == 'D');
+                                               break;
+                                               
+                                       case 'w': case 'W':
+                                               cls.AddCategory (ecma ? Category.EcmaWord : Category.Word, c == 'W');
+                                               break;
+                                               
+                                       case 's': case 'S':
+                                               cls.AddCategory (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, c == 'S');
+                                               break;
+                                               
+                                       case 'p': case 'P':
+                                               cls.AddCategory (ParseUnicodeCategory (), c == 'P');    // ignore ecma
+                                               break;
+
+                                       default:                // add escaped character
+                                               goto char_recognized;
                                        }
+
+                                       // if the pattern looks like [a-\s] ...
+                                       if (range)
+                                               throw NewParseException ("character range cannot have category \\" + c);
+
+                                       last = -1;
+                                       continue;
                                }
 
+                       char_recognized:
                                if (range) {
+                                       // if 'range' is true, we know that 'last >= 0'
                                        if (c < last)
-                                               throw NewParseException ("[x-y] range in reverse order.");
-
-                                       if (last >= 0) {
-                                               cls.AddRange ((char)last, (char)c);
-                                               last = -1;
-                                       } else {
-                                               cls.AddCharacter ('-');
-                                               cls.AddCharacter ((char)c);
-                                               last = c;
-                                       }
+                                               throw NewParseException ("[" + last + "-" + c + "] range in reverse order.");
+                                       cls.AddRange ((char)last, (char)c);
+                                       last = -1;
                                        range = false;
-                               } else {
-                                       cls.AddCharacter ((char)c);
-                                       last = c;
+                                       continue;
                                }
+
+                               cls.AddCharacter ((char)c);
+                               last = c;
                        }
 
                        if (!closed)
@@ -780,8 +761,8 @@ namespace System.Text.RegularExpressions.Syntax {
 
                        /* check bounds and ordering */
 
-                       if (n >= 0xffff || m >= 0xffff)
-                               throw NewParseException ("Illegal {x, y} - maximum of 65535.");
+                       if (n > 0x7fffffff || m > 0x7fffffff)   // NOTE(review): can never be true if n and m are declared int — confirm their type
+                               throw NewParseException ("Illegal {x, y} - maximum of 2147483647.");
                        if (m >= 0 && m < n)
                                throw NewParseException ("Illegal {x, y} with x > y.");
 
@@ -791,7 +772,7 @@ namespace System.Text.RegularExpressions.Syntax {
                        if (m > 0)
                                max = m;
                        else
-                               max = 0xffff;
+                               max = 0x7fffffff;       // int.MaxValue; NOTE(review): taken for every m <= 0, including an explicit 0 — confirm that is intended
 
                        return true;
                }
@@ -1116,6 +1097,8 @@ namespace System.Text.RegularExpressions.Syntax {
                        foreach (Expression expr in refs.Keys) {
                                string name = (string)refs[expr];
                                if (!dict.Contains (name)) {
+                                       if (expr is CaptureAssertion && !Char.IsDigit (name [0]))       // unknown name that does not start with a digit, used in a capture test: not reported as an error
+                                               continue;
                                        throw NewParseException ("Reference to undefined group " +
                                                (Char.IsDigit (name[0]) ? "number " : "name ") +
                                                name);