regexp backreferences: Implement fallback to octal numbers, and ECMAScript semantics.
author: Raja R Harinath <harinath@hurrynot.org>
Mon, 10 Aug 2009 16:25:10 +0000 (16:25 -0000)
committer: Raja R Harinath <harinath@hurrynot.org>
Mon, 10 Aug 2009 16:25:10 +0000 (16:25 -0000)
* syntax.cs (BackslashNumber.ResolveReference): Implement fallback
to octal numbers, and ECMAScript semantics.
* parser.cs (ResolveReferences): Use it.
* RegexMatchTests.cs (RegexTrial0054..60): New.

svn path=/trunk/mcs/; revision=139657

mcs/class/System/System.Text.RegularExpressions/ChangeLog
mcs/class/System/System.Text.RegularExpressions/parser.cs
mcs/class/System/System.Text.RegularExpressions/syntax.cs
mcs/class/System/Test/System.Text.RegularExpressions/ChangeLog
mcs/class/System/Test/System.Text.RegularExpressions/RegexMatchTests.cs

index e6e83332e02c9009ddaccb22880b0f9fca5d0c3d..c71706ec268a53272b671945e278a593b558c728 100644 (file)
@@ -1,3 +1,9 @@
+2009-08-10  Raja R Harinath  <harinath@hurrynot.org>
+
+       * syntax.cs (BackslashNumber.ResolveReference): Implement fallback
+       to octal numbers, and ECMAScript semantics.
+       * parser.cs (ResolveReferences): Use it.
+
 2009-08-10  Raja R Harinath  <harinath@hurrynot.org>
 
        * syntax.cs (BackslashNumber): New class.
index ee115c175568ce8955eb7bedcfd1695f3890ebaa..96c2025ca31c3eb4f97fff5eb8002d87c39be3f2 100644 (file)
@@ -1102,6 +1102,9 @@ namespace System.Text.RegularExpressions.Syntax {
                                if (!dict.Contains (name)) {
                                        if (expr is CaptureAssertion && !Char.IsDigit (name [0]))
                                                continue;
+                                       BackslashNumber bn = expr as BackslashNumber;
+                                       if (bn != null && bn.ResolveReference (name, dict))
+                                               continue;
                                        throw NewParseException ("Reference to undefined group " +
                                                (Char.IsDigit (name[0]) ? "number " : "name ") +
                                                name);
index d9cc4a36141f0531a1d8404678694924966e3de8..3b4b717dc7c95941d10b7c8ca6af4f8c62d0a9f5 100644 (file)
@@ -702,7 +702,8 @@ namespace System.Text.RegularExpressions.Syntax {
                        set { ignore = value; }
                }
 
-               public override void Compile (ICompiler cmp, bool reverse) {
+               public static void CompileLiteral (string str, ICompiler cmp, bool ignore, bool reverse)
+               {
                        if (str.Length == 0)
                                return;
 
@@ -712,6 +713,11 @@ namespace System.Text.RegularExpressions.Syntax {
                                cmp.EmitString (str, ignore, reverse);
                }
 
+               public override void Compile (ICompiler cmp, bool reverse)
+               {
+                       CompileLiteral (str, cmp, ignore, reverse); // delegates to the static helper, which BackslashNumber.Compile also calls for its literal text
+               }
+
                public override void GetWidth (out int min, out int max) {
                        min = max = str.Length;
                }
@@ -806,6 +812,49 @@ namespace System.Text.RegularExpressions.Syntax {
                {
                        this.ecma = ecma;
                }
+
+               // Precondition: groups [num_str] == null.  Tries to give an unresolved \<num_str> another meaning: in ECMAScript mode a shorter group reference followed by literal digits, otherwise (or failing that) an octal character escape.  Returns true and sets 'literal' (and possibly CapturingGroup) on success, false if no reinterpretation applies.
+               public bool ResolveReference (string num_str, Hashtable groups)
+               {
+                       if (ecma) {
+                               int i;
+                               for (i = 1; i < num_str.Length; ++i) { // proper prefixes only: the full string is already known to be undefined
+                                       string name = num_str.Substring (0, i);
+                                       CapturingGroup group = (CapturingGroup) groups [name];
+                                       if (group == null)
+                                               break; // stop at the first prefix that names no group
+                                       CapturingGroup = group; // remember the group named by the longest prefix accepted so far
+                               }
+                               if (i > 1) { // at least the one-digit prefix named a group
+                                       literal = num_str.Substring (i - 1); // digits after the accepted prefix are matched as literal text
+                                       return true;
+                               }
+                       } else {
+                               if (num_str.Length == 1)
+                                       return false; // outside ECMAScript mode a single unresolved digit is never reinterpreted
+                       }
+
+                       int ptr = 0; // presumably advanced by ParseOctal to just past the digits it consumed -- confirm in Parser.ParseOctal
+                       int as_octal = Parser.ParseOctal (num_str, ref ptr);
+                       // Since ParseOctal reads at most 3 digits, as_octal <= octal 0777
+                       if (as_octal == -1) // presumably means no octal digit could be read -- confirm in Parser.ParseOctal
+                               return false;
+                       if (as_octal > 0xff && ecma) { // ECMAScript mode: the value does not fit in 8 bits
+                               as_octal /= 8; // drop the last octal digit from the value...
+                               --ptr; // ...and step back so that digit stays in the literal tail below
+                       }
+                       as_octal &= 0xff; // keep only the low 8 bits; a no-op after the ECMAScript adjustment, since 0777 / 8 < 0x100
+                       literal = ((char) as_octal) + num_str.Substring (ptr); // the octal character followed by the digits that were not consumed
+                       return true;
+               }
+
+               public override void Compile (ICompiler cmp, bool reverse)
+               {
+                       if (CapturingGroup != null) // a group was resolved: emit whatever the base class emits for the reference
+                               base.Compile (cmp, reverse);
+                       if (literal != null) // text recorded by ResolveReference.  NOTE(review): emitted after the reference even when 'reverse' is true -- confirm this order is right for right-to-left matching
+                               Literal.CompileLiteral (literal, cmp, IgnoreCase, reverse);
+               }
        }
 
        class CharacterClass : Expression {
index 877ff8241cd8c7ede34f6083254feffd690f372b..0c0cf3648d48ee4dee3ce382c9c5e407ebc22977 100644 (file)
@@ -1,3 +1,7 @@
+2009-08-10  Raja R Harinath  <harinath@hurrynot.org>
+
+       * RegexMatchTests.cs (RegexTrial0054..60): New.
+
 2009-08-10  Raja R Harinath  <harinath@hurrynot.org>
 
        * RegexMatchTests.cs (RegexTrial0053): New.
index 53a00f80442e2066bacf5722ee9ed0bc2d4020cc..e9aeb72b61b244762165c6b639270da9afd8da48 100644 (file)
@@ -150,6 +150,13 @@ namespace MonoTests.System.Text.RegularExpressions
                        new RegexTrial (@"(?>a*).", RegexOptions.ExplicitCapture, "aaaa", "Fail."),//52
 
                        new RegexTrial (@"(?<ab>ab)c\1", RegexOptions.None, "abcabc", "Pass. Group[0]=(0,5) Group[1]=(0,2)"),//53
+                       new RegexTrial (@"\1", RegexOptions.ECMAScript, "-", "Fail."),//54
+                       new RegexTrial (@"\2", RegexOptions.ECMAScript, "-", "Fail."),//55
+                       new RegexTrial (@"(a)|\2", RegexOptions.ECMAScript, "-", "Fail."),//56
+                       new RegexTrial (@"\4400", RegexOptions.None, "asdf 012", "Pass. Group[0]=(4,2)"),//57
+                       new RegexTrial (@"\4400", RegexOptions.ECMAScript, "asdf 012", "Fail."),//58
+                       new RegexTrial (@"\4400", RegexOptions.None, "asdf$0012", "Fail."),//59
+                       new RegexTrial (@"\4400", RegexOptions.ECMAScript, "asdf$0012", "Pass. Group[0]=(4,3)"),//60
                };
 
                [Test]
@@ -322,5 +329,12 @@ namespace MonoTests.System.Text.RegularExpressions
                [Test]  public void RegexJvmTrial0052 () { trials [52].Execute (); }
 
                [Test]  public void RegexTrial0053 () { trials [53].Execute (); }
+               [Test]  public void RegexTrial0054 () { trials [54].Execute (); }
+               [Test]  public void RegexTrial0055 () { trials [55].Execute (); }
+               [Test]  public void RegexTrial0056 () { trials [56].Execute (); }
+               [Test]  public void RegexTrial0057 () { trials [57].Execute (); }
+               [Test]  public void RegexTrial0058 () { trials [58].Execute (); }
+               [Test]  public void RegexTrial0059 () { trials [59].Execute (); }
+               [Test]  public void RegexTrial0060 () { trials [60].Execute (); }
        }
 }