Final piece of explicit numeric group support
author: Raja R Harinath <harinath@hurrynot.org>
Wed, 19 Aug 2009 20:43:01 +0000 (20:43 -0000)
committer: Raja R Harinath <harinath@hurrynot.org>
Wed, 19 Aug 2009 20:43:01 +0000 (20:43 -0000)
* parser.cs (gap): New.
(GetMapping): Return it.
(ResolveReferences): Collect explicit numeric groups which may not
match their indices.
(HandleExplicitNumericGroups): New.  Process the above list to
assign appropriate indices, and compute the correct value of 'gap'.

svn path=/trunk/mcs/; revision=140290

mcs/class/System/System.Text.RegularExpressions/ChangeLog
mcs/class/System/System.Text.RegularExpressions/parser.cs
mcs/class/System/Test/System.Text.RegularExpressions/ChangeLog
mcs/class/System/Test/System.Text.RegularExpressions/RegexMatchTests.cs

index 096d9ba7eae0ef19b9e97bd05e85c11b2e41fe15..a1026efc849a2cf3de752c0e4abc83fe0adad7bd 100644 (file)
@@ -1,3 +1,13 @@
+2009-08-20  Raja R Harinath  <harinath@hurrynot.org>
+
+       Final piece of explicit numeric group support
+       * parser.cs (gap): New.
+       (GetMapping): Return it.
+       (ResolveReferences): Collect explicit numeric groups which may not
+       match their indices.
+       (HandleExplicitNumericGroups): New.  Process the above list to
+       assign appropriate indices, and compute the correct value of 'gap'.
+
 2009-08-20  Raja R Harinath  <harinath@hurrynot.org>
 
        * syntax.cs (CapturingGroup.Index): Rename from Number to clarify
index 1c7b90006e6fb9b41bdf14a0ac9429e2ffe5e3fd..a96228c01a93e392f83787634ecc2a9a93f6e389 100644 (file)
@@ -164,7 +164,7 @@ namespace System.Text.RegularExpressions.Syntax {
                                mapping.Add (name, group.Index);
                        }
 
-                       return 1 + num_groups;
+                       return gap;
                }
 
                // private methods
@@ -1060,6 +1060,7 @@ namespace System.Text.RegularExpressions.Syntax {
                {
                        int gid = 1;
                        Hashtable dict = new Hashtable ();
+                       ArrayList explicit_numeric_groups = null;
 
                        // number unnamed groups
 
@@ -1081,6 +1082,11 @@ namespace System.Text.RegularExpressions.Syntax {
                                if (dict.Contains (group.Name)) {
                                        CapturingGroup prev = (CapturingGroup) dict [group.Name];
                                        group.Index = prev.Index;
+
+                                       if (group.Index == gid)
+                                               gid ++;
+                                       else if (group.Index > gid)
+                                               explicit_numeric_groups.Add (group);
                                        continue;
                                }
 
@@ -1088,10 +1094,19 @@ namespace System.Text.RegularExpressions.Syntax {
                                        int ptr = 0;
                                        int group_gid = ParseDecimal (group.Name, ref ptr);
                                        if (ptr == group.Name.Length) {
-                                               // FIXME: Handle non-contiguous groups
                                                group.Index = group_gid;
                                                dict.Add (group.Name, group);
                                                ++ num_groups;
+
+                                               if (group_gid == gid) {
+                                                       gid ++;
+                                               } else {
+                                                       // all numbers before 'gid' are already in the dictionary.  So, we know group_gid > gid
+                                                       if (explicit_numeric_groups == null)
+                                                               explicit_numeric_groups = new ArrayList (4);
+                                                       explicit_numeric_groups.Add (group);
+                                               }
+
                                                continue;
                                        }
                                }
@@ -1106,6 +1121,11 @@ namespace System.Text.RegularExpressions.Syntax {
                                ++ num_groups;
                        }
 
+                       gap = gid; // == 1 + num_groups, if explicit_numeric_groups == null
+
+                       if (explicit_numeric_groups != null)
+                               HandleExplicitNumericGroups (explicit_numeric_groups);
+
                        // resolve references
 
                        foreach (Expression expr in refs.Keys) {
@@ -1131,6 +1151,39 @@ namespace System.Text.RegularExpressions.Syntax {
                        }
                }
 
+               private void HandleExplicitNumericGroups (ArrayList explicit_numeric_groups)
+               { // Renumbers the listed explicit numeric groups so their indices continue contiguously from 'gap', and updates 'gap'.
+                       int gid = gap;
+                       int i = 0;
+                       int n_explicit = explicit_numeric_groups.Count;
+
+                       explicit_numeric_groups.Sort (); // NOTE(review): relies on the elements' IComparable; presumably CapturingGroup orders by Index - confirm
+
+                       // advance 'gid' past every explicit group whose index equals it, and stop at the
+                       // first group whose index is larger; groups with a smaller index are left untouched
+                       for (; i < n_explicit; ++i) {
+                               CapturingGroup g = (CapturingGroup) explicit_numeric_groups [i];
+                               if (g.Index > gid)
+                                       break; // 'i' stays on this group so the second loop resumes here
+                               if (g.Index == gid)
+                                       gid ++;
+                       }
+
+                       gap = gid; // 'gid' after skipping the groups that matched their index
+
+                       // re-index the remaining groups with consecutive indices starting at 'gid'; groups sharing an original index keep sharing one
+                       int prev = gid; // original index of the last re-indexed group; 'gid' cannot match the first one, whose Index > gid
+                       for (; i < n_explicit; ++i) {
+                               CapturingGroup g = (CapturingGroup) explicit_numeric_groups [i];
+                               if (g.Index == prev) {
+                                       g.Index = gid - 1; // same original index as the previous group: reuse the index just assigned
+                               } else {
+                                       prev = g.Index;
+                                       g.Index = gid ++; // new original index: take the next consecutive index
+                               }
+                       }
+               }
+
                // flag helper functions
 
                private static bool IsIgnoreCase (RegexOptions options) {
@@ -1170,5 +1223,6 @@ namespace System.Text.RegularExpressions.Syntax {
                private ArrayList caps;
                private Hashtable refs;
                private int num_groups;
+               private int gap;
        }
 }
index fa2e8ec119bb0dff6b304bd26be1f5f35c85aa0c..533022cac68a2428fa1919fa4a5347609159479c 100644 (file)
@@ -1,3 +1,7 @@
+2009-08-20  Raja R Harinath  <harinath@hurrynot.org>
+
+       * RegexMatchTests.cs: Add some explicit numeric group tests.
+
 2009-08-17  Raja R Harinath  <harinath@hurrynot.org>
 
        * RegexMatchTests.cs (RegexTrial0061): New.
index a2b4feac50af0f4e9d55d0fc86595f5f40018512..351a8af003634099383ef383a642f8a9448ceef9 100644 (file)
@@ -159,6 +159,14 @@ namespace MonoTests.System.Text.RegularExpressions
                        new RegexTrial (@"\4400", RegexOptions.ECMAScript, "asdf$0012", "Pass. Group[0]=(4,3)"),//60
                        new RegexTrial (@"(?<2>ab)(?<c>c)(?<d>d)", RegexOptions.None, "abcd", "Pass. Group[0]=(0,4) Group[1]=(2,1) Group[2]=(0,2) Group[3]=(3,1)"),// 61
                        new RegexTrial (@"(?<1>ab)(c)", RegexOptions.None, "abc", "Pass. Group[0]=(0,3) Group[1]=(0,2)(2,1)"),//62
+                       new RegexTrial (@"(?<44>a)", RegexOptions.None, "a", "Pass. Group[0]=(0,1) Group[44]=(0,1)"),//63
+                       new RegexTrial (@"(?<44>a)(?<8>b)", RegexOptions.None, "ab", "Pass. Group[0]=(0,2) Group[8]=(1,1) Group[44]=(0,1)"),//64
+                       new RegexTrial (@"(?<44>a)(?<8>b)(?<1>c)(d)", RegexOptions.None, "abcd", "Pass. Group[0]=(0,4) Group[1]=(2,1)(3,1) Group[8]=(1,1) Group[44]=(0,1)"),//65
+                       new RegexTrial (@"(?<44>a)(?<44>b)", RegexOptions.None, "ab", "Pass. Group[0]=(0,2) Group[44]=(0,1)(1,1)"),//66
+                       new RegexTrial (@"(?<44>a)\440", RegexOptions.None, "a ", "Pass. Group[0]=(0,2) Group[44]=(0,1)"),//67
+                       new RegexTrial (@"(?<44>a)\440", RegexOptions.ECMAScript, "a ", "Fail."),//68
+                       new RegexTrial (@"(?<44>a)\440", RegexOptions.None, "aa0", "Fail."),//69
+                       new RegexTrial (@"(?<44>a)\440", RegexOptions.ECMAScript, "aa0", "Pass. Group[0]=(0,3) Group[44]=(0,1)"),//70
                };
 
                [Test]
@@ -340,5 +348,13 @@ namespace MonoTests.System.Text.RegularExpressions
                [Test]  public void RegexTrial0060 () { trials [60].Execute (); }
                [Test]  public void RegexTrial0061 () { trials [61].Execute (); }
                [Test]  public void RegexTrial0062 () { trials [62].Execute (); }
+               [Test]  public void RegexTrial0063 () { trials [63].Execute (); }
+               [Test]  public void RegexTrial0064 () { trials [64].Execute (); }
+               [Test]  public void RegexTrial0065 () { trials [65].Execute (); }
+               [Test]  public void RegexTrial0066 () { trials [66].Execute (); }
+               [Test]  public void RegexTrial0067 () { trials [67].Execute (); }
+               [Test]  public void RegexTrial0068 () { trials [68].Execute (); }
+               [Test]  public void RegexTrial0069 () { trials [69].Execute (); }
+               [Test]  public void RegexTrial0070 () { trials [70].Execute (); }
        }
 }