2008-08-30 Zoltan Varga <vargaz@gmail.com>
[mono.git] / mcs / class / System / System.Text.RegularExpressions / NamingGroupsConstruct.jvm.cs
1 //
2 // NamingGroupsConstruct.jvm.cs
3 //
4 // Author:
5 //      Arina Itkes  <arinai@mainsoft.com>
6 //
7 // Copyright (C) 2007 Mainsoft, Inc.
8 //
9
10 //
11 // Permission is hereby granted, free of charge, to any person obtaining
12 // a copy of this software and associated documentation files (the
13 // "Software"), to deal in the Software without restriction, including
14 // without limitation the rights to use, copy, modify, merge, publish,
15 // distribute, sublicense, and/or sell copies of the Software, and to
16 // permit persons to whom the Software is furnished to do so, subject to
17 // the following conditions:
18 // 
19 // The above copyright notice and this permission notice shall be
20 // included in all copies or substantial portions of the Software.
21 // 
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
26 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
27 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30
31
32 using System;
33 using System.Collections;
34 using System.Collections.Generic;
35 using System.Text;
36 using java.util.regex;
37 using java.lang;
38
39 namespace System.Text.RegularExpressions
40 {
// Handles .NET named groups ((?<name>...), (?'name'...)) and explicitly
// numbered groups ((?<1>...), (?'1'...)) when a .NET pattern is prepared
// for java.util.regex: the group name/number is recorded in a
// PatternGrouping and then stripped from the pattern text, leaving a plain
// capturing group.
sealed class NamingGroupsConstruct : IConstructType
{
        // Detection patterns used by HasConstruct: a named or explicitly
        // numbered group opener followed, on the same line, by a ')'.
        private const string NAMED_GROUP_PATTERN_1 = @"\(\?<[A-Za-z]\w*>.*\)";
        private const string NAMED_GROUP_PATTERN_2 = @"\(\?'[A-Za-z]\w*'.*\)";
        private const string NUMBERED_GROUP_PATTERN_1 = @"\(\?<\d+>.*\)";
        private const string NUMBERED_GROUP_PATTERN_2 = @"\(\?'\d+'.*\)";

        // Splitting helpers: LEFT_PAREN is the split delimiter;
        // ESCAPED_LEFT_PAREN_TEMPL matches "\(" preceded by an even number of
        // backslashes, i.e. a literal parenthesis that opens no group.
        private const string LEFT_PAREN = @"\(";
        private const string ESCAPED_LEFT_PAREN_TEMPL = @"(?<=(?:[^\\]|\A)(?:[\\]{2}){0,1073741823})\\\(";

        // Classification patterns, applied to the text that follows a '('.
        private const string NON_CAPTURED_GROUP_PATTERN = @"(?:^\?[:imnsx=!>-]|^\?<[!=])";
        private const string NAMED_GROUP_PATTERN1 = @"^\?<([A-Za-z]\w*)>";
        private const string NAMED_GROUP_PATTERN2 = @"^\?'([A-Za-z]\w*)'";
        private const string NUMBERED_GROUP_PATTERN1 = @"^\?<(\d+)>";
        private const string NUMBERED_GROUP_PATTERN2 = @"^\?'(\d+)'";
        private const string QUESTION = "?";

        // Removal patterns: the "?<name>" / "?'name'" / "?<n>" / "?'n'" text
        // directly after an unescaped '('.
        private const string REMOVED_NAME_PATTERN_TEMPL1 = @"(?<=(?:[^\\]|\A)(?:[\\]{2}){0,1073741823}\()\?<[A-Za-z]\w*>";
        private const string REMOVED_NAME_PATTERN_TEMPL2 = @"(?<=(?:[^\\]|\A)(?:[\\]{2}){0,1073741823}\()\?'[A-Za-z]\w*'";
        private const string REMOVED_NUMBERED_PATTERN_TEMPL1 = @"(?<=(?:[^\\]|\A)(?:[\\]{2}){0,1073741823}\()\?<\d+>";
        private const string REMOVED_NUMBERED_PATTERN_TEMPL2 = @"(?<=(?:[^\\]|\A)(?:[\\]{2}){0,1073741823}\()\?'\d+'";

        // The patterns below never change, so they are compiled once instead
        // of on every UpdateGroupMapping call (java.util.regex.Pattern is
        // immutable and safe to share between threads).
        private static readonly Pattern leftParenPattern = Pattern.compile (LEFT_PAREN);
        private static readonly Pattern nonCapturedGroupPattern = Pattern.compile (NON_CAPTURED_GROUP_PATTERN);
        private static readonly Pattern [] groupNamePatterns = {
                Pattern.compile (NAMED_GROUP_PATTERN1),
                Pattern.compile (NAMED_GROUP_PATTERN2)
        };
        private static readonly Pattern [] groupNumberPatterns = {
                Pattern.compile (NUMBERED_GROUP_PATTERN1),
                Pattern.compile (NUMBERED_GROUP_PATTERN2)
        };

        // Removal templates in the order ReformatPattern applies them.
        private static readonly string [] removedPrefixTemplates = {
                REMOVED_NAME_PATTERN_TEMPL1,
                REMOVED_NAME_PATTERN_TEMPL2,
                REMOVED_NUMBERED_PATTERN_TEMPL1,
                REMOVED_NUMBERED_PATTERN_TEMPL2
        };

        // Returns true when 'pattern' contains a named or explicitly numbered
        // group in either the <...> or the '...' syntax. 'options' is not
        // consulted by this construct.
        public bool HasConstruct (string pattern, RegexOptions options) {
                return JavaUtils.IsMatch (pattern, NAMED_GROUP_PATTERN_1)
                        || JavaUtils.IsMatch (pattern, NAMED_GROUP_PATTERN_2)
                        || JavaUtils.IsMatch (pattern, NUMBERED_GROUP_PATTERN_1)
                        || JavaUtils.IsMatch (pattern, NUMBERED_GROUP_PATTERN_2);
        }

        // Returns 'reformattedPattern' unchanged when it holds no named or
        // numbered group. Otherwise fills 'patternGrouping' with the group
        // mapping and returns the pattern with the group names/numbers removed.
        public string Reformat (RegexOptions options,
                string reformattedPattern,
                PatternGrouping patternGrouping) {
                if (!HasConstruct (reformattedPattern, options)) {
                        return reformattedPattern;
                }

                // The mapping must be collected before the names are stripped.
                UpdateGroupMapping (reformattedPattern, options, patternGrouping);

                return ReformatPattern (reformattedPattern);
        }

        // Strips every group name / explicit number that directly follows an
        // unescaped '(' so that only a plain capturing group remains.
        private static string ReformatPattern (string reformattedPattern) {
                foreach (string template in removedPrefixTemplates) {
                        reformattedPattern = JavaUtils.ReplaceAll (reformattedPattern, template, string.Empty);
                }
                return reformattedPattern;
        }

        // Walks the groups of 'reformattedPattern' in order of their opening
        // parenthesis and hands the collected data to patternGrouping.SetGroups:
        //   - namedGroups: names (or, under ExplicitCapture, the explicit
        //     numbers as text) in order of appearance;
        //   - javaGroupNumberToNetGroupNumber: indexed by the 1-based position
        //     of the capturing group; holds the running unnamed-group count,
        //     the explicit number, or -1 for entries recorded in namedGroups.
        //
        // NOTE(review): every unescaped '(' is treated as a group opener, so a
        // '(' inside a character class is presumably counted too - confirm
        // whether an earlier construct rewrites those.
        private static void UpdateGroupMapping (string reformattedPattern,
                RegexOptions options,
                PatternGrouping patternGrouping) {

                // Drop escaped parentheses first so they cannot act as split points.
                CharSequence workString = (CharSequence) (object) JavaUtils.ReplaceAll (reformattedPattern, ESCAPED_LEFT_PAREN_TEMPL, string.Empty);

                // parts [0] is the text before the first '('; every later
                // element starts right after one '('.
                string [] parts = leftParenPattern.split (workString);

                bool explicitCapture =
                        (options & RegexOptions.ExplicitCapture) == RegexOptions.ExplicitCapture;

                // One slot per part is always enough for either array.
                string [] namedGroups = new string [parts.Length];
                int [] javaGroupNumberToNetGroupNumber = new int [parts.Length];
                int capturedGroupsCount = 0;
                int namedGroupsCount = 0;
                int nonamedGroupsCount = 0;
                int sameGroupsCounter = 0;

                for (int i = 1; i < parts.Length; ++i) {
                        string part = parts [i];

                        // Plain "(...)": an unnamed capturing group. Ordinal
                        // comparison, because this is a syntax check and must
                        // not depend on the current culture.
                        if (!part.StartsWith (QUESTION, StringComparison.Ordinal)) {
                                javaGroupNumberToNetGroupNumber [++capturedGroupsCount] = ++nonamedGroupsCount;
                                continue;
                        }

                        // Non-capturing groups, inline options and lookarounds
                        // do not take a group number.
                        if (nonCapturedGroupPattern.matcher ((CharSequence) (object) part).find ()) {
                                continue;
                        }

                        // Named group, in either syntax.
                        string groupName = FirstCapture (part, groupNamePatterns);
                        if (groupName != null) {
                                namedGroups [namedGroupsCount++] = groupName;
                                javaGroupNumberToNetGroupNumber [++capturedGroupsCount] = -1;
                                continue;
                        }

                        // Explicitly numbered group, in either syntax.
                        string groupNumber = FirstCapture (part, groupNumberPatterns);
                        if (groupNumber == null) {
                                continue;
                        }

                        int netGroupNumber = int.Parse (groupNumber);
                        if (explicitCapture) {
                                // Under ExplicitCapture the number is recorded like a name.
                                namedGroups [namedGroupsCount++] = groupNumber;
                                javaGroupNumberToNetGroupNumber [++capturedGroupsCount] = -1;
                        }
                        else {
                                javaGroupNumberToNetGroupNumber [++capturedGroupsCount] = netGroupNumber;
                                // NOTE(review): this reads back the slot written on the
                                // previous line, so it can never be true and
                                // sameGroupsCounter always stays 0. Presumably it was
                                // meant to detect a reused group number - confirm
                                // against PatternGrouping.SetGroups before changing it.
                                if (javaGroupNumberToNetGroupNumber [capturedGroupsCount] != netGroupNumber) {
                                        ++sameGroupsCounter;
                                }
                        }
                }

                //Filling grouping
                patternGrouping.SetGroups (namedGroups,
                        javaGroupNumberToNetGroupNumber,
                        nonamedGroupsCount,
                        capturedGroupsCount,
                        sameGroupsCounter,
                        options);
        }

        // Tries 'patterns' in order against 'part' and returns capture group 1
        // of the first pattern that is found, or null when none is found.
        private static string FirstCapture (string part, Pattern [] patterns) {
                foreach (Pattern pattern in patterns) {
                        Matcher matcher = pattern.matcher ((CharSequence) (object) part);
                        if (matcher.find ()) {
                                return matcher.group (1);
                        }
                }
                return null;
        }
}
197 }