Minor code re-organization
[mono.git] / mcs / class / System / System.Text.RegularExpressions / Regex.cs
1 //
2 // assembly:    System
3 // namespace:   System.Text.RegularExpressions
4 // file:        regex.cs
5 //
6 // author:      Dan Lewis (dlewis@gmx.co.uk)
7 //              (c) 2002
8
9 //
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
17 // 
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
20 // 
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 //
29
30 using System;
31 using System.Text;
32 using System.Collections;
33 using System.Reflection;
34 using System.Reflection.Emit;
35 using System.Runtime.Serialization;
36
37 using RegularExpression = System.Text.RegularExpressions.Syntax.RegularExpression;
38 using Parser = System.Text.RegularExpressions.Syntax.Parser;
39
40 using System.Diagnostics;
41
42
43 namespace System.Text.RegularExpressions {
44         
45         [Serializable]
46         public partial class Regex : ISerializable {
47
48 #if !TARGET_JVM
// Convenience overload: forwards to the four-argument CompileToAssembly
// with an empty custom-attribute array and no resource file.
[MonoTODO]
public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname)
{
        CustomAttributeBuilder [] noAttributes = new CustomAttributeBuilder [0];
        CompileToAssembly (regexes, aname, noAttributes, null);
}

// Convenience overload: forwards to the four-argument CompileToAssembly
// with the caller's attribute builders and no resource file.
[MonoTODO]
public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname,
                                      CustomAttributeBuilder [] attribs)
{
        string noResourceFile = null;
        CompileToAssembly (regexes, aname, attribs, noResourceFile);
}
61
// Target of the shorter CompileToAssembly overloads. Not implemented:
// the method unconditionally throws NotImplementedException and none
// of its arguments are read.
//
// The block comment inside the body is an unfinished draft that is kept
// for reference only; it is never compiled.
[MonoTODO]
public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname,
                                      CustomAttributeBuilder [] attribs, string resourceFile)
{
        throw new NotImplementedException ();

        // TODO : Make use of attribs and resourceFile parameters
        /*
        AssemblyBuilder asmBuilder = AppDomain.CurrentDomain.DefineDynamicAssembly (aname, AssemblyBuilderAccess.RunAndSave);
        ModuleBuilder modBuilder = asmBuilder.DefineDynamicModule("InnerRegexModule",aname.Name);
        Parser psr = new Parser ();

        System.Console.WriteLine("CompileToAssembly");

        for(int i=0; i < regexes.Length; i++)
                {
                        System.Console.WriteLine("Compiling expression :" + regexes[i].Pattern);
                        RegularExpression re = psr.ParseRegularExpression (regexes[i].Pattern, regexes[i].Options);

                        // compile

                        CILCompiler cmp = new CILCompiler (modBuilder, i);
                        bool reverse = (regexes[i].Options & RegexOptions.RightToLeft) !=0;
                        re.Compile (cmp, reverse);
                        cmp.Close();

                }


        // Define a runtime class with specified name and attributes.
        TypeBuilder builder = modBuilder.DefineType("ITest");
        builder.CreateType();
        asmBuilder.Save(aname.Name);
        */
}
96 #endif
97                 
// Returns whatever Parser.Escape produces for `str`.
public static string Escape (string str)
{
        string escaped = Parser.Escape (str);
        return escaped;
}

// Returns whatever Parser.Unescape produces for `str`.
public static string Unescape (string str)
{
        string unescaped = Parser.Unescape (str);
        return unescaped;
}
107
// Static shortcut: IsMatch with RegexOptions.None.
public static bool IsMatch (string input, string pattern)
{
        return Regex.IsMatch (input, pattern, RegexOptions.None);
}

// Static shortcut: builds a Regex from `pattern`/`options` and reports
// whether it matches `input`.
public static bool IsMatch (string input, string pattern, RegexOptions options)
{
        return new Regex (pattern, options).IsMatch (input);
}

// Static shortcut: Match with RegexOptions.None.
public static Match Match (string input, string pattern)
{
        return Match (input, pattern, RegexOptions.None);
}

// Static shortcut: builds a Regex from `pattern`/`options` and returns
// its match result for `input`.
public static Match Match (string input, string pattern, RegexOptions options)
{
        return new Regex (pattern, options).Match (input);
}

// Static shortcut: Matches with RegexOptions.None.
public static MatchCollection Matches (string input, string pattern)
{
        return Regex.Matches (input, pattern, RegexOptions.None);
}

// Static shortcut: builds a Regex from `pattern`/`options` and returns
// the collection of matches it produces for `input`.
public static MatchCollection Matches (string input, string pattern, RegexOptions options)
{
        return new Regex (pattern, options).Matches (input);
}
140
// Static shortcut: evaluator-based Replace with RegexOptions.None.
public static string Replace (string input, string pattern, MatchEvaluator evaluator)
{
        return Replace (input, pattern, evaluator, RegexOptions.None);
}

// Static shortcut: builds a Regex from `pattern`/`options` and runs the
// evaluator-based instance Replace on `input`.
public static string Replace (string input, string pattern, MatchEvaluator evaluator,
                              RegexOptions options)
{
        return new Regex (pattern, options).Replace (input, evaluator);
}

// Static shortcut: string-based Replace with RegexOptions.None.
public static string Replace (string input, string pattern, string replacement)
{
        return Replace (input, pattern, replacement, RegexOptions.None);
}

// Static shortcut: builds a Regex from `pattern`/`options` and runs the
// string-based instance Replace on `input`.
public static string Replace (string input, string pattern, string replacement,
                              RegexOptions options)
{
        return new Regex (pattern, options).Replace (input, replacement);
}

// Static shortcut: Split with RegexOptions.None.
public static string [] Split (string input, string pattern)
{
        return Split (input, pattern, RegexOptions.None);
}

// Static shortcut: builds a Regex from `pattern`/`options` and splits
// `input` with it.
public static string [] Split (string input, string pattern, RegexOptions options)
{
        return new Regex (pattern, options).Split (input);
}
175
#if NET_2_0
// Shared cache of machine factories, looked up by pattern and options.
// Constructed with 15 (presumably the initial capacity - confirm in FactoryCache).
static FactoryCache cache = new FactoryCache (15);

// Exposes the capacity of the shared factory cache.
// Assigning a negative value throws ArgumentOutOfRangeException.
public static int CacheSize {
        get { return cache.Capacity; }
        set {
                if (value >= 0) {
                        cache.Capacity = value;
                        return;
                }

                throw new ArgumentOutOfRangeException ("CacheSize");
        }
}
#else
// Profiles without NET_2_0 have no CacheSize property; the shared cache
// is constructed with 200 instead.
static FactoryCache cache = new FactoryCache (200);
#endif
190
191                 // private
192
193
194                 // constructors
195
196                 // This constructor is used by compiled regular expressions that are
197                 // classes derived from Regex class. No initialization required.
198                 protected Regex ()
199                 {
200                 }
201
202                 public Regex (string pattern) : this (pattern, RegexOptions.None)
203                 {
204                 }
205
206                 public Regex (string pattern, RegexOptions options)
207                 {
208                         this.pattern = pattern;
209                         this.roptions = options;
210                         Init ();
211                 }
212 #if !TARGET_JVM
// Binds this instance to a machine factory. A factory found in the
// shared cache for (pattern, roptions) is reused and its group data is
// copied into this instance; otherwise InitNewRegex () builds one.
private void Init ()
{
        this.machineFactory = cache.Lookup (this.pattern, this.roptions);

        if (this.machineFactory == null) {
                // Cache miss: build a new factory and register it.
                InitNewRegex ();
                return;
        }

        // Cache hit: take the group information from the cached factory.
        IMachineFactory cached = this.machineFactory;
        this.group_count = cached.GroupCount;
        this.mapping = cached.Mapping;
        this.group_names = cached.NamesMapping;
}
225 #endif
226
// Builds a machine factory for (pattern, roptions), adds it to the
// shared cache and copies its group data into this instance.
private void InitNewRegex ()
{
        string source = this.pattern;
        RegexOptions flags = this.roptions;

        this.machineFactory = CreateMachineFactory (source, flags);
        cache.Add (source, flags, this.machineFactory);

        this.group_count = this.machineFactory.GroupCount;
        this.mapping = this.machineFactory.Mapping;
        this.group_names = this.machineFactory.NamesMapping;
}
235
#if !NET_2_1
// The new rx engine has blocking bugs like
// https://bugzilla.novell.com/show_bug.cgi?id=470827
// so the old engine is used unless the MONO_NEW_RX environment
// variable is defined.
static readonly bool old_rx =
        Environment.GetEnvironmentVariable ("MONO_NEW_RX") == null;
#endif

// Parses `pattern`, compiles it with the compiler selected for this
// build/profile and `options`, and returns the resulting machine
// factory with its Gap, Mapping and NamesMapping filled in.
private static IMachineFactory CreateMachineFactory (string pattern, RegexOptions options)
{
        Parser parser = new Parser ();
        RegularExpression tree = parser.ParseRegularExpression (pattern, options);

        ICompiler compiler;
#if NET_2_1
        compiler = new PatternCompiler ();
#else
        if (old_rx)
                compiler = new PatternCompiler ();
        else if ((options & RegexOptions.Compiled) != 0)
                compiler = new CILCompiler ();
        else
                compiler = new RxCompiler ();
#endif

        bool rightToLeft = (options & RegexOptions.RightToLeft) != 0;
        tree.Compile (compiler, rightToLeft);

        IMachineFactory built = compiler.GetMachineFactory ();

        // The parser fills the table it is handed; its return value is
        // stored as the factory's Gap.
        Hashtable groupTable = new Hashtable ();
        built.Gap = parser.GetMapping (groupTable);
        built.Mapping = groupTable;
        built.NamesMapping = GetGroupNamesArray (built.GroupCount, built.Mapping);

        return built;
}
272
// Deserialization constructor: reads the "pattern" string and the
// "options" value from `info` and chains to the (pattern, options)
// constructor. `context` is not used. The constructor is protected
// when NET_2_0 is defined and private otherwise.
#if NET_2_0
protected
#else
private
#endif
Regex (SerializationInfo info, StreamingContext context) :
        this (info.GetString ("pattern"),
              (RegexOptions) info.GetValue ("options", typeof (RegexOptions)))
{
}

#if ONLY_1_1 && !TARGET_JVM
// Empty finalizer; it exists only to fix the public API signature.
~Regex ()
{
}
#endif
290                 // public instance properties
291                 
// The options stored in roptions.
public RegexOptions Options {
        get { return this.roptions; }
}

// True when the RegexOptions.RightToLeft bit is set in roptions.
public bool RightToLeft {
        get {
                RegexOptions rtlBit = this.roptions & RegexOptions.RightToLeft;
                return rtlBit != 0;
        }
}
299
300                 // public instance methods
301                 
// Returns a fresh array holding the first group_count + 1 entries of
// group_names, so callers cannot modify the internal array.
public string [] GetGroupNames ()
{
        int length = 1 + group_count;
        string [] copy = new string [length];
        Array.Copy (group_names, copy, length);
        return copy;
}

// Returns a fresh array holding the first group_count + 1 entries of
// GroupNumbers, so callers cannot modify the internal array.
public int [] GetGroupNumbers ()
{
        int length = 1 + group_count;
        int [] copy = new int [length];
        Array.Copy (GroupNumbers, copy, length);
        return copy;
}
315
// Returns the name stored for group number `i`, or "" when
// GetGroupIndex maps `i` to a negative index.
public string GroupNameFromNumber (int i)
{
        int index = GetGroupIndex (i);
        return index < 0 ? "" : group_names [index];
}

// Returns the number recorded for `name` in the mapping, or -1 when
// the mapping has no entry for it.
public int GroupNumberFromName (string name)
{
        if (!mapping.Contains (name))
                return -1;

        return (int) mapping [name];
}

// Maps a group number to an index: numbers below group_count + 1 are
// returned unchanged, anything else yields -1.
internal int GetGroupIndex (int number)
{
        int gap = group_count + 1;
        if (number >= gap) {
                // FIXME: handle arbitrarily numbered groups here
                return -1;
        }

        return number;
}
341
342                 // match methods
343                 
// Reports whether the regex matches `input`. The search starts at
// input.Length for right-to-left regexes and at 0 otherwise.
// Throws ArgumentNullException when `input` is null.
public bool IsMatch (string input)
{
        if (input == null)
                throw new ArgumentNullException ("input");

        return IsMatch (input, RightToLeft ? input.Length : 0);
}

// Reports whether Match (input, startat) succeeded.
// Throws ArgumentNullException when `input` is null (raised by Match).
public bool IsMatch (string input, int startat)
{
        return Match (input, startat).Success;
}

// Returns the match result for `input`. The search starts at
// input.Length for right-to-left regexes and at 0 otherwise.
// Throws ArgumentNullException when `input` is null.
public Match Match (string input)
{
        if (input == null)
                throw new ArgumentNullException ("input");

        return Match (input, RightToLeft ? input.Length : 0);
}

// Creates a new machine and scans `input` from `startat`, passing
// input.Length as the end bound.
// Throws ArgumentNullException when `input` is null; previously this
// surfaced as a NullReferenceException from input.Length.
public Match Match (string input, int startat)
{
        if (input == null)
                throw new ArgumentNullException ("input");

        return CreateMachine ().Scan (this, input, startat, input.Length);
}

// Creates a new machine and scans `input` from `startat`, passing
// startat + length as the end bound.
// Throws ArgumentNullException when `input` is null.
public Match Match (string input, int startat, int length)
{
        if (input == null)
                throw new ArgumentNullException ("input");

        return CreateMachine ().Scan (this, input, startat, startat + length);
}

// Returns the match collection for `input`. The search starts at
// input.Length for right-to-left regexes and at 0 otherwise.
// Throws ArgumentNullException when `input` is null.
public MatchCollection Matches (string input)
{
        if (input == null)
                throw new ArgumentNullException ("input");

        return Matches (input, RightToLeft ? input.Length : 0);
}

// Wraps the result of Match (input, startat) in a MatchCollection.
// Throws ArgumentNullException when `input` is null (raised by Match).
public MatchCollection Matches (string input, int startat)
{
        Match m = Match (input, startat);
        return new MatchCollection (m);
}
379
380                 // replace methods
381
// Replaces matches in `input` with the evaluator's output, with no
// limit on the number of replacements (Int32.MaxValue). The start
// position is input.Length for right-to-left regexes and 0 otherwise.
// Throws ArgumentNullException when `input` or `evaluator` is null.
public string Replace (string input, MatchEvaluator evaluator)
{
        // Checked here because input.Length is read below.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Replace (input, evaluator, Int32.MaxValue, RightToLeft ? input.Length : 0);
}

// Same as above, passing `count` through to the full overload.
// Throws ArgumentNullException when `input` or `evaluator` is null.
public string Replace (string input, MatchEvaluator evaluator, int count)
{
        // Checked here because input.Length is read below.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Replace (input, evaluator, count, RightToLeft ? input.Length : 0);
}

// Bridges a MatchEvaluator to the BaseMachine.MatchAppendEvaluator
// delegate: the evaluator's result for a match is appended to the
// supplied StringBuilder.
class Adapter {
        MatchEvaluator ev;
        public Adapter (MatchEvaluator ev) { this.ev = ev; }
        public void Evaluate (Match m, StringBuilder sb) { sb.Append (ev (m)); }
}

// Full evaluator-based replace. Right-to-left regexes go through
// BaseMachine.RTLReplace; all others go through BaseMachine.LTRReplace
// with the evaluator wrapped in an Adapter.
// Throws ArgumentNullException naming the offending parameter when
// `input` or `evaluator` is null. (The original reported a null `input`
// under the parameter name "null".)
public string Replace (string input, MatchEvaluator evaluator, int count, int startat)
{
        if (input == null)
                throw new ArgumentNullException ("input");
        if (evaluator == null)
                throw new ArgumentNullException ("evaluator");

        BaseMachine m = (BaseMachine)CreateMachine ();

        if (RightToLeft)
                return m.RTLReplace (this, input, evaluator, count, startat);

        // NOTE: If this is a cause of a lot of allocations, we can convert it to
        //       use a ThreadStatic allocation mitigator
        Adapter a = new Adapter (evaluator);

        return m.LTRReplace (this, input, new BaseMachine.MatchAppendEvaluator (a.Evaluate),
                             count, startat);
}
417
// Replaces matches in `input` using the `replacement` string, with no
// limit on the number of replacements (Int32.MaxValue). The start
// position is input.Length for right-to-left regexes and 0 otherwise.
// Throws ArgumentNullException when `input` or `replacement` is null.
public string Replace (string input, string replacement)
{
        // Checked here because input.Length is read below.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Replace (input, replacement, Int32.MaxValue, RightToLeft ? input.Length : 0);
}

// Same as above, passing `count` through to the full overload.
// Throws ArgumentNullException when `input` or `replacement` is null.
public string Replace (string input, string replacement, int count)
{
        // Checked here because input.Length is read below.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Replace (input, replacement, count, RightToLeft ? input.Length : 0);
}

// Full string-based replace: hands the work to a newly created
// machine's Replace method.
// Throws ArgumentNullException naming the offending parameter when
// `input` or `replacement` is null, matching the validation done by
// the MatchEvaluator-based overload.
public string Replace (string input, string replacement, int count, int startat)
{
        if (input == null)
                throw new ArgumentNullException ("input");
        if (replacement == null)
                throw new ArgumentNullException ("replacement");

        return CreateMachine ().Replace (this, input, replacement, count, startat);
}
432
433                 // split methods
434
// Splits `input` with no limit on the piece count (Int32.MaxValue).
// The start position is input.Length for right-to-left regexes and 0
// otherwise.
// Throws ArgumentNullException when `input` is null.
public string [] Split (string input)
{
        // Checked here because input.Length is read below.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Split (input, Int32.MaxValue, RightToLeft ? input.Length : 0);
}

// Same as above, passing `count` through to the full overload.
// Throws ArgumentNullException when `input` is null.
public string [] Split (string input, int count)
{
        // Checked here because input.Length is read below.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Split (input, count, RightToLeft ? input.Length : 0);
}

// Full split: hands the work to a newly created machine's Split method.
// Throws ArgumentNullException when `input` is null.
public string [] Split (string input, int count, int startat)
{
        if (input == null)
                throw new ArgumentNullException ("input");

        return CreateMachine ().Split (this, input, count, startat);
}
449
// Called at the end of the constructor of compiled regular expression
// classes to do internal initialization. It may run only once per
// object; a second call throws NotSupportedException.
protected void InitializeReferences ()
{
        if (!refsInitialized) {
                refsInitialized = true;

                // The pattern is compiled again here. That costs performance
                // because the existing CIL code is ignored, but it is what
                // provides support for regular expressions compiled to
                // assemblies.
                Init ();
                return;
        }

        throw new NotSupportedException ("This operation is only allowed once per object.");
}
#if !NET_2_1
// True when the RegexOptions.Compiled bit is set in roptions.
protected bool UseOptionC ()
{
        RegexOptions compiledBit = roptions & RegexOptions.Compiled;
        return compiledBit != 0;
}
#endif
// True when the RegexOptions.RightToLeft bit is set in roptions.
protected bool UseOptionR ()
{
        RegexOptions rtlBit = roptions & RegexOptions.RightToLeft;
        return rtlBit != 0;
}
474
475                 // object methods
476                 
// Returns the pattern string this regex holds.
public override string ToString ()
{
        return this.pattern;
}

// ISerializable interface
//
// Stores two entries in `info`: "pattern" (the result of ToString ())
// and "options" (the Options property). `context` is not used.
void ISerializable.GetObjectData (SerializationInfo info, StreamingContext context)
{
        string serializedPattern = this.ToString ();
        info.AddValue ("pattern", serializedPattern, typeof (string));

        RegexOptions serializedOptions = this.Options;
        info.AddValue ("options", serializedOptions, typeof (RegexOptions));
}
488
489                 // internal
490
// The group count copied from the machine factory during initialization.
internal int GroupCount {
        get { return this.group_count; }
}

// private

// Asks the machine factory for a new machine instance; the public
// match, replace and split methods each call this per operation.
private IMachine CreateMachine ()
{
        IMachine machine = machineFactory.NewInstance ();
        return machine;
}
501
// Inverts `mapping` into an array of groupCount + 1 strings: every
// entry's key (cast to string) is stored at the index given by its
// value (cast to int). Slots that no entry points at stay null.
private static string [] GetGroupNamesArray (int groupCount, IDictionary mapping)
{
        string [] names = new string [groupCount + 1];

        IDictionaryEnumerator entries = mapping.GetEnumerator ();
        while (entries.MoveNext ()) {
                int slot = (int) entries.Value;
                string name = (string) entries.Key;
                names [slot] = name;
        }

        return names;
}
510
// Lazily built array of group numbers. On first access it is filled
// with 0 .. group_count; later accesses return the same array.
private int [] GroupNumbers {
        get {
                if (group_numbers != null)
                        return group_numbers;

                group_numbers = new int [1 + group_count];
                for (int n = 0; n < group_numbers.Length; n++)
                        group_numbers [n] = n;
                // FIXME: needs to handle arbitrarily numbered groups '(?<43>abc)'

                return group_numbers;
        }
}
523
// Factory that CreateMachine () asks for new machine instances.
private IMachineFactory machineFactory;
// Group name -> group number table, taken from the machine factory.
private IDictionary mapping;
// Group count, taken from the machine factory.
private int group_count;
// Set by InitializeReferences () so that it can run only once.
private bool refsInitialized;
// Group names indexed by group number, taken from the machine factory.
private string [] group_names;
// Backing store for the lazily built GroupNumbers property.
private int [] group_numbers;

// protected members

// The pattern and options this regex was constructed with.
protected internal string pattern;
protected internal RegexOptions roptions;

// MS undocumented members
// None of the fields below is read or written in this part of the class.
#if NET_2_1
[MonoTODO]
internal System.Collections.Generic.Dictionary<string, int> capnames;
[MonoTODO]
internal System.Collections.Generic.Dictionary<int, int> caps;
#else
[MonoTODO]
protected internal System.Collections.Hashtable capnames;
[MonoTODO]
protected internal System.Collections.Hashtable caps;

[MonoTODO]
protected internal RegexRunnerFactory factory;
#endif
[MonoTODO]
protected internal int capsize;
[MonoTODO]
protected internal string [] capslist;
555         }
556 }