New tests.
[mono.git] / mcs / class / System / System.Text.RegularExpressions / Regex.cs
1 //
2 // assembly:    System
3 // namespace:   System.Text.RegularExpressions
4 // file:        regex.cs
5 //
6 // author:      Dan Lewis (dlewis@gmx.co.uk)
7 //              (c) 2002
8
9 //
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
17 // 
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
20 // 
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 //
29
30 using System;
31 using System.Text;
32 using System.Collections;
33 using System.Reflection;
34 using System.Reflection.Emit;
35 using System.Runtime.Serialization;
36
37 using RegularExpression = System.Text.RegularExpressions.Syntax.RegularExpression;
38 using Parser = System.Text.RegularExpressions.Syntax.Parser;
39
40 using System.Diagnostics;
41
42
43 namespace System.Text.RegularExpressions {
44         
45         [Serializable]
46         public partial class Regex : ISerializable {
47
48 #if !TARGET_JVM
// Convenience overload: forwards to the four-argument CompileToAssembly
// with an empty attribute array and no resource file.
[MonoTODO]
public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname)
{
        CustomAttributeBuilder [] noAttributes = new CustomAttributeBuilder [0];
        Regex.CompileToAssembly (regexes, aname, noAttributes, null);
}
54
// Convenience overload: forwards to the four-argument CompileToAssembly
// with no resource file.
[MonoTODO]
public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname,
                                      CustomAttributeBuilder [] attribs)
{
        string noResourceFile = null;
        Regex.CompileToAssembly (regexes, aname, attribs, noResourceFile);
}
61
// Not implemented yet: every call throws NotImplementedException.
//
// TODO: make use of the attribs and resourceFile parameters.
//
// Disabled sketch of the intended implementation, kept for reference:
//
//   AssemblyBuilder asmBuilder = AppDomain.CurrentDomain.DefineDynamicAssembly (aname, AssemblyBuilderAccess.RunAndSave);
//   ModuleBuilder modBuilder = asmBuilder.DefineDynamicModule ("InnerRegexModule", aname.Name);
//   Parser psr = new Parser ();
//
//   System.Console.WriteLine ("CompileToAssembly");
//
//   for (int i = 0; i < regexes.Length; i++) {
//           System.Console.WriteLine ("Compiling expression :" + regexes [i].Pattern);
//           RegularExpression re = psr.ParseRegularExpression (regexes [i].Pattern, regexes [i].Options);
//
//           // compile
//           CILCompiler cmp = new CILCompiler (modBuilder, i);
//           bool reverse = (regexes [i].Options & RegexOptions.RightToLeft) != 0;
//           re.Compile (cmp, reverse);
//           cmp.Close ();
//   }
//
//   // Define a runtime class with specified name and attributes.
//   TypeBuilder builder = modBuilder.DefineType ("ITest");
//   builder.CreateType ();
//   asmBuilder.Save (aname.Name);
[MonoTODO]
public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname,
                                      CustomAttributeBuilder [] attribs, string resourceFile)
{
        throw new NotImplementedException ();
}
96 #endif
97                 
// Returns str as escaped by Parser.Escape.
public static string Escape (string str)
{
        string escaped = Parser.Escape (str);
        return escaped;
}
102
// Returns str as unescaped by Parser.Unescape.
public static string Unescape (string str)
{
        string unescaped = Parser.Unescape (str);
        return unescaped;
}
107
// Tests input against pattern with RegexOptions.None by forwarding to
// the three-argument overload.
public static bool IsMatch (string input, string pattern)
{
        const RegexOptions defaults = RegexOptions.None;
        return IsMatch (input, pattern, defaults);
}
112
// Builds a Regex from pattern and options and reports whether it
// matches input.
public static bool IsMatch (string input, string pattern, RegexOptions options)
{
        return new Regex (pattern, options).IsMatch (input);
}
118
// Searches input for pattern with RegexOptions.None by forwarding to
// the three-argument overload.
public static Match Match (string input, string pattern)
{
        const RegexOptions defaults = RegexOptions.None;
        return Regex.Match (input, pattern, defaults);
}
123
// Builds a Regex from pattern and options and returns its Match result
// for input.
public static Match Match (string input, string pattern, RegexOptions options)
{
        return new Regex (pattern, options).Match (input);
}
129
// Collects matches of pattern in input with RegexOptions.None by
// forwarding to the three-argument overload.
public static MatchCollection Matches (string input, string pattern)
{
        const RegexOptions defaults = RegexOptions.None;
        return Matches (input, pattern, defaults);
}
134
// Builds a Regex from pattern and options and returns its Matches
// result for input.
public static MatchCollection Matches (string input, string pattern, RegexOptions options)
{
        return new Regex (pattern, options).Matches (input);
}
140
// Replaces matches of pattern in input using evaluator, with
// RegexOptions.None, by forwarding to the four-argument overload.
public static string Replace (string input, string pattern, MatchEvaluator evaluator)
{
        const RegexOptions defaults = RegexOptions.None;
        return Regex.Replace (input, pattern, evaluator, defaults);
}
145
// Builds a Regex from pattern and options and replaces its matches in
// input with the strings produced by evaluator.
public static string Replace (string input, string pattern, MatchEvaluator evaluator,
                              RegexOptions options)
{
        return new Regex (pattern, options).Replace (input, evaluator);
}
152
// Replaces matches of pattern in input with replacement, using
// RegexOptions.None, by forwarding to the four-argument overload.
public static string Replace (string input, string pattern, string replacement)
{
        const RegexOptions defaults = RegexOptions.None;
        return Regex.Replace (input, pattern, replacement, defaults);
}
157
// Builds a Regex from pattern and options and replaces its matches in
// input with replacement.
public static string Replace (string input, string pattern, string replacement,
                              RegexOptions options)
{
        return new Regex (pattern, options).Replace (input, replacement);
}
164
// Splits input around matches of pattern with RegexOptions.None by
// forwarding to the three-argument overload.
public static string [] Split (string input, string pattern)
{
        const RegexOptions defaults = RegexOptions.None;
        return Regex.Split (input, pattern, defaults);
}
169
// Builds a Regex from pattern and options and splits input around its
// matches.
public static string [] Split (string input, string pattern, RegexOptions options)
{
        return new Regex (pattern, options).Split (input);
}
175
#if NET_2_0
// Shared cache of machine factories; Init looks patterns up in it and
// InitNewRegex adds newly built factories to it.
static FactoryCache cache = new FactoryCache (15);

// Gets or sets the capacity of the shared factory cache.
// Assigning a negative value throws ArgumentOutOfRangeException.
public static int CacheSize {
        get { return cache.Capacity; }
        set {
                // The exception names the offending argument of the
                // setter ("value"), not the property itself.
                if (value < 0)
                        throw new ArgumentOutOfRangeException ("value");

                cache.Capacity = value;
        }
}
#else
// Shared cache of machine factories; Init looks patterns up in it and
// InitNewRegex adds newly built factories to it.
static FactoryCache cache = new FactoryCache (200);
#endif
190
191                 // private
192
193
194                 // constructors
195
// Parameterless constructor for compiled regular expressions, which are
// classes derived from Regex. It performs no initialization.
protected Regex ()
{
}
201
// Creates a regex for pattern with RegexOptions.None.
public Regex (string pattern)
        : this (pattern, RegexOptions.None)
{
}
205
// Creates a regex for pattern with the given options, then runs Init
// to set up the matching machinery.
// Throws ArgumentNullException when pattern is null.
public Regex (string pattern, RegexOptions options)
{
        // Fail at the API boundary with a named argument instead of
        // letting a null pattern travel into initialization.
        if (pattern == null)
                throw new ArgumentNullException ("pattern");

        this.pattern = pattern;
        this.roptions = options;
        Init ();
}
212 #if !TARGET_JVM
// Binds this instance to a machine factory: reuses the cached factory
// for (pattern, roptions) when there is one, otherwise builds a new one
// through InitNewRegex.
private void Init ()
{
        IMachineFactory cached = cache.Lookup (this.pattern, this.roptions);
        this.machineFactory = cached;

        if (cached == null) {
                InitNewRegex ();
                return;
        }

        // Cache hit: copy the group metadata from the shared factory.
        this.group_count = cached.GroupCount;
        this.mapping = cached.Mapping;
        this._groupNumberToNameMap = cached.NamesMapping;
}
225 #endif
226
// Builds a fresh machine factory for (pattern, roptions), stores it in
// the shared cache and copies its group metadata into this instance.
private void InitNewRegex ()
{
        IMachineFactory created = CreateMachineFactory (this.pattern, this.roptions);
        this.machineFactory = created;
        cache.Add (this.pattern, this.roptions, created);

        this.group_count = created.GroupCount;
        this.mapping = created.Mapping;
        this._groupNumberToNameMap = created.NamesMapping;
}
235
236 #if !NET_2_1
// The new rx engine has blocking bugs like
// https://bugzilla.novell.com/show_bug.cgi?id=470827
// so this flag is true unless the MONO_NEW_RX environment variable is
// set; CreateMachineFactory picks its compiler based on it.
static readonly bool old_rx = (null == Environment.GetEnvironmentVariable ("MONO_NEW_RX"));
241 #endif
242
// Parses pattern, compiles it with the compiler selected for this build
// and these options, and returns the resulting machine factory with its
// group mappings filled in.
private static IMachineFactory CreateMachineFactory (string pattern, RegexOptions options)
{
        Parser parser = new Parser ();
        RegularExpression expression = parser.ParseRegularExpression (pattern, options);

#if NET_2_1
        ICompiler compiler = new PatternCompiler ();
#else
        // old_rx selects PatternCompiler; otherwise RegexOptions.Compiled
        // chooses between CILCompiler and RxCompiler.
        ICompiler compiler;
        if (old_rx)
                compiler = new PatternCompiler ();
        else if ((options & RegexOptions.Compiled) != 0)
                compiler = new CILCompiler ();
        else
                compiler = new RxCompiler ();
#endif

        bool rightToLeft = (options & RegexOptions.RightToLeft) != 0;
        expression.Compile (compiler, rightToLeft);

        IMachineFactory built = compiler.GetMachineFactory ();
        built.Mapping = parser.GetMapping ();
        built.NamesMapping = GetGroupNamesArray (built.GroupCount, built.Mapping);
        return built;
}
270
// Deserialization constructor: reads the "pattern" and "options" entries
// written by GetObjectData and chains to the (pattern, options)
// constructor. It is protected under NET_2_0 and private otherwise.
#if NET_2_0
protected
#else
private
#endif
Regex (SerializationInfo info, StreamingContext context)
        : this (info.GetString ("pattern"),
                (RegexOptions) info.GetValue ("options", typeof (RegexOptions)))
{
}
281
282 #if ONLY_1_1 && !TARGET_JVM
// Empty finalizer; it is declared only to fix the public API signature.
~Regex ()
{
}
287 #endif
288                 // public instance properties
289                 
// The options this regex was created with.
public RegexOptions Options {
        get {
                return this.roptions;
        }
}
293
// True when the RegexOptions.RightToLeft flag is set in roptions.
public bool RightToLeft {
        get {
                RegexOptions flag = roptions & RegexOptions.RightToLeft;
                return flag != RegexOptions.None;
        }
}
297
298                 // public instance methods
299                 
// Returns a fresh copy of the first group_count + 1 entries of the
// group-number-to-name table, so callers cannot modify the internal array.
public string [] GetGroupNames ()
{
        int total = 1 + group_count;
        string [] copy = new string [total];
        Array.Copy (_groupNumberToNameMap, 0, copy, 0, total);
        return copy;
}
306
// Returns the group numbers 0..group_count in ascending order.
public int[] GetGroupNumbers ()
{
        // FIXME: needs to handle arbitrarily numbered groups '(?<43>abc)'
        int total = 1 + group_count;
        int[] result = new int [total];

        int n = 0;
        while (n < total) {
                result [n] = n;
                n++;
        }

        return result;
}
315
// Returns the name recorded for group number i, or the empty string
// when i lies outside 0..group_count.
public string GroupNameFromNumber (int i)
{
        bool inRange = i >= 0 && i <= group_count;
        if (inRange)
                return _groupNumberToNameMap [i];

        return "";
}
323
// Returns the group number stored in mapping for name, or -1 when the
// mapping has no entry for that name.
public int GroupNumberFromName (string name)
{
        if (!mapping.Contains (name))
                return -1;

        object number = mapping [name];
        return (int) number;
}
331
332                 // match methods
333                 
// Reports whether this regex matches input. The search starts at the
// end of input for right-to-left expressions and at index 0 otherwise.
// Throws ArgumentNullException when input is null.
public bool IsMatch (string input)
{
        // Guard before input.Length is read, so a null argument is
        // reported by name rather than as a NullReferenceException.
        if (input == null)
                throw new ArgumentNullException ("input");

        return IsMatch (input, RightToLeft ? input.Length : 0);
}
338
// Reports whether a match is found when scanning input from startat.
public bool IsMatch (string input, int startat)
{
        Match result = Match (input, startat);
        return result.Success;
}
343
// Searches input for this regex. The search starts at the end of input
// for right-to-left expressions and at index 0 otherwise.
// Throws ArgumentNullException when input is null.
public Match Match (string input)
{
        // Guard before input.Length is read, so a null argument is
        // reported by name rather than as a NullReferenceException.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Match (input, RightToLeft ? input.Length : 0);
}
348
// Scans input from startat up to the end of the string using a newly
// created machine.
// Throws ArgumentNullException when input is null and
// ArgumentOutOfRangeException when startat is negative or greater than
// input.Length.
public Match Match (string input, int startat)
{
        if (input == null)
                throw new ArgumentNullException ("input");
        // input.Length is the scan end, so startat must not lie beyond it.
        if (startat < 0 || startat > input.Length)
                throw new ArgumentOutOfRangeException ("startat");

        return CreateMachine ().Scan (this, input, startat, input.Length);
}
353
// Scans the part of input that begins at startat and spans length
// characters, using a newly created machine.
// Throws ArgumentNullException when input is null and
// ArgumentOutOfRangeException when startat or length describe a range
// outside input.
public Match Match (string input, int startat, int length)
{
        if (input == null)
                throw new ArgumentNullException ("input");
        if (startat < 0 || startat > input.Length)
                throw new ArgumentOutOfRangeException ("startat");
        // Compare against the remaining length so startat + length
        // can neither overflow nor run past the end of input.
        if (length < 0 || length > input.Length - startat)
                throw new ArgumentOutOfRangeException ("length");

        return CreateMachine ().Scan (this, input, startat, startat + length);
}
358
// Returns the matches of this regex in input. The search starts at the
// end of input for right-to-left expressions and at index 0 otherwise.
// Throws ArgumentNullException when input is null.
public MatchCollection Matches (string input)
{
        // Guard before input.Length is read, so a null argument is
        // reported by name rather than as a NullReferenceException.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Matches (input, RightToLeft ? input.Length : 0);
}
363
// Finds the first match from startat and wraps it in a MatchCollection.
public MatchCollection Matches (string input, int startat)
{
        return new MatchCollection (Match (input, startat));
}
369
370                 // replace methods
371
// Replaces matches in input with the strings produced by evaluator,
// passing Int32.MaxValue as the replacement count. The search starts at
// the end of input for right-to-left expressions and at index 0 otherwise.
// Throws ArgumentNullException when input is null.
public string Replace (string input, MatchEvaluator evaluator)
{
        // Guard before input.Length is read, so a null argument is
        // reported by name rather than as a NullReferenceException.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Replace (input, evaluator, Int32.MaxValue, RightToLeft ? input.Length : 0);
}
376
// Replaces matches in input with the strings produced by evaluator,
// forwarding count to the four-argument overload. The search starts at
// the end of input for right-to-left expressions and at index 0 otherwise.
// Throws ArgumentNullException when input is null.
public string Replace (string input, MatchEvaluator evaluator, int count)
{
        // Guard before input.Length is read, so a null argument is
        // reported by name rather than as a NullReferenceException.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Replace (input, evaluator, count, RightToLeft ? input.Length : 0);
}
381
// Wraps a MatchEvaluator so it can be used where a callback that
// appends to a StringBuilder is expected.
class Adapter {
        MatchEvaluator callback;

        public Adapter (MatchEvaluator ev)
        {
                this.callback = ev;
        }

        // Appends the evaluator's result for m to sb.
        public void Evaluate (Match m, StringBuilder sb)
        {
                string replacement = callback (m);
                sb.Append (replacement);
        }
}
387
// Replaces matches in input, scanning from startat, with the strings
// produced by evaluator; count is passed through to the machine.
// Right-to-left expressions are handled by RTLReplace, all others by
// LTRReplace.
// Throws ArgumentNullException when input or evaluator is null.
public string Replace (string input, MatchEvaluator evaluator, int count, int startat)
{
        // The exception must name the actual parameter ("input").
        if (input == null)
                throw new ArgumentNullException ("input");
        if (evaluator == null)
                throw new ArgumentNullException ("evaluator");

        BaseMachine m = (BaseMachine)CreateMachine ();

        if (RightToLeft)
                return m.RTLReplace (this, input, evaluator, count, startat);

        // NOTE: If this is a cause of a lot of allocations, we can convert it to
        //       use a ThreadStatic allocation mitigator
        Adapter a = new Adapter (evaluator);

        return m.LTRReplace (this, input, new BaseMachine.MatchAppendEvaluator (a.Evaluate),
                             count, startat);
}
407
// Replaces matches in input with replacement, passing Int32.MaxValue as
// the replacement count. The search starts at the end of input for
// right-to-left expressions and at index 0 otherwise.
// Throws ArgumentNullException when input is null.
public string Replace (string input, string replacement)
{
        // Guard before input.Length is read, so a null argument is
        // reported by name rather than as a NullReferenceException.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Replace (input, replacement, Int32.MaxValue, RightToLeft ? input.Length : 0);
}
412
// Replaces matches in input with replacement, forwarding count to the
// four-argument overload. The search starts at the end of input for
// right-to-left expressions and at index 0 otherwise.
// Throws ArgumentNullException when input is null.
public string Replace (string input, string replacement, int count)
{
        // Guard before input.Length is read, so a null argument is
        // reported by name rather than as a NullReferenceException.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Replace (input, replacement, count, RightToLeft ? input.Length : 0);
}
417
// Replaces matches in input, scanning from startat, with replacement by
// delegating to a newly created machine; count is passed through.
// Throws ArgumentNullException when input or replacement is null, in
// line with the evaluator-based overload.
public string Replace (string input, string replacement, int count, int startat)
{
        if (input == null)
                throw new ArgumentNullException ("input");
        if (replacement == null)
                throw new ArgumentNullException ("replacement");

        return CreateMachine ().Replace (this, input, replacement, count, startat);
}
422
423                 // split methods
424
// Splits input around matches of this regex, passing Int32.MaxValue as
// the count. The search starts at the end of input for right-to-left
// expressions and at index 0 otherwise.
// Throws ArgumentNullException when input is null.
public string [] Split (string input)
{
        // Guard before input.Length is read, so a null argument is
        // reported by name rather than as a NullReferenceException.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Split (input, Int32.MaxValue, RightToLeft ? input.Length : 0);
}
429
// Splits input around matches of this regex, forwarding count to the
// three-argument overload. The search starts at the end of input for
// right-to-left expressions and at index 0 otherwise.
// Throws ArgumentNullException when input is null.
public string [] Split (string input, int count)
{
        // Guard before input.Length is read, so a null argument is
        // reported by name rather than as a NullReferenceException.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Split (input, count, RightToLeft ? input.Length : 0);
}
434
// Splits input, scanning from startat, by delegating to a newly created
// machine; count is passed through.
// Throws ArgumentNullException when input is null, in line with the
// argument checks of the evaluator-based Replace overload.
public string [] Split (string input, int count, int startat)
{
        if (input == null)
                throw new ArgumentNullException ("input");

        return CreateMachine ().Split (this, input, count, startat);
}
439
// Called at the end of the constructor of compiled regular expression
// classes to do internal initialization. A second call on the same
// object throws NotSupportedException.
protected void InitializeReferences ()
{
        if (!refsInitialized) {
                refsInitialized = true;

                // Compile pattern that results in performance loss as existing
                // CIL code is ignored but provides support for regular
                // expressions compiled to assemblies.
                Init ();
                return;
        }

        throw new NotSupportedException ("This operation is only allowed once per object.");
}
454 #if !NET_2_1
// True when the RegexOptions.Compiled flag is set in roptions.
protected bool UseOptionC ()
{
        RegexOptions flag = roptions & RegexOptions.Compiled;
        return flag != RegexOptions.None;
}
459 #endif
// True when the RegexOptions.RightToLeft flag is set in roptions.
protected bool UseOptionR ()
{
        RegexOptions flag = roptions & RegexOptions.RightToLeft;
        return flag != RegexOptions.None;
}
464
465                 // object methods
466                 
// Returns the pattern text stored in this regex.
public override string ToString ()
{
        return this.pattern;
}
471
// ISerializable interface
// Stores the two values the deserialization constructor reads back:
// "pattern" (the result of ToString) and "options".
void ISerializable.GetObjectData (SerializationInfo info, StreamingContext context)
{
        string patternText = this.ToString ();
        info.AddValue ("pattern", patternText, typeof (string));

        RegexOptions currentOptions = this.Options;
        info.AddValue ("options", currentOptions, typeof (RegexOptions));
}
478
479                 // internal
480
// Exposes group_count to other types in the assembly.
internal int GroupCount {
        get {
                return this.group_count;
        }
}
484
485                 // private
486
// Asks the machine factory bound to this regex for a new machine.
private IMachine CreateMachine ()
{
        IMachine machine = machineFactory.NewInstance ();
        return machine;
}
491
// Inverts a name-to-number mapping into an array of groupCount + 1
// slots indexed by group number. Slots with no entry in mapping stay null.
private static string [] GetGroupNamesArray (int groupCount, IDictionary mapping)
{
        string [] names = new string [groupCount + 1];

        IDictionaryEnumerator entries = mapping.GetEnumerator ();
        while (entries.MoveNext ()) {
                string groupName = (string) entries.Key;
                int groupNumber = (int) entries.Value;
                names [groupNumber] = groupName;
        }

        return names;
}
500                 
// Factory that CreateMachine asks for a new machine.
private IMachineFactory machineFactory;

// Group name -> group number; queried by GroupNumberFromName.
private IDictionary mapping;

// Highest group number; group arrays are sized group_count + 1.
private int group_count;

// Set by InitializeReferences so that a second call is rejected.
private bool refsInitialized;

// Group number -> group name; read by GetGroupNames and GroupNameFromNumber.
private string [] _groupNumberToNameMap;
506
507                 
508                 // protected members
509
// Pattern text; returned by ToString and written during serialization.
protected internal string pattern;

// Options the regex was created with; exposed through the Options property.
protected internal RegexOptions roptions;
512                 
// MS undocumented members
// None of the code visible in this file reads or writes these fields.
#if NET_2_1
[MonoTODO]
internal System.Collections.Generic.Dictionary<string, int> capnames;

[MonoTODO]
internal System.Collections.Generic.Dictionary<int, int> caps;
#else
[MonoTODO]
protected internal System.Collections.Hashtable capnames;

[MonoTODO]
protected internal System.Collections.Hashtable caps;

[MonoTODO]
protected internal RegexRunnerFactory factory;
#endif

[MonoTODO]
protected internal int capsize;

[MonoTODO]
protected internal string [] capslist;
532         }
533 }