regex: A few more null and bounds checks
[mono.git] / mcs / class / System / System.Text.RegularExpressions / Regex.cs
1 //
2 // assembly:    System
3 // namespace:   System.Text.RegularExpressions
4 // file:        regex.cs
5 //
6 // author:      Dan Lewis (dlewis@gmx.co.uk)
7 //              (c) 2002
8
9 //
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
17 // 
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
20 // 
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 //
29
30 using System;
31 using System.Text;
32 using System.Collections;
33 using System.Reflection;
34 using System.Reflection.Emit;
35 using System.Runtime.Serialization;
36
37 using RegularExpression = System.Text.RegularExpressions.Syntax.RegularExpression;
38 using Parser = System.Text.RegularExpressions.Syntax.Parser;
39
40 using System.Diagnostics;
41
42
43 namespace System.Text.RegularExpressions {
44         
45         [Serializable]
46         public partial class Regex : ISerializable {
47
48 #if !TARGET_JVM
49                 [MonoTODO]
50                 public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname)
51                 {
52                         Regex.CompileToAssembly(regexes, aname, new CustomAttributeBuilder [] {}, null);
53                 }
54
55                 [MonoTODO]
56                 public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname,
57                                                       CustomAttributeBuilder [] attribs)
58                 {
59                         Regex.CompileToAssembly(regexes, aname, attribs, null);
60                 }
61
62                 [MonoTODO]
63                 public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname,
64                                                       CustomAttributeBuilder [] attribs, string resourceFile)
65                 {
66                         throw new NotImplementedException ();
67                         // TODO : Make use of attribs and resourceFile parameters
68                         /*
69                         AssemblyBuilder asmBuilder = AppDomain.CurrentDomain.DefineDynamicAssembly (aname, AssemblyBuilderAccess.RunAndSave);
70                         ModuleBuilder modBuilder = asmBuilder.DefineDynamicModule("InnerRegexModule",aname.Name);
71                         Parser psr = new Parser ();     
72                         
73                         System.Console.WriteLine("CompileToAssembly");
74                                
75                         for(int i=0; i < regexes.Length; i++)
76                                 {
77                                         System.Console.WriteLine("Compiling expression :" + regexes[i].Pattern);
78                                         RegularExpression re = psr.ParseRegularExpression (regexes[i].Pattern, regexes[i].Options);
79                                         
80                                         // compile
81                                                                                 
82                                         CILCompiler cmp = new CILCompiler (modBuilder, i);
83                                         bool reverse = (regexes[i].Options & RegexOptions.RightToLeft) !=0;
84                                         re.Compile (cmp, reverse);
85                                         cmp.Close();
86                                         
87                                 }
88                        
89
90                         // Define a runtime class with specified name and attributes.
91                         TypeBuilder builder = modBuilder.DefineType("ITest");
92                         builder.CreateType();
93                         asmBuilder.Save(aname.Name);
94                         */
95                 }
96 #endif
97                 
98                 public static string Escape (string str)
99                 {
100                         if (str == null)
101                                 throw new ArgumentNullException ("str");
102                         return Parser.Escape (str);
103                 }
104
105                 public static string Unescape (string str)
106                 {
107                         if (str == null)
108                                 throw new ArgumentNullException ("str");
109                         return Parser.Unescape (str);
110                 }
111
112                 public static bool IsMatch (string input, string pattern)
113                 {
114                         return IsMatch (input, pattern, RegexOptions.None);
115                 }
116
117                 public static bool IsMatch (string input, string pattern, RegexOptions options)
118                 {
119                         Regex re = new Regex (pattern, options);
120                         return re.IsMatch (input);
121                 }
122
123                 public static Match Match (string input, string pattern)
124                 {
125                         return Regex.Match (input, pattern, RegexOptions.None);
126                 }
127
128                 public static Match Match (string input, string pattern, RegexOptions options)
129                 {
130                         Regex re = new Regex (pattern, options);
131                         return re.Match (input);
132                 }
133
134                 public static MatchCollection Matches (string input, string pattern)
135                 {
136                         return Matches (input, pattern, RegexOptions.None);
137                 }
138
139                 public static MatchCollection Matches (string input, string pattern, RegexOptions options)
140                 {
141                         Regex re = new Regex (pattern, options);
142                         return re.Matches (input);
143                 }
144
145                 public static string Replace (string input, string pattern, MatchEvaluator evaluator)
146                 {
147                         return Regex.Replace (input, pattern, evaluator, RegexOptions.None);
148                 }
149
150                 public static string Replace (string input, string pattern, MatchEvaluator evaluator,
151                                               RegexOptions options)
152                 {
153                         Regex re = new Regex (pattern, options);
154                         return re.Replace (input, evaluator);
155                 }
156
157                 public static string Replace (string input, string pattern, string replacement)
158                 {
159                         return Regex.Replace (input, pattern, replacement, RegexOptions.None);
160                 }
161
162                 public static string Replace (string input, string pattern, string replacement,
163                                               RegexOptions options)
164                 {
165                         Regex re = new Regex (pattern, options);
166                         return re.Replace (input, replacement);
167                 }
168
169                 public static string [] Split (string input, string pattern)
170                 {
171                         return Regex.Split (input, pattern, RegexOptions.None);
172                 }
173
174                 public static string [] Split (string input, string pattern, RegexOptions options)
175                 {
176                         Regex re = new Regex (pattern, options);
177                         return re.Split (input);
178                 }
179
#if NET_2_0
                // Shared cache of machine factories; starts with a capacity of 15
                // on this profile.
                static FactoryCache cache = new FactoryCache (15);

                // Gets or sets the capacity of the shared factory cache.
                // The setter throws ArgumentOutOfRangeException for negative values.
                public static int CacheSize {
                        get { return cache.Capacity; }
                        set {
                                // The parameter of a property setter is named "value", so
                                // that is the name reported to the caller.
                                if (value < 0)
                                        throw new ArgumentOutOfRangeException ("value");

                                cache.Capacity = value;
                        }
                }
#else
                // Shared cache of machine factories; fixed capacity of 200 on this
                // profile.
                static FactoryCache cache = new FactoryCache (200);
#endif
194
195                 // private
196
197
198                 // constructors
199
200                 // This constructor is used by compiled regular expressions that are
201                 // classes derived from Regex class. No initialization required.
202                 protected Regex ()
203                 {
204                 }
205
206                 public Regex (string pattern) : this (pattern, RegexOptions.None)
207                 {
208                 }
209
210                 public Regex (string pattern, RegexOptions options)
211                 {
212                         if (pattern == null)
213                                 throw new ArgumentNullException ("pattern");
214                         validate_options (options);
215                         this.pattern = pattern;
216                         this.roptions = options;
217                         Init ();
218                 }
219
220                 static void validate_options (RegexOptions options)
221                 {
222                         const RegexOptions allopts =
223                                 RegexOptions.None |
224                                 RegexOptions.IgnoreCase |
225                                 RegexOptions.Multiline |
226                                 RegexOptions.ExplicitCapture |
227 #if !NET_2_1
228                                 RegexOptions.Compiled |
229 #endif
230                                 RegexOptions.Singleline |
231                                 RegexOptions.IgnorePatternWhitespace |
232                                 RegexOptions.RightToLeft |
233                                 RegexOptions.ECMAScript |
234                                 RegexOptions.CultureInvariant;
235
236                         const RegexOptions ecmaopts =
237                                 RegexOptions.IgnoreCase |
238                                 RegexOptions.Multiline |
239 #if !NET_2_1
240                                 RegexOptions.Compiled |
241 #endif
242                                 RegexOptions.ECMAScript;
243
244                         if ((options & ~allopts) != 0)
245                                 throw new ArgumentOutOfRangeException ("options");
246                         if ((options & RegexOptions.ECMAScript) != 0 && (options & ~ecmaopts) != 0)
247                                 throw new ArgumentOutOfRangeException ("options");
248                 }
249
#if !TARGET_JVM
                // Looks up a machine factory for (pattern, roptions) in the shared
                // cache. On a miss, InitNewRegex builds one; on a hit, the group
                // metadata is copied from the cached factory.
                private void Init ()
                {
                        IMachineFactory cached = cache.Lookup (this.pattern, this.roptions);
                        this.machineFactory = cached;

                        if (cached == null) {
                                InitNewRegex ();
                                return;
                        }

                        this.group_count = cached.GroupCount;
                        this.gap = cached.Gap;
                        this.mapping = cached.Mapping;
                        this.group_names = cached.NamesMapping;
                }
#endif
265
266                 private void InitNewRegex () 
267                 {
268                         this.machineFactory = CreateMachineFactory (this.pattern, this.roptions);
269                         cache.Add (this.pattern, this.roptions, this.machineFactory);
270                         this.group_count = machineFactory.GroupCount;
271                         this.gap = this.machineFactory.Gap;
272                         this.mapping = machineFactory.Mapping;
273                         this.group_names = this.machineFactory.NamesMapping;
274                 }
275
#if !NET_2_1
                // True unless the MONO_NEW_RX environment variable is set. The old
                // engine stays the default because the new rx engine has blocking
                // bugs like https://bugzilla.novell.com/show_bug.cgi?id=470827
                static readonly bool old_rx = Environment.GetEnvironmentVariable ("MONO_NEW_RX") == null;
#endif
282
283                 private static IMachineFactory CreateMachineFactory (string pattern, RegexOptions options) 
284                 {
285                         Parser psr = new Parser ();
286                         RegularExpression re = psr.ParseRegularExpression (pattern, options);
287
288 #if NET_2_1
289                         ICompiler cmp = new PatternCompiler ();
290 #else
291                         ICompiler cmp;
292                         if (!old_rx) {
293                                 if ((options & RegexOptions.Compiled) != 0)
294                                         cmp = new CILCompiler ();
295                                 else
296                                         cmp = new RxCompiler ();
297                         } else {
298                                 cmp = new PatternCompiler ();
299                         }
300 #endif
301
302                         re.Compile (cmp, (options & RegexOptions.RightToLeft) != 0);
303
304                         IMachineFactory machineFactory = cmp.GetMachineFactory ();
305                         Hashtable mapping = new Hashtable ();
306                         machineFactory.Gap = psr.GetMapping (mapping);
307                         machineFactory.Mapping = mapping;
308                         machineFactory.NamesMapping = GetGroupNamesArray (machineFactory.GroupCount, machineFactory.Mapping);
309
310                         return machineFactory;
311                 }
312
313 #if NET_2_0
314                 protected
315 #else
316                 private
317 #endif
318                 Regex (SerializationInfo info, StreamingContext context) :
319                         this (info.GetString ("pattern"), 
320                               (RegexOptions) info.GetValue ("options", typeof (RegexOptions)))
321                 {
322                 }
323
#if ONLY_1_1 && !TARGET_JVM
                // Empty finalizer; declared only to fix the public API signature.
                ~Regex ()
                {
                }
#endif
330                 // public instance properties
331                 
332                 public RegexOptions Options {
333                         get { return roptions; }
334                 }
335
336                 public bool RightToLeft {
337                         get { return (roptions & RegexOptions.RightToLeft) != 0; }
338                 }
339
340                 // public instance methods
341                 
342                 public string [] GetGroupNames ()
343                 {
344                         string [] names = new string [1 + group_count];
345                         Array.Copy (group_names, names, 1 + group_count);
346                         return names;
347                 }
348
349                 public int [] GetGroupNumbers ()
350                 {
351                         int [] numbers = new int [1 + group_count];
352                         Array.Copy (GroupNumbers, numbers, 1 + group_count);
353                         return numbers;
354                 }
355
356                 public string GroupNameFromNumber (int i)
357                 {
358                         i = GetGroupIndex (i);
359                         if (i < 0)
360                                 return "";
361
362                         return group_names [i];
363                 }
364
365                 public int GroupNumberFromName (string name)
366                 {
367                         if (!mapping.Contains (name))
368                                 return -1;
369                         int i = (int) mapping [name];
370                         if (i >= gap)
371                                 i = Int32.Parse (name);
372                         return i;
373                 }
374
375                 internal int GetGroupIndex (int number)
376                 {
377                         if (number < gap)
378                                 return number;
379                         if (gap > group_count)
380                                 return -1;
381                         return Array.BinarySearch (GroupNumbers, gap, group_count - gap + 1, number);
382                 }
383
384                 int default_startat (string input)
385                 {
386                         return (RightToLeft && input != null) ? input.Length : 0;
387                 }
388
389                 // match methods
390                 
391                 public bool IsMatch (string input)
392                 {
393                         return IsMatch (input, default_startat (input));
394                 }
395
396                 public bool IsMatch (string input, int startat)
397                 {
398                         return Match (input, startat).Success;
399                 }
400
401                 public Match Match (string input)
402                 {
403                         return Match (input, default_startat (input));
404                 }
405
406                 public Match Match (string input, int startat)
407                 {
408                         if (input == null)
409                                 throw new ArgumentNullException ("input");
410                         if (startat < 0 || startat > input.Length)
411                                 throw new ArgumentOutOfRangeException ("startat");
412                         return CreateMachine ().Scan (this, input, startat, input.Length);
413                 }
414
415                 public Match Match (string input, int startat, int length)
416                 {
417                         if (input == null)
418                                 throw new ArgumentNullException ("input");
419                         if (startat < 0 || startat > input.Length)
420                                 throw new ArgumentOutOfRangeException ("startat");
421                         if (length < 0 || length > input.Length - startat)
422                                 throw new ArgumentOutOfRangeException ("length");
423                         return CreateMachine ().Scan (this, input, startat, startat + length);
424                 }
425
426                 public MatchCollection Matches (string input)
427                 {
428                         return Matches (input, default_startat (input));
429                 }
430
431                 public MatchCollection Matches (string input, int startat)
432                 {
433                         Match m = Match (input, startat);
434                         return new MatchCollection (m);
435                 }
436
437                 // replace methods
438
439                 public string Replace (string input, MatchEvaluator evaluator)
440                 {
441                         return Replace (input, evaluator, Int32.MaxValue, default_startat (input));
442                 }
443
444                 public string Replace (string input, MatchEvaluator evaluator, int count)
445                 {
446                         return Replace (input, evaluator, count, default_startat (input));
447                 }
448
449                 class Adapter {
450                         MatchEvaluator ev;
451                         public Adapter (MatchEvaluator ev) { this.ev = ev; }
452                         public void Evaluate (Match m, StringBuilder sb) { sb.Append (ev (m)); }
453                 }
454
455                 public string Replace (string input, MatchEvaluator evaluator, int count, int startat)
456                 {
457                         if (input == null)
458                                 throw new ArgumentNullException ("input");
459                         if (evaluator == null)
460                                 throw new ArgumentNullException ("evaluator");
461                         if (count < -1)
462                                 throw new ArgumentOutOfRangeException ("count");
463                         if (startat < 0 || startat > input.Length)
464                                 throw new ArgumentOutOfRangeException ("startat");
465
466                         BaseMachine m = (BaseMachine)CreateMachine ();
467
468                         if (RightToLeft)
469                                 return m.RTLReplace (this, input, evaluator, count, startat);
470
471                         // NOTE: If this is a cause of a lot of allocations, we can convert it to
472                         //       use a ThreadStatic allocation mitigator
473                         Adapter a = new Adapter (evaluator);
474
475                         return m.LTRReplace (this, input, new BaseMachine.MatchAppendEvaluator (a.Evaluate),
476                                                                  count, startat);
477                 }
478
479                 public string Replace (string input, string replacement)
480                 {
481                         return Replace (input, replacement, Int32.MaxValue, default_startat (input));
482                 }
483
484                 public string Replace (string input, string replacement, int count)
485                 {
486                         return Replace (input, replacement, count, default_startat (input));
487                 }
488
489                 public string Replace (string input, string replacement, int count, int startat)
490                 {
491                         if (input == null)
492                                 throw new ArgumentNullException ("input");
493                         if (replacement == null)
494                                 throw new ArgumentNullException ("replacement");
495                         if (count < -1)
496                                 throw new ArgumentOutOfRangeException ("count");
497                         if (startat < 0 || startat > input.Length)
498                                 throw new ArgumentOutOfRangeException ("startat");
499
500                         return CreateMachine ().Replace (this, input, replacement, count, startat);
501                 }
502
503                 // split methods
504
505                 public string [] Split (string input)
506                 {
507                         return Split (input, Int32.MaxValue, default_startat (input));
508                 }
509
510                 public string [] Split (string input, int count)
511                 {
512                         return Split (input, count, default_startat (input));
513                 }
514
515                 public string [] Split (string input, int count, int startat)
516                 {
517                         if (input == null)
518                                 throw new ArgumentNullException ("input");
519                         if (count < 0)
520                                 throw new ArgumentOutOfRangeException ("count");
521                         if (startat < 0 || startat > input.Length)
522                                 throw new ArgumentOutOfRangeException ("startat");
523
524                         return CreateMachine ().Split (this, input, count, startat);
525                 }
526
527                 // This method is called at the end of the constructor of compiled
528                 // regular expression classes to do internal initialization.
529                 protected void InitializeReferences ()
530                 {
531                         if (refsInitialized)
532                                 throw new NotSupportedException ("This operation is only allowed once per object.");
533
534                         refsInitialized = true;
535
536                         // Compile pattern that results in performance loss as existing
537                         // CIL code is ignored but provides support for regular
538                         // expressions compiled to assemblies.
539                         Init ();
540                 }
#if !NET_2_1
                // True when the Compiled flag is set in roptions.
                protected bool UseOptionC ()
                {
                        RegexOptions flag = roptions & RegexOptions.Compiled;
                        return flag != 0;
                }
#endif
547                 protected bool UseOptionR ()
548                 {
549                         return ((roptions & RegexOptions.RightToLeft) != 0);
550                 }
551
552                 // object methods
553                 
554                 public override string ToString ()
555                 {
556                         return pattern;
557                 }
558
559                 // ISerializable interface
560                 void ISerializable.GetObjectData (SerializationInfo info, StreamingContext context)
561                 {
562                         info.AddValue ("pattern", this.ToString (), typeof (string));
563                         info.AddValue ("options", this.Options, typeof (RegexOptions));
564                 }
565
566                 // internal
567
568                 internal int GroupCount {
569                         get { return group_count; }
570                 }
571
572                 internal int Gap {
573                         get { return gap; }
574                 }
575
576                 // private
577
578                 private IMachine CreateMachine ()
579                 {
580                         return machineFactory.NewInstance ();
581                 }
582
583                 private static string [] GetGroupNamesArray (int groupCount, IDictionary mapping) 
584                 {
585                         string [] group_names = new string [groupCount + 1];
586                         IDictionaryEnumerator de = mapping.GetEnumerator ();
587                         while (de.MoveNext ())
588                                 group_names [(int) de.Value] = (string) de.Key;
589                         return group_names;
590                 }
591
592                 private int [] GroupNumbers {
593                         get {
594                                 if (group_numbers == null) {
595                                         group_numbers = new int [1 + group_count];
596                                         for (int i = 0; i < gap; ++i)
597                                                 group_numbers [i] = i;
598                                         for (int i = gap; i <= group_count; ++i)
599                                                 group_numbers [i] = Int32.Parse (group_names [i]);
600                                         return group_numbers;
601                                 }
602                                 return group_numbers;
603                         }
604                 }
605
                    // Factory used by CreateMachine; set by Init or InitNewRegex.
606                 private IMachineFactory machineFactory;
                    // Group table copied from the machine factory's Mapping; keys are
                    // group names (see GroupNumberFromName).
607                 private IDictionary mapping;
                    // Copied from the machine factory's GroupCount; arrays of groups
                    // hold 1 + group_count entries.
608                 private int group_count;
                    // Copied from the machine factory's Gap; group numbers below this
                    // value map to themselves in GetGroupIndex.
609                 private int gap;
                    // Set by InitializeReferences so a second call can be rejected.
610                 private bool refsInitialized;
                    // Copied from the machine factory's NamesMapping.
611                 private string [] group_names;
                    // Built on first use by the GroupNumbers property.
612                 private int [] group_numbers;
613                 
614                 // protected members
615
                    // Pattern text; returned by ToString.
616                 protected internal string pattern;
                    // Option flags; returned by Options.
617                 protected internal RegexOptions roptions;
618                 
619                 // MS undocumented members
                    // None of the members below is read or written by the code visible in this file.
620 #if NET_2_1
621                 [MonoTODO]
622                 internal System.Collections.Generic.Dictionary<string, int> capnames;
623                 [MonoTODO]
624                 internal System.Collections.Generic.Dictionary<int, int> caps;
625 #else
626                 [MonoTODO]
627                 protected internal System.Collections.Hashtable capnames;
628                 [MonoTODO]
629                 protected internal System.Collections.Hashtable caps;
630
631                 [MonoTODO]
632                 protected internal RegexRunnerFactory factory;
633 #endif
634                 [MonoTODO]
635                 protected internal int capsize;
636                 [MonoTODO]
637                 protected internal string [] capslist;
638         }
639 }