[Cleanup] Removed TARGET_JVM
[mono.git] / mcs / class / System / System.Text.RegularExpressions / Regex.cs
1 //
2 // assembly:    System
3 // namespace:   System.Text.RegularExpressions
4 // file:        regex.cs
5 //
6 // author:      Dan Lewis (dlewis@gmx.co.uk)
7 //              (c) 2002
8
9 //
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
17 // 
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
20 // 
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 //
29
30 using System;
31 using System.Text;
32 using System.Collections;
33 using System.Reflection;
34 #if !FULL_AOT_RUNTIME
35 using System.Reflection.Emit;
36 #endif
37 using System.Runtime.Serialization;
38
39 using RegularExpression = System.Text.RegularExpressions.Syntax.RegularExpression;
40 using Parser = System.Text.RegularExpressions.Syntax.Parser;
41
42 using System.Diagnostics;
43
44
45 namespace System.Text.RegularExpressions {
46         
47         [Serializable]
48         public partial class Regex : ISerializable {
49
50 #if !FULL_AOT_RUNTIME
51                 [MonoTODO]
52                 public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname)
53                 {
54                         Regex.CompileToAssembly(regexes, aname, new CustomAttributeBuilder [] {}, null);
55                 }
56
57                 [MonoTODO]
58                 public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname,
59                                                       CustomAttributeBuilder [] attribs)
60                 {
61                         Regex.CompileToAssembly(regexes, aname, attribs, null);
62                 }
63
64                 [MonoTODO]
65                 public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname,
66                                                       CustomAttributeBuilder [] attribs, string resourceFile)
67                 {
68                         throw new NotImplementedException ();
69                         // TODO : Make use of attribs and resourceFile parameters
70                         /*
71                         AssemblyBuilder asmBuilder = AppDomain.CurrentDomain.DefineDynamicAssembly (aname, AssemblyBuilderAccess.RunAndSave);
72                         ModuleBuilder modBuilder = asmBuilder.DefineDynamicModule("InnerRegexModule",aname.Name);
73                         Parser psr = new Parser ();     
74                         
75                         System.Console.WriteLine("CompileToAssembly");
76                                
77                         for(int i=0; i < regexes.Length; i++)
78                                 {
79                                         System.Console.WriteLine("Compiling expression :" + regexes[i].Pattern);
80                                         RegularExpression re = psr.ParseRegularExpression (regexes[i].Pattern, regexes[i].Options);
81                                         
82                                         // compile
83                                                                                 
84                                         CILCompiler cmp = new CILCompiler (modBuilder, i);
85                                         bool reverse = (regexes[i].Options & RegexOptions.RightToLeft) !=0;
86                                         re.Compile (cmp, reverse);
87                                         cmp.Close();
88                                         
89                                 }
90                        
91
92                         // Define a runtime class with specified name and attributes.
93                         TypeBuilder builder = modBuilder.DefineType("ITest");
94                         builder.CreateType();
95                         asmBuilder.Save(aname.Name);
96                         */
97                 }
98 #endif
99                 
100                 public static string Escape (string str)
101                 {
102                         if (str == null)
103                                 throw new ArgumentNullException ("str");
104                         return Parser.Escape (str);
105                 }
106
107                 public static string Unescape (string str)
108                 {
109                         if (str == null)
110                                 throw new ArgumentNullException ("str");
111                         return Parser.Unescape (str);
112                 }
113
114                 public static bool IsMatch (string input, string pattern)
115                 {
116                         return IsMatch (input, pattern, RegexOptions.None);
117                 }
118
119                 public static bool IsMatch (string input, string pattern, RegexOptions options)
120                 {
121                         Regex re = new Regex (pattern, options);
122                         return re.IsMatch (input);
123                 }
124
125                 public static Match Match (string input, string pattern)
126                 {
127                         return Regex.Match (input, pattern, RegexOptions.None);
128                 }
129
130                 public static Match Match (string input, string pattern, RegexOptions options)
131                 {
132                         Regex re = new Regex (pattern, options);
133                         return re.Match (input);
134                 }
135
136                 public static MatchCollection Matches (string input, string pattern)
137                 {
138                         return Matches (input, pattern, RegexOptions.None);
139                 }
140
141                 public static MatchCollection Matches (string input, string pattern, RegexOptions options)
142                 {
143                         Regex re = new Regex (pattern, options);
144                         return re.Matches (input);
145                 }
146
147                 public static string Replace (string input, string pattern, MatchEvaluator evaluator)
148                 {
149                         return Regex.Replace (input, pattern, evaluator, RegexOptions.None);
150                 }
151
152                 public static string Replace (string input, string pattern, MatchEvaluator evaluator,
153                                               RegexOptions options)
154                 {
155                         Regex re = new Regex (pattern, options);
156                         return re.Replace (input, evaluator);
157                 }
158
159                 public static string Replace (string input, string pattern, string replacement)
160                 {
161                         return Regex.Replace (input, pattern, replacement, RegexOptions.None);
162                 }
163
164                 public static string Replace (string input, string pattern, string replacement,
165                                               RegexOptions options)
166                 {
167                         Regex re = new Regex (pattern, options);
168                         return re.Replace (input, replacement);
169                 }
170
171                 public static string [] Split (string input, string pattern)
172                 {
173                         return Regex.Split (input, pattern, RegexOptions.None);
174                 }
175
176                 public static string [] Split (string input, string pattern, RegexOptions options)
177                 {
178                         Regex re = new Regex (pattern, options);
179                         return re.Split (input);
180                 }
181
182                 static FactoryCache cache = new FactoryCache (15);
183                 public static int CacheSize {
184                         get { return cache.Capacity; }
185                         set {
186                                 if (value < 0)
187                                         throw new ArgumentOutOfRangeException ("CacheSize");
188
189                                 cache.Capacity = value; 
190                         }
191                 }
192
193                 // private
194
195
196                 // constructors
197
198                 // This constructor is used by compiled regular expressions that are
199                 // classes derived from Regex class. No initialization required.
200                 protected Regex ()
201                 {
202                 }
203
204                 public Regex (string pattern) : this (pattern, RegexOptions.None)
205                 {
206                 }
207
208                 public Regex (string pattern, RegexOptions options)
209                 {
210                         if (pattern == null)
211                                 throw new ArgumentNullException ("pattern");
212                         validate_options (options);
213                         this.pattern = pattern;
214                         this.roptions = options;
215                         Init ();
216                 }
217                 
218 #if NET_4_5
219                 [MonoTODO ("Timeouts are ignored.")]
220                 public Regex (string pattern, RegexOptions options, TimeSpan matchTimeout)
221                         : this (pattern, options)
222                 {
223                         MatchTimeout = matchTimeout;
224                 }
225                 
226                 [MonoTODO ("Timeouts are ignored.")]
227                 public TimeSpan MatchTimeout {
228                         get;
229                         private set;
230                 }
231                 
232                 [MonoTODO ("Timeouts are ignored.")]
233                 public static bool IsMatch (
234                         string input, string pattern, RegexOptions options, TimeSpan matchTimeout)
235                 {
236                         return IsMatch (input, pattern, options);
237                 }
238                 
239                 [MonoTODO ("Timeouts are ignored.")]
240                 public static Match Match (
241                         string input, string pattern, RegexOptions options, TimeSpan matchTimeout)
242                 {
243                         return Match (input, pattern, options);
244                 }
245                 
246                 [MonoTODO ("Timeouts are ignored.")]
247                 public static MatchCollection Matches (
248                         string input, string pattern, RegexOptions options, TimeSpan matchTimeout)
249                 {
250                         return Matches (input, pattern, options, matchTimeout);
251                 }
252                 
253                 [MonoTODO ("Timeouts are ignored.")]
254                 public static string Replace (
255                         string input, string pattern, string replacement, RegexOptions options,
256                         TimeSpan matchTimeout)
257                 {
258                         return Replace (input, pattern, replacement, options);
259                 }
260                 
261                 [MonoTODO ("Timeouts are ignored.")]
262                 public static string Replace (
263                         string input, string pattern, MatchEvaluator evaluator, RegexOptions options,
264                         TimeSpan matchTimeout)
265                 {
266                         return Replace (input, pattern, evaluator, options);
267                 }
268                 
269                 [MonoTODO ("Timeouts are ignored.")]
270                 public static string[] Split (
271                         string input, string pattern, RegexOptions options, TimeSpan matchTimeout)
272                 {
273                         return Split (input, pattern, options);
274                 }
275
276                 public static readonly TimeSpan InfiniteMatchTimeout = TimeSpan.FromMilliseconds (-1);
277 #endif
278
279                 static void validate_options (RegexOptions options)
280                 {
281                         const RegexOptions allopts =
282                                 RegexOptions.None |
283                                 RegexOptions.IgnoreCase |
284                                 RegexOptions.Multiline |
285                                 RegexOptions.ExplicitCapture |
286 #if MOBILE || !NET_2_1
287                                 RegexOptions.Compiled |
288 #endif
289                                 RegexOptions.Singleline |
290                                 RegexOptions.IgnorePatternWhitespace |
291                                 RegexOptions.RightToLeft |
292                                 RegexOptions.ECMAScript |
293                                 RegexOptions.CultureInvariant;
294
295                         const RegexOptions ecmaopts =
296                                 RegexOptions.IgnoreCase |
297                                 RegexOptions.Multiline |
298 #if MOBILE || !NET_2_1
299                                 RegexOptions.Compiled |
300 #endif
301                                 RegexOptions.ECMAScript;
302
303                         if ((options & ~allopts) != 0)
304                                 throw new ArgumentOutOfRangeException ("options");
305                         if ((options & RegexOptions.ECMAScript) != 0 && (options & ~ecmaopts) != 0)
306                                 throw new ArgumentOutOfRangeException ("options");
307                 }
308
309                 private void Init ()
310                 {
311                         this.machineFactory = cache.Lookup (this.pattern, this.roptions);
312
313                         if (this.machineFactory == null) {
314                                 InitNewRegex();
315                         } else {
316                                 this.group_count = this.machineFactory.GroupCount;
317                                 this.gap = this.machineFactory.Gap;
318                                 this.mapping = this.machineFactory.Mapping;
319                                 this.group_names = this.machineFactory.NamesMapping;
320                         }
321                 }
322
323                 private void InitNewRegex () 
324                 {
325                         this.machineFactory = CreateMachineFactory (this.pattern, this.roptions);
326                         cache.Add (this.pattern, this.roptions, this.machineFactory);
327                         this.group_count = machineFactory.GroupCount;
328                         this.gap = this.machineFactory.Gap;
329                         this.mapping = machineFactory.Mapping;
330                         this.group_names = this.machineFactory.NamesMapping;
331                 }
332
333 #if !NET_2_1
334                 // The new rx engine seems to be working now, but
335                 // potential problems are being tracked down here:
336                 // https://bugzilla.novell.com/show_bug.cgi?id=470827
337                 static readonly bool old_rx =
338                         Environment.GetEnvironmentVariable ("MONO_NEW_RX") == null;
339 #endif
340
341                 private static IMachineFactory CreateMachineFactory (string pattern, RegexOptions options) 
342                 {
343                         Parser psr = new Parser ();
344                         RegularExpression re = psr.ParseRegularExpression (pattern, options);
345
346 #if NET_2_1
347                         ICompiler cmp = new PatternCompiler ();
348 #else
349                         ICompiler cmp;
350                         if (!old_rx) {
351                                 if ((options & RegexOptions.Compiled) != 0)
352                                         cmp = new CILCompiler ();
353                                 else
354                                         cmp = new RxCompiler ();
355                         } else {
356                                 cmp = new PatternCompiler ();
357                         }
358 #endif
359
360                         re.Compile (cmp, (options & RegexOptions.RightToLeft) != 0);
361
362                         IMachineFactory machineFactory = cmp.GetMachineFactory ();
363                         Hashtable mapping = new Hashtable ();
364                         machineFactory.Gap = psr.GetMapping (mapping);
365                         machineFactory.Mapping = mapping;
366                         machineFactory.NamesMapping = GetGroupNamesArray (machineFactory.GroupCount, machineFactory.Mapping);
367
368                         return machineFactory;
369                 }
370
371                 protected Regex (SerializationInfo info, StreamingContext context) :
372                         this (info.GetString ("pattern"), 
373                               (RegexOptions) info.GetValue ("options", typeof (RegexOptions)))
374                 {
375                 }
376
377                 // public instance properties
378                 
379                 public RegexOptions Options {
380                         get { return roptions; }
381                 }
382
383                 public bool RightToLeft {
384                         get { return (roptions & RegexOptions.RightToLeft) != 0; }
385                 }
386
387                 // public instance methods
388                 
389                 public string [] GetGroupNames ()
390                 {
391                         string [] names = new string [1 + group_count];
392                         Array.Copy (group_names, names, 1 + group_count);
393                         return names;
394                 }
395
396                 public int [] GetGroupNumbers ()
397                 {
398                         int [] numbers = new int [1 + group_count];
399                         Array.Copy (GroupNumbers, numbers, 1 + group_count);
400                         return numbers;
401                 }
402
403                 public string GroupNameFromNumber (int i)
404                 {
405                         i = GetGroupIndex (i);
406                         if (i < 0)
407                                 return "";
408
409                         return group_names [i];
410                 }
411
412                 public int GroupNumberFromName (string name)
413                 {
414                         if (!mapping.Contains (name))
415                                 return -1;
416                         int i = (int) mapping [name];
417                         if (i >= gap)
418                                 i = Int32.Parse (name);
419                         return i;
420                 }
421
422                 internal int GetGroupIndex (int number)
423                 {
424                         if (number < gap)
425                                 return number;
426                         if (gap > group_count)
427                                 return -1;
428                         return Array.BinarySearch (GroupNumbers, gap, group_count - gap + 1, number);
429                 }
430
431                 int default_startat (string input)
432                 {
433                         return (RightToLeft && input != null) ? input.Length : 0;
434                 }
435
436                 // match methods
437                 
438                 public bool IsMatch (string input)
439                 {
440                         return IsMatch (input, default_startat (input));
441                 }
442
443                 public bool IsMatch (string input, int startat)
444                 {
445                         return Match (input, startat).Success;
446                 }
447
448                 public Match Match (string input)
449                 {
450                         return Match (input, default_startat (input));
451                 }
452
453                 public Match Match (string input, int startat)
454                 {
455                         if (input == null)
456                                 throw new ArgumentNullException ("input");
457                         if (startat < 0 || startat > input.Length)
458                                 throw new ArgumentOutOfRangeException ("startat");
459                         return CreateMachine ().Scan (this, input, startat, input.Length);
460                 }
461
462                 public Match Match (string input, int beginning, int length)
463                 {
464                         if (input == null)
465                                 throw new ArgumentNullException ("input");
466                         if (beginning < 0 || beginning > input.Length)
467                                 throw new ArgumentOutOfRangeException ("beginning");
468                         if (length < 0 || length > input.Length - beginning)
469                                 throw new ArgumentOutOfRangeException ("length");
470                         return CreateMachine ().Scan (this, input, beginning, beginning + length);
471                 }
472
473                 public MatchCollection Matches (string input)
474                 {
475                         return Matches (input, default_startat (input));
476                 }
477
478                 public MatchCollection Matches (string input, int startat)
479                 {
480                         Match m = Match (input, startat);
481                         return new MatchCollection (m);
482                 }
483
484                 // replace methods
485
486                 public string Replace (string input, MatchEvaluator evaluator)
487                 {
488                         return Replace (input, evaluator, Int32.MaxValue, default_startat (input));
489                 }
490
491                 public string Replace (string input, MatchEvaluator evaluator, int count)
492                 {
493                         return Replace (input, evaluator, count, default_startat (input));
494                 }
495
496                 class Adapter {
497                         MatchEvaluator ev;
498                         public Adapter (MatchEvaluator ev) { this.ev = ev; }
499                         public void Evaluate (Match m, StringBuilder sb) { sb.Append (ev (m)); }
500                 }
501
502                 public string Replace (string input, MatchEvaluator evaluator, int count, int startat)
503                 {
504                         if (input == null)
505                                 throw new ArgumentNullException ("input");
506                         if (evaluator == null)
507                                 throw new ArgumentNullException ("evaluator");
508                         if (count < -1)
509                                 throw new ArgumentOutOfRangeException ("count");
510                         if (startat < 0 || startat > input.Length)
511                                 throw new ArgumentOutOfRangeException ("startat");
512
513                         BaseMachine m = (BaseMachine)CreateMachine ();
514
515                         if (RightToLeft)
516                                 return m.RTLReplace (this, input, evaluator, count, startat);
517
518                         // NOTE: If this is a cause of a lot of allocations, we can convert it to
519                         //       use a ThreadStatic allocation mitigator
520                         Adapter a = new Adapter (evaluator);
521
522                         return m.LTRReplace (this, input, new BaseMachine.MatchAppendEvaluator (a.Evaluate),
523                                                                  count, startat);
524                 }
525
526                 public string Replace (string input, string replacement)
527                 {
528                         return Replace (input, replacement, Int32.MaxValue, default_startat (input));
529                 }
530
531                 public string Replace (string input, string replacement, int count)
532                 {
533                         return Replace (input, replacement, count, default_startat (input));
534                 }
535
536                 public string Replace (string input, string replacement, int count, int startat)
537                 {
538                         if (input == null)
539                                 throw new ArgumentNullException ("input");
540                         if (replacement == null)
541                                 throw new ArgumentNullException ("replacement");
542                         if (count < -1)
543                                 throw new ArgumentOutOfRangeException ("count");
544                         if (startat < 0 || startat > input.Length)
545                                 throw new ArgumentOutOfRangeException ("startat");
546
547                         return CreateMachine ().Replace (this, input, replacement, count, startat);
548                 }
549
550                 // split methods
551
552                 public string [] Split (string input)
553                 {
554                         return Split (input, Int32.MaxValue, default_startat (input));
555                 }
556
557                 public string [] Split (string input, int count)
558                 {
559                         return Split (input, count, default_startat (input));
560                 }
561
562                 public string [] Split (string input, int count, int startat)
563                 {
564                         if (input == null)
565                                 throw new ArgumentNullException ("input");
566                         if (count < 0)
567                                 throw new ArgumentOutOfRangeException ("count");
568                         if (startat < 0 || startat > input.Length)
569                                 throw new ArgumentOutOfRangeException ("startat");
570
571                         return CreateMachine ().Split (this, input, count, startat);
572                 }
573
574                 // This method is called at the end of the constructor of compiled
575                 // regular expression classes to do internal initialization.
576                 protected void InitializeReferences ()
577                 {
578                         if (refsInitialized)
579                                 throw new NotSupportedException ("This operation is only allowed once per object.");
580
581                         refsInitialized = true;
582
583                         // Compile pattern that results in performance loss as existing
584                         // CIL code is ignored but provides support for regular
585                         // expressions compiled to assemblies.
586                         Init ();
587                 }
#if !NET_2_1
                // True when the RegexOptions.Compiled flag is set.
                protected bool UseOptionC ()
                {
                        RegexOptions flag = roptions & RegexOptions.Compiled;
                        return flag != 0;
                }
#endif
                // True when the RegexOptions.RightToLeft flag is set.
                protected bool UseOptionR ()
                {
                        RegexOptions flag = roptions & RegexOptions.RightToLeft;
                        return flag != 0;
                }
598
599                 // object methods
600                 
601                 public override string ToString ()
602                 {
603                         return pattern;
604                 }
605
606                 // ISerializable interface
607                 void ISerializable.GetObjectData (SerializationInfo info, StreamingContext context)
608                 {
609                         info.AddValue ("pattern", this.ToString (), typeof (string));
610                         info.AddValue ("options", this.Options, typeof (RegexOptions));
611                 }
612
613                 // internal
614
615                 internal int GroupCount {
616                         get { return group_count; }
617                 }
618
619                 internal int Gap {
620                         get { return gap; }
621                 }
622
623                 // private
624
625                 private IMachine CreateMachine ()
626                 {
627                         return machineFactory.NewInstance ();
628                 }
629
630                 private static string [] GetGroupNamesArray (int groupCount, IDictionary mapping) 
631                 {
632                         string [] group_names = new string [groupCount + 1];
633                         IDictionaryEnumerator de = mapping.GetEnumerator ();
634                         while (de.MoveNext ())
635                                 group_names [(int) de.Value] = (string) de.Key;
636                         return group_names;
637                 }
638
639                 private int [] GroupNumbers {
640                         get {
641                                 if (group_numbers == null) {
642                                         group_numbers = new int [1 + group_count];
643                                         for (int i = 0; i < gap; ++i)
644                                                 group_numbers [i] = i;
645                                         for (int i = gap; i <= group_count; ++i)
646                                                 group_numbers [i] = Int32.Parse (group_names [i]);
647                                         return group_numbers;
648                                 }
649                                 return group_numbers;
650                         }
651                 }
652
653                 private IMachineFactory machineFactory;
654                 private IDictionary mapping;
655                 private int group_count;
656                 private int gap;
657                 private bool refsInitialized;
658                 private string [] group_names;
659                 private int [] group_numbers;
660                 
661                 // protected members
662
663                 protected internal string pattern;
664                 protected internal RegexOptions roptions;
665                 
666                 // MS undocumented members
667 #if NET_2_1
668                 [MonoTODO]
669                 internal System.Collections.Generic.Dictionary<string, int> capnames;
670                 [MonoTODO]
671                 internal System.Collections.Generic.Dictionary<int, int> caps;
672 #else
673                 [MonoTODO]
674                 protected internal System.Collections.Hashtable capnames;
675                 [MonoTODO]
676                 protected internal System.Collections.Hashtable caps;
677
678                 [MonoTODO]
679                 protected internal RegexRunnerFactory factory;
680 #endif
681                 [MonoTODO]
682                 protected internal int capsize;
683                 [MonoTODO]
684                 protected internal string [] capslist;
685         }
686 }