9ee0d3a6cb04539bc80d9dc7669bc208d93143e5
[mono.git] / mcs / class / System / System.Text.RegularExpressions / Regex.cs
1 //
2 // assembly:    System
3 // namespace:   System.Text.RegularExpressions
4 // file:        regex.cs
5 //
6 // author:      Dan Lewis (dlewis@gmx.co.uk)
7 //              (c) 2002
8
9 //
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
17 // 
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
20 // 
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 //
29
30 using System;
31 using System.Text;
32 using System.Collections;
33 using System.Reflection;
34 using System.Reflection.Emit;
35 using System.Runtime.Serialization;
36
37 using RegularExpression = System.Text.RegularExpressions.Syntax.RegularExpression;
38 using Parser = System.Text.RegularExpressions.Syntax.Parser;
39
40 using System.Diagnostics;
41
42
43 namespace System.Text.RegularExpressions {
44         
45         [Serializable]
46         public partial class Regex : ISerializable {
47
48 #if !TARGET_JVM
// Forwards to the four-argument overload with an empty attribute array and
// no resource file.
[MonoTODO]
public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname)
{
        CustomAttributeBuilder [] noAttributes = new CustomAttributeBuilder [0];
        Regex.CompileToAssembly (regexes, aname, noAttributes, null);
}
54
// Forwards to the four-argument overload, passing null as the resource file.
[MonoTODO]
public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname,
                                      CustomAttributeBuilder [] attribs)
{
        string noResourceFile = null;
        Regex.CompileToAssembly (regexes, aname, attribs, noResourceFile);
}
61
// Most general CompileToAssembly overload; the shorter overloads forward here.
// Not implemented: always throws NotImplementedException.
[MonoTODO]
public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname,
                                      CustomAttributeBuilder [] attribs, string resourceFile)
{
        // TODO: make use of the attribs and resourceFile parameters.
        //
        // Disabled sketch of an implementation that used to live here:
        //   - define a RunAndSave dynamic assembly for aname and a dynamic
        //     module named "InnerRegexModule" inside it;
        //   - parse each regexes [i].Pattern with Parser.ParseRegularExpression
        //     and compile it with a CILCompiler bound to that module, in
        //     reverse when RegexOptions.RightToLeft is set;
        //   - define and create a type in the module, then save the assembly
        //     under aname.Name.
        throw new NotImplementedException ();
}
96 #endif
97                 
// Escapes 'str' by delegating to Parser.Escape.
public static string Escape (string str)
{
        string escaped = Parser.Escape (str);
        return escaped;
}
102
// Unescapes 'str' by delegating to Parser.Unescape.
public static string Unescape (string str)
{
        string unescaped = Parser.Unescape (str);
        return unescaped;
}
107
// Convenience overload: same as the three-argument IsMatch with
// RegexOptions.None.
public static bool IsMatch (string input, string pattern)
{
        return Regex.IsMatch (input, pattern, RegexOptions.None);
}
112
// Builds a Regex for 'pattern' and 'options' and reports whether it matches
// 'input'.
public static bool IsMatch (string input, string pattern, RegexOptions options)
{
        return new Regex (pattern, options).IsMatch (input);
}
118
// Convenience overload: same as the three-argument Match with
// RegexOptions.None.
public static Match Match (string input, string pattern)
{
        return Match (input, pattern, RegexOptions.None);
}
123
// Builds a Regex for 'pattern' and 'options' and returns its match against
// 'input'.
public static Match Match (string input, string pattern, RegexOptions options)
{
        return new Regex (pattern, options).Match (input);
}
129
// Convenience overload: same as the three-argument Matches with
// RegexOptions.None.
public static MatchCollection Matches (string input, string pattern)
{
        return Regex.Matches (input, pattern, RegexOptions.None);
}
134
// Builds a Regex for 'pattern' and 'options' and returns its matches in
// 'input'.
public static MatchCollection Matches (string input, string pattern, RegexOptions options)
{
        return new Regex (pattern, options).Matches (input);
}
140
// Convenience overload: same as the four-argument evaluator Replace with
// RegexOptions.None.
public static string Replace (string input, string pattern, MatchEvaluator evaluator)
{
        return Replace (input, pattern, evaluator, RegexOptions.None);
}
145
// Builds a Regex for 'pattern' and 'options' and replaces its matches in
// 'input' with the strings produced by 'evaluator'.
public static string Replace (string input, string pattern, MatchEvaluator evaluator,
                              RegexOptions options)
{
        return new Regex (pattern, options).Replace (input, evaluator);
}
152
// Convenience overload: same as the four-argument string Replace with
// RegexOptions.None.
public static string Replace (string input, string pattern, string replacement)
{
        return Replace (input, pattern, replacement, RegexOptions.None);
}
157
// Builds a Regex for 'pattern' and 'options' and replaces its matches in
// 'input' using the 'replacement' string.
public static string Replace (string input, string pattern, string replacement,
                              RegexOptions options)
{
        return new Regex (pattern, options).Replace (input, replacement);
}
164
// Convenience overload: same as the three-argument Split with
// RegexOptions.None.
public static string [] Split (string input, string pattern)
{
        return Split (input, pattern, RegexOptions.None);
}
169
// Builds a Regex for 'pattern' and 'options' and splits 'input' with it.
public static string [] Split (string input, string pattern, RegexOptions options)
{
        return new Regex (pattern, options).Split (input);
}
175
#if NET_2_0
// Static cache of machine factories (initial capacity 15).
static FactoryCache cache = new FactoryCache (15);

// Capacity of the static factory cache.
// The setter throws ArgumentOutOfRangeException for negative values.
public static int CacheSize {
        get { return cache.Capacity; }
        set {
                // Report the offending argument by its parameter name
                // ("value") rather than by the property name.
                if (value < 0)
                        throw new ArgumentOutOfRangeException ("value");

                cache.Capacity = value;
        }
}
#else
// Static cache of machine factories (capacity 200).
static FactoryCache cache = new FactoryCache (200);
#endif
190
191                 // private
192
193
194                 // constructors
195
// Parameterless constructor for compiled regular expressions, which are
// classes derived from Regex. It performs no initialization.
protected Regex ()
{
}
201
// Creates a regex for 'pattern' with RegexOptions.None.
public Regex (string pattern) : this (pattern, RegexOptions.None)
{
}
205
// Creates a regex for 'pattern' using 'options', then runs Init () to set up
// the matching machinery.
// Throws ArgumentNullException when 'pattern' is null.
public Regex (string pattern, RegexOptions options)
{
        // Reject a null pattern up front with the argument's name instead of
        // letting it fail somewhere inside initialization.
        if (pattern == null)
                throw new ArgumentNullException ("pattern");

        this.pattern = pattern;
        this.roptions = options;
        Init ();
}
212 #if !TARGET_JVM
// Binds this instance to a machine factory: reuses the cached factory for
// (pattern, roptions) when there is one, otherwise builds a new one through
// InitNewRegex ().
private void Init ()
{
        IMachineFactory cached = cache.Lookup (this.pattern, this.roptions);
        this.machineFactory = cached;

        if (cached == null) {
                InitNewRegex ();
                return;
        }

        // Cache hit: copy the group metadata from the cached factory.
        this.group_count = cached.GroupCount;
        this.mapping = cached.Mapping;
        this.group_names = cached.NamesMapping;
}
225 #endif
226
// Builds a machine factory for (pattern, roptions), adds it to the static
// cache and copies its group metadata into this instance.
private void InitNewRegex ()
{
        IMachineFactory created = CreateMachineFactory (this.pattern, this.roptions);
        this.machineFactory = created;
        cache.Add (this.pattern, this.roptions, created);

        this.group_count = created.GroupCount;
        this.mapping = created.Mapping;
        this.group_names = created.NamesMapping;
}
235
#if !NET_2_1
// The new rx engine has blocking bugs like
// https://bugzilla.novell.com/show_bug.cgi?id=470827
// so the old engine is used unless the MONO_NEW_RX environment variable
// is set.
static readonly bool old_rx =
        Environment.GetEnvironmentVariable ("MONO_NEW_RX") == null;
#endif

// Parses 'pattern', compiles it with the compiler selected for this build
// and these options, and returns the resulting machine factory with its
// Gap, Mapping and NamesMapping filled in.
private static IMachineFactory CreateMachineFactory (string pattern, RegexOptions options)
{
        Parser parser = new Parser ();
        RegularExpression expression = parser.ParseRegularExpression (pattern, options);
        bool rightToLeft = (options & RegexOptions.RightToLeft) != 0;

#if NET_2_1
        ICompiler compiler = new PatternCompiler ();
#else
        ICompiler compiler;
        if (old_rx)
                compiler = new PatternCompiler ();
        else if ((options & RegexOptions.Compiled) != 0)
                compiler = new CILCompiler ();
        else
                compiler = new RxCompiler ();
#endif

        expression.Compile (compiler, rightToLeft);

        IMachineFactory built = compiler.GetMachineFactory ();

        // The parser fills the table and returns the value stored as Gap.
        Hashtable groupMapping = new Hashtable ();
        built.Gap = parser.GetMapping (groupMapping);
        built.Mapping = groupMapping;
        built.NamesMapping = GetGroupNamesArray (built.GroupCount, built.Mapping);

        return built;
}
272
// Deserialization constructor: reads the "pattern" and "options" entries
// from 'info' and forwards them to the (pattern, options) constructor.
#if NET_2_0
protected
#else
private
#endif
Regex (SerializationInfo info, StreamingContext context) :
        this (info.GetString ("pattern"),
              (RegexOptions) info.GetValue ("options", typeof (RegexOptions)))
{
}
283
#if ONLY_1_1 && !TARGET_JVM
// Intentionally empty finalizer; it is declared only to fix the public
// API signature.
~Regex ()
{
}
#endif
290                 // public instance properties
291                 
// The options this regex was created with.
public RegexOptions Options {
        get { return this.roptions; }
}
295
// True when RegexOptions.RightToLeft is set in the options.
public bool RightToLeft {
        get {
                RegexOptions rtlBit = roptions & RegexOptions.RightToLeft;
                return rtlBit != 0;
        }
}
299
300                 // public instance methods
301                 
// Returns a fresh array holding the first group_count + 1 entries of the
// group name table.
public string [] GetGroupNames ()
{
        int length = 1 + group_count;
        string [] copy = new string [length];
        Array.Copy (group_names, 0, copy, 0, length);
        return copy;
}
308
// Returns the group numbers 0 .. group_count in ascending order.
public int[] GetGroupNumbers ()
{
        // FIXME: needs to handle arbitrarily numbered groups '(?<43>abc)'
        int[] result = new int [1 + group_count];
        int n = 0;
        while (n < result.Length) {
                result [n] = n;
                n++;
        }
        return result;
}
317
// Returns the name stored for group number 'i', or "" when GetGroupIndex
// yields a negative index for it.
public string GroupNameFromNumber (int i)
{
        int index = GetGroupIndex (i);
        return index < 0 ? "" : group_names [index];
}
326
// Returns the number mapped to the group called 'name', or -1 when the
// mapping has no such entry.
public int GroupNumberFromName (string name)
{
        return mapping.Contains (name) ? (int) mapping [name] : -1;
}
334
// Maps a group number to an index: numbers up to group_count are returned
// unchanged, anything larger yields -1.
internal int GetGroupIndex (int number)
{
        int limit = group_count + 1;
        if (number >= limit) {
                // FIXME: handle arbitrarily numbered groups here
                return -1;
        }
        return number;
}
343
344                 // match methods
345                 
// Reports whether this regex matches 'input'. The search starts at the end
// of the input for right-to-left expressions and at index 0 otherwise.
// Throws ArgumentNullException when 'input' is null.
public bool IsMatch (string input)
{
        // input.Length is read below; fail with a named argument instead of
        // a NullReferenceException.
        if (input == null)
                throw new ArgumentNullException ("input");

        return IsMatch (input, RightToLeft ? input.Length : 0);
}
350
// Reports whether Match (input, startat) succeeds.
public bool IsMatch (string input, int startat)
{
        Match result = Match (input, startat);
        return result.Success;
}
355
// Matches 'input', starting at the end of the input for right-to-left
// expressions and at index 0 otherwise.
// Throws ArgumentNullException when 'input' is null.
public Match Match (string input)
{
        // input.Length is read below; fail with a named argument instead of
        // a NullReferenceException.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Match (input, RightToLeft ? input.Length : 0);
}
360
// Creates a machine and scans 'input' from 'startat', passing input.Length
// as the end of the scan.
// Throws ArgumentNullException when 'input' is null.
public Match Match (string input, int startat)
{
        // input.Length is read below; fail with a named argument instead of
        // a NullReferenceException.
        if (input == null)
                throw new ArgumentNullException ("input");

        return CreateMachine ().Scan (this, input, startat, input.Length);
}
365
// Creates a machine and scans 'input' from 'startat', passing
// startat + length as the end of the scan.
// Throws ArgumentNullException when 'input' is null.
public Match Match (string input, int startat, int length)
{
        // Validate here so a null input is reported by argument name before
        // it reaches the machine.
        if (input == null)
                throw new ArgumentNullException ("input");

        return CreateMachine ().Scan (this, input, startat, startat + length);
}
370
// Returns the matches in 'input', starting at the end of the input for
// right-to-left expressions and at index 0 otherwise.
// Throws ArgumentNullException when 'input' is null.
public MatchCollection Matches (string input)
{
        // input.Length is read below; fail with a named argument instead of
        // a NullReferenceException.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Matches (input, RightToLeft ? input.Length : 0);
}
375
// Wraps the match found from 'startat' in a MatchCollection.
public MatchCollection Matches (string input, int startat)
{
        return new MatchCollection (Match (input, startat));
}
381
382                 // replace methods
383
// Replaces matches in 'input' with the strings produced by 'evaluator',
// passing Int32.MaxValue as the count. The start position is the end of the
// input for right-to-left expressions and index 0 otherwise.
// Throws ArgumentNullException when 'input' is null.
public string Replace (string input, MatchEvaluator evaluator)
{
        // input.Length is read below; fail with a named argument instead of
        // a NullReferenceException.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Replace (input, evaluator, Int32.MaxValue, RightToLeft ? input.Length : 0);
}
388
// Replaces matches in 'input' with the strings produced by 'evaluator',
// forwarding 'count'. The start position is the end of the input for
// right-to-left expressions and index 0 otherwise.
// Throws ArgumentNullException when 'input' is null.
public string Replace (string input, MatchEvaluator evaluator, int count)
{
        // input.Length is read below; fail with a named argument instead of
        // a NullReferenceException.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Replace (input, evaluator, count, RightToLeft ? input.Length : 0);
}
393
// Wraps a MatchEvaluator so that its result for a match is appended to a
// StringBuilder.
class Adapter {
        readonly MatchEvaluator callback;

        public Adapter (MatchEvaluator ev)
        {
                callback = ev;
        }

        // Appends the evaluator's replacement text for 'm' to 'sb'.
        public void Evaluate (Match m, StringBuilder sb)
        {
                string replacement = callback (m);
                sb.Append (replacement);
        }
}
399
// Replaces matches in 'input' with the strings produced by 'evaluator',
// forwarding 'count' and 'startat' to the machine.
// Throws ArgumentNullException when 'input' or 'evaluator' is null.
public string Replace (string input, MatchEvaluator evaluator, int count, int startat)
{
        // The exception must name the offending parameter.
        if (input == null)
                throw new ArgumentNullException ("input");
        if (evaluator == null)
                throw new ArgumentNullException ("evaluator");

        BaseMachine m = (BaseMachine)CreateMachine ();

        if (RightToLeft)
                return m.RTLReplace (this, input, evaluator, count, startat);

        // NOTE: If this is a cause of a lot of allocations, we can convert it to
        //       use a ThreadStatic allocation mitigator
        Adapter a = new Adapter (evaluator);

        return m.LTRReplace (this, input, new BaseMachine.MatchAppendEvaluator (a.Evaluate),
                             count, startat);
}
419
// Replaces matches in 'input' using 'replacement', passing Int32.MaxValue
// as the count. The start position is the end of the input for
// right-to-left expressions and index 0 otherwise.
// Throws ArgumentNullException when 'input' is null.
public string Replace (string input, string replacement)
{
        // input.Length is read below; fail with a named argument instead of
        // a NullReferenceException.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Replace (input, replacement, Int32.MaxValue, RightToLeft ? input.Length : 0);
}
424
// Replaces matches in 'input' using 'replacement', forwarding 'count'. The
// start position is the end of the input for right-to-left expressions and
// index 0 otherwise.
// Throws ArgumentNullException when 'input' is null.
public string Replace (string input, string replacement, int count)
{
        // input.Length is read below; fail with a named argument instead of
        // a NullReferenceException.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Replace (input, replacement, count, RightToLeft ? input.Length : 0);
}
429
// Creates a machine and lets it replace matches in 'input' using
// 'replacement', forwarding 'count' and 'startat'.
// Throws ArgumentNullException when 'input' or 'replacement' is null.
public string Replace (string input, string replacement, int count, int startat)
{
        // Validate the reference arguments here, as the MatchEvaluator
        // overload does, so failures are reported by argument name.
        if (input == null)
                throw new ArgumentNullException ("input");
        if (replacement == null)
                throw new ArgumentNullException ("replacement");

        return CreateMachine ().Replace (this, input, replacement, count, startat);
}
434
435                 // split methods
436
// Splits 'input', passing Int32.MaxValue as the count. The start position
// is the end of the input for right-to-left expressions and index 0
// otherwise.
// Throws ArgumentNullException when 'input' is null.
public string [] Split (string input)
{
        // input.Length is read below; fail with a named argument instead of
        // a NullReferenceException.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Split (input, Int32.MaxValue, RightToLeft ? input.Length : 0);
}
441
// Splits 'input', forwarding 'count'. The start position is the end of the
// input for right-to-left expressions and index 0 otherwise.
// Throws ArgumentNullException when 'input' is null.
public string [] Split (string input, int count)
{
        // input.Length is read below; fail with a named argument instead of
        // a NullReferenceException.
        if (input == null)
                throw new ArgumentNullException ("input");

        return Split (input, count, RightToLeft ? input.Length : 0);
}
446
// Creates a machine and lets it split 'input', forwarding 'count' and
// 'startat'.
// Throws ArgumentNullException when 'input' is null.
public string [] Split (string input, int count, int startat)
{
        // Validate here so a null input is reported by argument name before
        // it reaches the machine.
        if (input == null)
                throw new ArgumentNullException ("input");

        return CreateMachine ().Split (this, input, count, startat);
}
451
// Called at the end of the constructor of compiled regular expression
// classes to do internal initialization. It may run only once per object;
// a second call throws NotSupportedException.
protected void InitializeReferences ()
{
        if (!refsInitialized) {
                refsInitialized = true;

                // Compile pattern that results in performance loss as existing
                // CIL code is ignored but provides support for regular
                // expressions compiled to assemblies.
                Init ();
                return;
        }

        throw new NotSupportedException ("This operation is only allowed once per object.");
}
#if !NET_2_1
// True when RegexOptions.Compiled is set in the options.
protected bool UseOptionC ()
{
        RegexOptions compiledBit = roptions & RegexOptions.Compiled;
        return compiledBit != 0;
}
#endif
// True when RegexOptions.RightToLeft is set in the options.
protected bool UseOptionR ()
{
        RegexOptions rtlBit = roptions & RegexOptions.RightToLeft;
        return rtlBit != 0;
}
476
477                 // object methods
478                 
// Returns the pattern string of this regex.
public override string ToString ()
{
        return this.pattern;
}
483
// ISerializable interface: stores the pattern text and the options under
// the "pattern" and "options" entries.
void ISerializable.GetObjectData (SerializationInfo info, StreamingContext context)
{
        string patternText = this.ToString ();
        info.AddValue ("pattern", patternText, typeof (string));

        RegexOptions currentOptions = this.Options;
        info.AddValue ("options", currentOptions, typeof (RegexOptions));
}
490
491                 // internal
492
// Value of the group_count field.
internal int GroupCount {
        get { return this.group_count; }
}
496
497                 // private
498
// Asks the machine factory for a new machine instance.
private IMachine CreateMachine ()
{
        IMachine machine = machineFactory.NewInstance ();
        return machine;
}
503
// Builds an array of groupCount + 1 names from 'mapping': each entry's key
// (a string) is stored at the index given by its value (an int).
private static string [] GetGroupNamesArray (int groupCount, IDictionary mapping)
{
        string [] names = new string [groupCount + 1];
        foreach (DictionaryEntry entry in mapping)
                names [(int) entry.Value] = (string) entry.Key;
        return names;
}
512                 
// Factory used by CreateMachine () to obtain matching machines.
private IMachineFactory machineFactory;
// Group name -> group number table, as read by GroupNumberFromName.
private IDictionary mapping;
// Group count; the group name table has group_count + 1 entries.
private int group_count;
// Set once InitializeReferences () has run.
private bool refsInitialized;
// Group names indexed by group index.
private string [] group_names;


// protected members

// Pattern text and options of this regex.
protected internal string pattern;
protected internal RegexOptions roptions;

// MS undocumented members (marked MonoTODO)
#if NET_2_1
[MonoTODO]
internal System.Collections.Generic.Dictionary<string, int> capnames;
[MonoTODO]
internal System.Collections.Generic.Dictionary<int, int> caps;
#else
[MonoTODO]
protected internal System.Collections.Hashtable capnames;
[MonoTODO]
protected internal System.Collections.Hashtable caps;

[MonoTODO]
protected internal RegexRunnerFactory factory;
#endif
[MonoTODO]
protected internal int capsize;
[MonoTODO]
protected internal string [] capslist;
544         }
545 }