2007-10-22 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / System / System.Text.RegularExpressions / Regex.cs
1 //
2 // assembly:    System
3 // namespace:   System.Text.RegularExpressions
4 // file:        regex.cs
5 //
6 // author:      Dan Lewis (dlewis@gmx.co.uk)
7 //              (c) 2002
8
9 //
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
17 // 
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
20 // 
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 //
29
30 using System;
31 using System.Text;
32 using System.Collections;
33 using System.Reflection;
34 using System.Reflection.Emit;
35 using System.Runtime.Serialization;
36
37 using RegularExpression = System.Text.RegularExpressions.Syntax.RegularExpression;
38 using Parser = System.Text.RegularExpressions.Syntax.Parser;
39
40 using System.Diagnostics;
41
42
43 namespace System.Text.RegularExpressions {
44         
45         [Serializable]
46         public class Regex : ISerializable {
47
48 #if NET_2_0
49                 private static int cache_size = 15;
50 #endif
51 #if !TARGET_JVM
52                 [MonoTODO]
53                 public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname)
54                 {
55                         Regex.CompileToAssembly(regexes, aname, new CustomAttributeBuilder [] {}, null);
56                 }
57
58                 [MonoTODO]
59                 public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname,
60                                                       CustomAttributeBuilder [] attribs)
61                 {
62                         Regex.CompileToAssembly(regexes, aname, attribs, null);
63                 }
64
65                 [MonoTODO]
66                 public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname,
67                                                       CustomAttributeBuilder [] attribs, string resourceFile)
68                 {
69                         throw new NotImplementedException ();
70                         // TODO : Make use of attribs and resourceFile parameters
71                         /*
72                         AssemblyBuilder asmBuilder = AppDomain.CurrentDomain.DefineDynamicAssembly (aname, AssemblyBuilderAccess.RunAndSave);
73                         ModuleBuilder modBuilder = asmBuilder.DefineDynamicModule("InnerRegexModule",aname.Name);
74                         Parser psr = new Parser ();     
75                         
76                         System.Console.WriteLine("CompileToAssembly");
77                                
78                         for(int i=0; i < regexes.Length; i++)
79                                 {
80                                         System.Console.WriteLine("Compiling expression :" + regexes[i].Pattern);
81                                         RegularExpression re = psr.ParseRegularExpression (regexes[i].Pattern, regexes[i].Options);
82                                         
83                                         // compile
84                                                                                 
85                                         CILCompiler cmp = new CILCompiler (modBuilder, i);
86                                         bool reverse = (regexes[i].Options & RegexOptions.RightToLeft) !=0;
87                                         re.Compile (cmp, reverse);
88                                         cmp.Close();
89                                         
90                                 }
91                        
92
93                         // Define a runtime class with specified name and attributes.
94                         TypeBuilder builder = modBuilder.DefineType("ITest");
95                         builder.CreateType();
96                         asmBuilder.Save(aname.Name);
97                         */
98                 }
99 #endif
100                 
101                 public static string Escape (string str)
102                 {
103                         return Parser.Escape (str);
104                 }
105
106                 public static string Unescape (string str)
107                 {
108                         return Parser.Unescape (str);
109                 }
110
111                 public static bool IsMatch (string input, string pattern)
112                 {
113                         return IsMatch (input, pattern, RegexOptions.None);
114                 }
115
116                 public static bool IsMatch (string input, string pattern, RegexOptions options)
117                 {
118                         Regex re = new Regex (pattern, options);
119                         return re.IsMatch (input);
120                 }
121
122                 public static Match Match (string input, string pattern)
123                 {
124                         return Regex.Match (input, pattern, RegexOptions.None);
125                 }
126
127                 public static Match Match (string input, string pattern, RegexOptions options)
128                 {
129                         Regex re = new Regex (pattern, options);
130                         return re.Match (input);
131                 }
132
133                 public static MatchCollection Matches (string input, string pattern)
134                 {
135                         return Matches (input, pattern, RegexOptions.None);
136                 }
137
138                 public static MatchCollection Matches (string input, string pattern, RegexOptions options)
139                 {
140                         Regex re = new Regex (pattern, options);
141                         return re.Matches (input);
142                 }
143
144                 public static string Replace (string input, string pattern, MatchEvaluator evaluator)
145                 {
146                         return Regex.Replace (input, pattern, evaluator, RegexOptions.None);
147                 }
148
149                 public static string Replace (string input, string pattern, MatchEvaluator evaluator,
150                                               RegexOptions options)
151                 {
152                         Regex re = new Regex (pattern, options);
153                         return re.Replace (input, evaluator);
154                 }
155
156                 public static string Replace (string input, string pattern, string replacement)
157                 {
158                         return Regex.Replace (input, pattern, replacement, RegexOptions.None);
159                 }
160
161                 public static string Replace (string input, string pattern, string replacement,
162                                               RegexOptions options)
163                 {
164                         Regex re = new Regex (pattern, options);
165                         return re.Replace (input, replacement);
166                 }
167
168                 public static string [] Split (string input, string pattern)
169                 {
170                         return Regex.Split (input, pattern, RegexOptions.None);
171                 }
172
173                 public static string [] Split (string input, string pattern, RegexOptions options)
174                 {
175                         Regex re = new Regex (pattern, options);
176                         return re.Split (input);
177                 }
178
179 #if NET_2_0
180                 [MonoTODO ("should be used somewhere ? FactoryCache ?")]
181                 public static int CacheSize {
182                         get { return cache_size; }
183                         set {
184                                 if (value < 0)
185                                         throw new ArgumentOutOfRangeException ("CacheSize");
186                                 cache_size = value;
187                         }
188                 }
189 #endif
190
191                 // private
192
193                 private static FactoryCache cache = new FactoryCache (200);     // TODO put some meaningful number here
194
195                 // constructors
196
197                 // This constructor is used by compiled regular expressions that are
198                 // classes derived from Regex class. No initialization required.
199                 protected Regex ()
200                 {
201                 }
202
203                 public Regex (string pattern) : this (pattern, RegexOptions.None)
204                 {
205                 }
206
207                 public Regex (string pattern, RegexOptions options)
208                 {
209                         this.pattern = pattern;
210                         this.roptions = options;
211                         Init ();
212                 }
213
214                 private void Init ()
215                 {
216                         this.machineFactory = cache.Lookup (this.pattern, this.roptions);
217
218                         if (this.machineFactory == null) {
219                                 // parse and install group mapping
220
221                                 Parser psr = new Parser ();
222                                 RegularExpression re = psr.ParseRegularExpression (this.pattern, this.roptions);
223                                 this.group_count = re.GroupCount;
224                                 this.mapping = psr.GetMapping ();
225
226                                 // compile
227                                 
228                                 ICompiler cmp;
229                                 //if ((this.roptions & RegexOptions.Compiled) != 0)
230                                 //      //throw new Exception ("Not implemented.");
231                                 //      cmp = new CILCompiler ();
232                                 //else
233                                 cmp = new PatternCompiler ();
234
235                                 re.Compile (cmp, RightToLeft);
236
237                                 // install machine factory and add to pattern cache
238
239                                 this.machineFactory = cmp.GetMachineFactory ();
240                                 this.machineFactory.Mapping = mapping;
241                                 cache.Add (this.pattern, this.roptions, this.machineFactory);
242                         } else {
243                                 this.group_count = this.machineFactory.GroupCount;
244                                 this.mapping = this.machineFactory.Mapping;
245                         }
246                 }
247
248 #if NET_2_0
249                 protected
250 #else
251                 private
252 #endif
253                 Regex (SerializationInfo info, StreamingContext context) :
254                         this (info.GetString ("pattern"), 
255                               (RegexOptions) info.GetValue ("options", typeof (RegexOptions)))
256                 {
257                 }
258
259 #if ONLY_1_1 && !TARGET_JVM
260                 // fixes public API signature
261                 ~Regex ()
262                 {
263                 }
264 #endif
265                 // public instance properties
266                 
267                 public RegexOptions Options {
268                         get { return roptions; }
269                 }
270
271                 public bool RightToLeft {
272                         get { return (roptions & RegexOptions.RightToLeft) != 0; }
273                 }
274
275                 // public instance methods
276                 
277                 public string [] GetGroupNames ()
278                 {
279                         string [] names = new string [mapping.Count];
280                         mapping.Keys.CopyTo (names, 0);
281
282                         return names;
283                 }
284
285                 public int[] GetGroupNumbers ()
286                 {
287                         int[] numbers = new int [mapping.Count];
288                         mapping.Values.CopyTo (numbers, 0);
289
290                         return numbers;
291                 }
292
293                 public string GroupNameFromNumber (int i)
294                 {
295                         if (i > group_count)
296                                 return "";
297                 
298                         foreach (string name in mapping.Keys) {
299                                 if ((int) mapping [name] == i)
300                                         return name;
301                         }
302
303                         return "";
304                 }
305
306                 public int GroupNumberFromName (string name)
307                 {
308                         if (mapping.Contains (name))
309                                 return (int) mapping [name];
310
311                         return -1;
312                 }
313
314                 // match methods
315                 
316                 public bool IsMatch (string input)
317                 {
318                         return IsMatch (input, RightToLeft ? input.Length : 0);
319                 }
320
321                 public bool IsMatch (string input, int startat)
322                 {
323                         return Match (input, startat).Success;
324                 }
325
326                 public Match Match (string input)
327                 {
328                         return Match (input, RightToLeft ? input.Length : 0);
329                 }
330
331                 public Match Match (string input, int startat)
332                 {
333                         return CreateMachine ().Scan (this, input, startat, input.Length);
334                 }
335
336                 public Match Match (string input, int startat, int length)
337                 {
338                         return CreateMachine ().Scan (this, input, startat, startat + length);
339                 }
340
341                 public MatchCollection Matches (string input)
342                 {
343                         return Matches (input, RightToLeft ? input.Length : 0);
344                 }
345
346                 public MatchCollection Matches (string input, int startat)
347                 {
348                         Match m = Match (input, startat);
349                         return new MatchCollection (m);
350                 }
351
352                 // replace methods
353
354                 public string Replace (string input, MatchEvaluator evaluator)
355                 {
356                         return Replace (input, evaluator, Int32.MaxValue, RightToLeft ? input.Length : 0);
357                 }
358
359                 public string Replace (string input, MatchEvaluator evaluator, int count)
360                 {
361                         return Replace (input, evaluator, count, RightToLeft ? input.Length : 0);
362                 }
363
364                 class Adapter {
365                         MatchEvaluator ev;
366                         public Adapter (MatchEvaluator ev) { this.ev = ev; }
367                         public void Evaluate (Match m, StringBuilder sb) { sb.Append (ev (m)); }
368                 }
369
370                 delegate void MatchAppendEvaluator (Match match, StringBuilder sb);
371
372                 public string Replace (string input, MatchEvaluator evaluator, int count, int startat)
373                 {
374                         Adapter a = new Adapter (evaluator);
375                         return Replace (input, new MatchAppendEvaluator (a.Evaluate), count, startat);
376                 }
377
378                 string Replace (string input, MatchAppendEvaluator evaluator, int count, int startat)
379                 {
380                         StringBuilder result = new StringBuilder ();
381                         int ptr = startat;
382                         int counter = count;
383
384                         result.Append (input, 0, ptr);
385
386                         Match m = Match (input, startat);
387                         while (m.Success) {
388                                 if (count != -1)
389                                         if(counter -- <= 0)
390                                                 break;
391                                 if (m.Index < ptr)
392                                         throw new SystemException ("how");
393                                 result.Append (input, ptr, m.Index - ptr);
394                                 evaluator (m, result);
395
396                                 ptr = m.Index + m.Length;
397                                 m = m.NextMatch ();
398                         }
399                         
400                         if (ptr == 0)
401                                 return input;
402                         
403                         result.Append (input, ptr, input.Length - ptr);
404
405                         return result.ToString ();
406                 }
407
408                 public string Replace (string input, string replacement)
409                 {
410                         return Replace (input, replacement, Int32.MaxValue, RightToLeft ? input.Length : 0);
411                 }
412
413                 public string Replace (string input, string replacement, int count)
414                 {
415                         return Replace (input, replacement, count, RightToLeft ? input.Length : 0);
416                 }
417
418                 public string Replace (string input, string replacement, int count, int startat)
419                 {
420                         ReplacementEvaluator ev = new ReplacementEvaluator (this, replacement);
421                         return Replace (input, new MatchAppendEvaluator (ev.EvaluateAppend), count, startat);
422                 }
423
424                 // split methods
425
426                 public string [] Split (string input)
427                 {
428                         return Split (input, Int32.MaxValue, RightToLeft ? input.Length : 0);
429                 }
430
431                 public string [] Split (string input, int count)
432                 {
433                         return Split (input, count, RightToLeft ? input.Length : 0);
434                 }
435
436                 public string [] Split (string input, int count, int startat)
437                 {
438                         ArrayList splits = new ArrayList ();
439                         if (count == 0)
440                                 count = Int32.MaxValue;
441
442                         int ptr = startat;
443                         Match m = null;
444                         while (--count > 0) {
445                                 if (m != null)
446                                         m = m.NextMatch ();
447                                 else
448                                         m = Match (input, ptr);
449
450                                 if (!m.Success)
451                                         break;
452                         
453                                 if (RightToLeft)
454                                         splits.Add (input.Substring (m.Index + m.Length, ptr - m.Index - m.Length));
455                                 else
456                                         splits.Add (input.Substring (ptr, m.Index - ptr));
457                                         
458                                 int gcount = m.Groups.Count;
459                                 for (int gindex = 1; gindex < gcount; gindex++) {
460                                         Group grp = m.Groups [gindex];
461                                         splits.Add (input.Substring (grp.Index, grp.Length));
462                                 }
463
464                                 if (RightToLeft)
465                                         ptr = m.Index; 
466                                 else
467                                         ptr = m.Index + m.Length;
468                                         
469                         }
470
471                         if (RightToLeft && ptr >= 0)
472                                 splits.Add (input.Substring (0, ptr));
473                         if (!RightToLeft && ptr <= input.Length)
474                                 splits.Add (input.Substring (ptr));
475
476                         return (string []) splits.ToArray (typeof (string));
477                 }
478
479                 // This method is called at the end of the constructor of compiled
480                 // regular expression classes to do internal initialization.
481                 protected void InitializeReferences ()
482                 {
483                         if (refsInitialized)
484                                 throw new NotSupportedException ("This operation is only allowed once per object.");
485
486                         refsInitialized = true;
487
488                         // Compile pattern that results in performance loss as existing
489                         // CIL code is ignored but provides support for regular
490                         // expressions compiled to assemblies.
491                         Init ();
492                 }
493
494                 protected bool UseOptionC ()
495                 {
496                         return ((roptions & RegexOptions.Compiled) != 0);
497                 }
498
499                 protected bool UseOptionR ()
500                 {
501                         return ((roptions & RegexOptions.RightToLeft) != 0);
502                 }
503
504                 // object methods
505                 
506                 public override string ToString ()
507                 {
508                         return pattern;
509                 }
510
511                 // ISerializable interface
512                 void ISerializable.GetObjectData (SerializationInfo info, StreamingContext context)
513                 {
514                         info.AddValue ("pattern", this.ToString (), typeof (string));
515                         info.AddValue ("options", this.Options, typeof (RegexOptions));
516                 }
517
518                 // internal
519
520                 internal int GroupCount {
521                         get { return group_count; }
522                 }
523
524                 // private
525
526                 private IMachine CreateMachine ()
527                 {
528                         return machineFactory.NewInstance ();
529                 }
530
531                 private IMachineFactory machineFactory;
532                 private IDictionary mapping;
533                 private int group_count;
534                 private bool refsInitialized;
535
536                 
537                 // protected members
538
539                 protected internal string pattern;
540                 protected internal RegexOptions roptions;
541                 
542                 // MS undocumented members
543                 [MonoTODO]
544                 protected internal System.Collections.Hashtable capnames;
545                 [MonoTODO]
546                 protected internal System.Collections.Hashtable caps;
547                 [MonoTODO]
548                 protected internal int capsize;
549                 [MonoTODO]
550                 protected internal string [] capslist;
551                 [MonoTODO]
552                 protected internal RegexRunnerFactory factory;
553         }
554 }