Merge pull request #495 from nicolas-raoul/fix-for-issue2907-with-no-formatting-changes
[mono.git] / mcs / class / System / System.Text.RegularExpressions / Regex.cs
index 37339365479fd48b2802a2105b8c45937721053c..378a3b872fe2cd8379e6da72e5983990f0525824 100644 (file)
@@ -43,12 +43,9 @@ using System.Diagnostics;
 namespace System.Text.RegularExpressions {
        
        [Serializable]
-       public class Regex : ISerializable {
+       public partial class Regex : ISerializable {
 
-#if NET_2_0
-               private static int cache_size = 15;
-#endif
-#if !TARGET_JVM
+#if !TARGET_JVM && !FULL_AOT_RUNTIME
                [MonoTODO]
                public static void CompileToAssembly (RegexCompilationInfo [] regexes, AssemblyName aname)
                {
@@ -100,11 +97,15 @@ namespace System.Text.RegularExpressions {
                
                public static string Escape (string str)
                {
+                       if (str == null)
+                               throw new ArgumentNullException ("str");
                        return Parser.Escape (str);
                }
 
                public static string Unescape (string str)
                {
+                       if (str == null)
+                               throw new ArgumentNullException ("str");
                        return Parser.Unescape (str);
                }
 
@@ -176,21 +177,19 @@ namespace System.Text.RegularExpressions {
                        return re.Split (input);
                }
 
-#if NET_2_0
-               [MonoTODO ("should be used somewhere ? FactoryCache ?")]
+               static FactoryCache cache = new FactoryCache (15);
                public static int CacheSize {
-                       get { return cache_size; }
+                       get { return cache.Capacity; }
                        set {
                                if (value < 0)
                                        throw new ArgumentOutOfRangeException ("CacheSize");
-                               cache_size = value;
+
+                               cache.Capacity = value; 
                        }
                }
-#endif
 
                // private
 
-               private static FactoryCache cache = new FactoryCache (200);     // TODO put some meaningful number here
 
                // constructors
 
@@ -206,62 +205,114 @@ namespace System.Text.RegularExpressions {
 
                public Regex (string pattern, RegexOptions options)
                {
+                       if (pattern == null)
+                               throw new ArgumentNullException ("pattern");
+                       validate_options (options);
                        this.pattern = pattern;
                        this.roptions = options;
                        Init ();
                }
 
+               static void validate_options (RegexOptions options)
+               {
+                       const RegexOptions allopts =
+                               RegexOptions.None |
+                               RegexOptions.IgnoreCase |
+                               RegexOptions.Multiline |
+                               RegexOptions.ExplicitCapture |
+#if MOBILE || !NET_2_1
+                               RegexOptions.Compiled |
+#endif
+                               RegexOptions.Singleline |
+                               RegexOptions.IgnorePatternWhitespace |
+                               RegexOptions.RightToLeft |
+                               RegexOptions.ECMAScript |
+                               RegexOptions.CultureInvariant;
+
+                       const RegexOptions ecmaopts =
+                               RegexOptions.IgnoreCase |
+                               RegexOptions.Multiline |
+#if MOBILE || !NET_2_1
+                               RegexOptions.Compiled |
+#endif
+                               RegexOptions.ECMAScript;
+
+                       if ((options & ~allopts) != 0)
+                               throw new ArgumentOutOfRangeException ("options");
+                       if ((options & RegexOptions.ECMAScript) != 0 && (options & ~ecmaopts) != 0)
+                               throw new ArgumentOutOfRangeException ("options");
+               }
+
+#if !TARGET_JVM
                private void Init ()
                {
                        this.machineFactory = cache.Lookup (this.pattern, this.roptions);
 
                        if (this.machineFactory == null) {
-                               // parse and install group mapping
-
-                               Parser psr = new Parser ();
-                               RegularExpression re = psr.ParseRegularExpression (this.pattern, this.roptions);
-                               this.group_count = re.GroupCount;
-                               this.mapping = psr.GetMapping ();
-
-                               // compile
-                               
-                               ICompiler cmp;
-                               //if ((this.roptions & RegexOptions.Compiled) != 0)
-                               //      //throw new Exception ("Not implemented.");
-                               //      cmp = new CILCompiler ();
-                               //else
-                               cmp = new PatternCompiler ();
-
-                               re.Compile (cmp, RightToLeft);
-
-                               // install machine factory and add to pattern cache
-
-                               this.machineFactory = cmp.GetMachineFactory ();
-                               this.machineFactory.Mapping = mapping;
-                               cache.Add (this.pattern, this.roptions, this.machineFactory);
+                               InitNewRegex();
                        } else {
                                this.group_count = this.machineFactory.GroupCount;
+                               this.gap = this.machineFactory.Gap;
                                this.mapping = this.machineFactory.Mapping;
+                               this.group_names = this.machineFactory.NamesMapping;
                        }
                }
+#endif
+
+               private void InitNewRegex () 
+               {
+                       this.machineFactory = CreateMachineFactory (this.pattern, this.roptions);
+                       cache.Add (this.pattern, this.roptions, this.machineFactory);
+                       this.group_count = machineFactory.GroupCount;
+                       this.gap = this.machineFactory.Gap;
+                       this.mapping = machineFactory.Mapping;
+                       this.group_names = this.machineFactory.NamesMapping;
+               }
+
+#if !NET_2_1
+               // The new rx engine seems to be working now, but
+               // potential problems are being tracked down here:
+               // https://bugzilla.novell.com/show_bug.cgi?id=470827
+               static readonly bool old_rx =
+                       Environment.GetEnvironmentVariable ("MONO_NEW_RX") == null;
+#endif
+
+               private static IMachineFactory CreateMachineFactory (string pattern, RegexOptions options) 
+               {
+                       Parser psr = new Parser ();
+                       RegularExpression re = psr.ParseRegularExpression (pattern, options);
 
-#if NET_2_0
-               protected
+#if NET_2_1
+                       ICompiler cmp = new PatternCompiler ();
 #else
-               private
+                       ICompiler cmp;
+                       if (!old_rx) {
+                               if ((options & RegexOptions.Compiled) != 0)
+                                       cmp = new CILCompiler ();
+                               else
+                                       cmp = new RxCompiler ();
+                       } else {
+                               cmp = new PatternCompiler ();
+                       }
 #endif
-               Regex (SerializationInfo info, StreamingContext context) :
+
+                       re.Compile (cmp, (options & RegexOptions.RightToLeft) != 0);
+
+                       IMachineFactory machineFactory = cmp.GetMachineFactory ();
+                       Hashtable mapping = new Hashtable ();
+                       machineFactory.Gap = psr.GetMapping (mapping);
+                       machineFactory.Mapping = mapping;
+                       machineFactory.NamesMapping = GetGroupNamesArray (machineFactory.GroupCount, machineFactory.Mapping);
+
+                       return machineFactory;
+               }
+
+               protected Regex (SerializationInfo info, StreamingContext context) :
                        this (info.GetString ("pattern"), 
                              (RegexOptions) info.GetValue ("options", typeof (RegexOptions)))
                {
                }
 
-#if NET_1_1 && !TARGET_JVM
-               // fixes public API signature
-               ~Regex ()
-               {
-               }
-#endif
                // public instance properties
                
                public RegexOptions Options {
@@ -276,46 +327,56 @@ namespace System.Text.RegularExpressions {
                
                public string [] GetGroupNames ()
                {
-                       string [] names = new string [mapping.Count];
-                       mapping.Keys.CopyTo (names, 0);
-
+                       string [] names = new string [1 + group_count];
+                       Array.Copy (group_names, names, 1 + group_count);
                        return names;
                }
 
-               public int[] GetGroupNumbers ()
+               public int [] GetGroupNumbers ()
                {
-                       int[] numbers = new int [mapping.Count];
-                       mapping.Values.CopyTo (numbers, 0);
-
+                       int [] numbers = new int [1 + group_count];
+                       Array.Copy (GroupNumbers, numbers, 1 + group_count);
                        return numbers;
                }
 
                public string GroupNameFromNumber (int i)
                {
-                       if (i > group_count)
+                       i = GetGroupIndex (i);
+                       if (i < 0)
                                return "";
-               
-                       foreach (string name in mapping.Keys) {
-                               if ((int) mapping [name] == i)
-                                       return name;
-                       }
 
-                       return "";
+                       return group_names [i];
                }
 
                public int GroupNumberFromName (string name)
                {
-                       if (mapping.Contains (name))
-                               return (int) mapping [name];
+                       if (!mapping.Contains (name))
+                               return -1;
+                       int i = (int) mapping [name];
+                       if (i >= gap)
+                               i = Int32.Parse (name);
+                       return i;
+               }
 
-                       return -1;
+               internal int GetGroupIndex (int number)
+               {
+                       if (number < gap)
+                               return number;
+                       if (gap > group_count)
+                               return -1;
+                       return Array.BinarySearch (GroupNumbers, gap, group_count - gap + 1, number);
+               }
+
+               int default_startat (string input)
+               {
+                       return (RightToLeft && input != null) ? input.Length : 0;
                }
 
                // match methods
                
                public bool IsMatch (string input)
                {
-                       return IsMatch (input, RightToLeft ? input.Length : 0);
+                       return IsMatch (input, default_startat (input));
                }
 
                public bool IsMatch (string input, int startat)
@@ -325,22 +386,32 @@ namespace System.Text.RegularExpressions {
 
                public Match Match (string input)
                {
-                       return Match (input, RightToLeft ? input.Length : 0);
+                       return Match (input, default_startat (input));
                }
 
                public Match Match (string input, int startat)
                {
+                       if (input == null)
+                               throw new ArgumentNullException ("input");
+                       if (startat < 0 || startat > input.Length)
+                               throw new ArgumentOutOfRangeException ("startat");
                        return CreateMachine ().Scan (this, input, startat, input.Length);
                }
 
-               public Match Match (string input, int startat, int length)
+               public Match Match (string input, int beginning, int length)
                {
-                       return CreateMachine ().Scan (this, input, startat, startat + length);
+                       if (input == null)
+                               throw new ArgumentNullException ("input");
+                       if (beginning < 0 || beginning > input.Length)
+                               throw new ArgumentOutOfRangeException ("beginning");
+                       if (length < 0 || length > input.Length - beginning)
+                               throw new ArgumentOutOfRangeException ("length");
+                       return CreateMachine ().Scan (this, input, beginning, beginning + length);
                }
 
                public MatchCollection Matches (string input)
                {
-                       return Matches (input, RightToLeft ? input.Length : 0);
+                       return Matches (input, default_startat (input));
                }
 
                public MatchCollection Matches (string input, int startat)
@@ -353,12 +424,12 @@ namespace System.Text.RegularExpressions {
 
                public string Replace (string input, MatchEvaluator evaluator)
                {
-                       return Replace (input, evaluator, Int32.MaxValue, RightToLeft ? input.Length : 0);
+                       return Replace (input, evaluator, Int32.MaxValue, default_startat (input));
                }
 
                public string Replace (string input, MatchEvaluator evaluator, int count)
                {
-                       return Replace (input, evaluator, count, RightToLeft ? input.Length : 0);
+                       return Replace (input, evaluator, count, default_startat (input));
                }
 
                class Adapter {
@@ -367,113 +438,76 @@ namespace System.Text.RegularExpressions {
                        public void Evaluate (Match m, StringBuilder sb) { sb.Append (ev (m)); }
                }
 
-               delegate void MatchAppendEvaluator (Match match, StringBuilder sb);
-
                public string Replace (string input, MatchEvaluator evaluator, int count, int startat)
                {
-                       Adapter a = new Adapter (evaluator);
-                       return Replace (input, new MatchAppendEvaluator (a.Evaluate), count, startat);
-               }
+                       if (input == null)
+                               throw new ArgumentNullException ("input");
+                       if (evaluator == null)
+                               throw new ArgumentNullException ("evaluator");
+                       if (count < -1)
+                               throw new ArgumentOutOfRangeException ("count");
+                       if (startat < 0 || startat > input.Length)
+                               throw new ArgumentOutOfRangeException ("startat");
 
-               string Replace (string input, MatchAppendEvaluator evaluator, int count, int startat)
-               {
-                       StringBuilder result = new StringBuilder ();
-                       int ptr = startat;
-                       int counter = count;
+                       BaseMachine m = (BaseMachine)CreateMachine ();
 
-                       result.Append (input, 0, ptr);
+                       if (RightToLeft)
+                               return m.RTLReplace (this, input, evaluator, count, startat);
 
-                       Match m = Match (input, startat);
-                       while (m.Success) {
-                               if (count != -1)
-                                       if(counter -- <= 0)
-                                               break;
-                               if (m.Index < ptr)
-                                       throw new SystemException ("how");
-                               result.Append (input, ptr, m.Index - ptr);
-                               evaluator (m, result);
-
-                               ptr = m.Index + m.Length;
-                               m = m.NextMatch ();
-                       }
-                       
-                       if (ptr == 0)
-                               return input;
-                       
-                       result.Append (input, ptr, input.Length - ptr);
+                       // NOTE: If this is a cause of a lot of allocations, we can convert it to
+                       //       use a ThreadStatic allocation mitigator
+                       Adapter a = new Adapter (evaluator);
 
-                       return result.ToString ();
+                       return m.LTRReplace (this, input, new BaseMachine.MatchAppendEvaluator (a.Evaluate),
+                                                                count, startat);
                }
 
                public string Replace (string input, string replacement)
                {
-                       return Replace (input, replacement, Int32.MaxValue, RightToLeft ? input.Length : 0);
+                       return Replace (input, replacement, Int32.MaxValue, default_startat (input));
                }
 
                public string Replace (string input, string replacement, int count)
                {
-                       return Replace (input, replacement, count, RightToLeft ? input.Length : 0);
+                       return Replace (input, replacement, count, default_startat (input));
                }
 
                public string Replace (string input, string replacement, int count, int startat)
                {
-                       ReplacementEvaluator ev = new ReplacementEvaluator (this, replacement);
-                       return Replace (input, new MatchAppendEvaluator (ev.EvaluateAppend), count, startat);
+                       if (input == null)
+                               throw new ArgumentNullException ("input");
+                       if (replacement == null)
+                               throw new ArgumentNullException ("replacement");
+                       if (count < -1)
+                               throw new ArgumentOutOfRangeException ("count");
+                       if (startat < 0 || startat > input.Length)
+                               throw new ArgumentOutOfRangeException ("startat");
+
+                       return CreateMachine ().Replace (this, input, replacement, count, startat);
                }
 
                // split methods
 
                public string [] Split (string input)
                {
-                       return Split (input, Int32.MaxValue, RightToLeft ? input.Length : 0);
+                       return Split (input, Int32.MaxValue, default_startat (input));
                }
 
                public string [] Split (string input, int count)
                {
-                       return Split (input, count, RightToLeft ? input.Length : 0);
+                       return Split (input, count, default_startat (input));
                }
 
                public string [] Split (string input, int count, int startat)
                {
-                       ArrayList splits = new ArrayList ();
-                       if (count == 0)
-                               count = Int32.MaxValue;
-
-                       int ptr = startat;
-                       Match m = null;
-                       while (--count > 0) {
-                               if (m != null)
-                                       m = m.NextMatch ();
-                               else
-                                       m = Match (input, ptr);
-
-                               if (!m.Success)
-                                       break;
-                       
-                               if (RightToLeft)
-                                       splits.Add (input.Substring (m.Index + m.Length, ptr - m.Index - m.Length));
-                               else
-                                       splits.Add (input.Substring (ptr, m.Index - ptr));
-                                       
-                               int gcount = m.Groups.Count;
-                               for (int gindex = 1; gindex < gcount; gindex++) {
-                                       Group grp = m.Groups [gindex];
-                                       splits.Add (input.Substring (grp.Index, grp.Length));
-                               }
-
-                               if (RightToLeft)
-                                       ptr = m.Index; 
-                               else
-                                       ptr = m.Index + m.Length;
-                                       
-                       }
-
-                       if (RightToLeft && ptr >= 0)
-                               splits.Add (input.Substring (0, ptr));
-                       if (!RightToLeft && ptr <= input.Length)
-                               splits.Add (input.Substring (ptr));
+                       if (input == null)
+                               throw new ArgumentNullException ("input");
+                       if (count < 0)
+                               throw new ArgumentOutOfRangeException ("count");
+                       if (startat < 0 || startat > input.Length)
+                               throw new ArgumentOutOfRangeException ("startat");
 
-                       return (string []) splits.ToArray (typeof (string));
+                       return CreateMachine ().Split (this, input, count, startat);
                }
 
                // This method is called at the end of the constructor of compiled
@@ -490,12 +524,12 @@ namespace System.Text.RegularExpressions {
                        // expressions compiled to assemblies.
                        Init ();
                }
-
+#if !NET_2_1
                protected bool UseOptionC ()
                {
                        return ((roptions & RegexOptions.Compiled) != 0);
                }
-
+#endif
                protected bool UseOptionR ()
                {
                        return ((roptions & RegexOptions.RightToLeft) != 0);
@@ -521,6 +555,10 @@ namespace System.Text.RegularExpressions {
                        get { return group_count; }
                }
 
+               internal int Gap {
+                       get { return gap; }
+               }
+
                // private
 
                private IMachine CreateMachine ()
@@ -528,11 +566,36 @@ namespace System.Text.RegularExpressions {
                        return machineFactory.NewInstance ();
                }
 
+               private static string [] GetGroupNamesArray (int groupCount, IDictionary mapping) 
+               {
+                       string [] group_names = new string [groupCount + 1];
+                       IDictionaryEnumerator de = mapping.GetEnumerator ();
+                       while (de.MoveNext ())
+                               group_names [(int) de.Value] = (string) de.Key;
+                       return group_names;
+               }
+
+               private int [] GroupNumbers {
+                       get {
+                               if (group_numbers == null) {
+                                       group_numbers = new int [1 + group_count];
+                                       for (int i = 0; i < gap; ++i)
+                                               group_numbers [i] = i;
+                                       for (int i = gap; i <= group_count; ++i)
+                                               group_numbers [i] = Int32.Parse (group_names [i]);
+                                       return group_numbers;
+                               }
+                               return group_numbers;
+                       }
+               }
+
                private IMachineFactory machineFactory;
                private IDictionary mapping;
                private int group_count;
+               private int gap;
                private bool refsInitialized;
-
+               private string [] group_names;
+               private int [] group_numbers;
                
                // protected members
 
@@ -540,15 +603,23 @@ namespace System.Text.RegularExpressions {
                protected internal RegexOptions roptions;
                
                // MS undocumented members
+#if NET_2_1
+               [MonoTODO]
+               internal System.Collections.Generic.Dictionary<string, int> capnames;
+               [MonoTODO]
+               internal System.Collections.Generic.Dictionary<int, int> caps;
+#else
                [MonoTODO]
                protected internal System.Collections.Hashtable capnames;
                [MonoTODO]
                protected internal System.Collections.Hashtable caps;
+
+               [MonoTODO]
+               protected internal RegexRunnerFactory factory;
+#endif
                [MonoTODO]
                protected internal int capsize;
                [MonoTODO]
                protected internal string [] capslist;
-               [MonoTODO]
-               protected internal RegexRunnerFactory factory;
        }
 }