2007-10-22 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / System / System.Text.RegularExpressions / replace.cs
index 5bb0b1f3296727bc8f16503b0b14fa0c8466b687..7b8a56b279f5c4275fd1fa53bc44544574035fe9 100644 (file)
@@ -43,78 +43,134 @@ namespace System.Text.RegularExpressions {
 
                public ReplacementEvaluator (Regex regex, string replacement) {
                        this.regex = regex;
-                       terms = new ArrayList ();
-                       Compile (replacement);
+                       this.replacement = replacement;
+                       this.pieces = null;
+                       this.n_pieces = 0;
+                       Compile ();
                }
 
-               public string Evaluate (Match match) {
-                       StringBuilder result = new StringBuilder ();
-                       foreach (Term term in terms)
-                               term.AppendResult (match, result);
+               public string Evaluate (Match match) 
+               {
+                       StringBuilder sb = new StringBuilder ();
+                       EvaluateAppend (match, sb);
+                       return sb.ToString ();
+               }
+
+               public void EvaluateAppend (Match match, StringBuilder sb)
+               {
+                       int i = 0, k, count;
+
+                       if (n_pieces == 0) {
+                               sb.Append (replacement);
+                               return;
+                       }
 
-                       return result.ToString ();
+                       while (i < n_pieces) {
+                               k = pieces [i++];
+                               if (k >= 0) {
+                                       count = pieces [i++];
+                                       sb.Append (replacement, k, count);
+                               } else if (k < -3) {
+                                       Group group = match.Groups [-(k + 4)];
+                                       sb.Append (group.Text, group.Index, group.Length);
+                               } else if (k == -1) {
+                                       sb.Append (match.Text);
+                               } else if (k == -2) {
+                                       sb.Append (match.Text, 0, match.Index);
+                               } else { // k == -3
+                                       int matchend = match.Index + match.Length;
+                                       sb.Append (match.Text, matchend, match.Text.Length - matchend);
+                               } 
+                       }
                }
 
-               // private
+               void Ensure (int size)
+               {
+                       int new_size;
+                       if (pieces == null) {
+                               new_size = 4;
+                               if (new_size < size)
+                                       new_size = size;
+                               pieces = new int [new_size];
+                       } else if (size >= pieces.Length) {
+                               new_size = pieces.Length + (pieces.Length >> 1);
+                               if (new_size < size)
+                                       new_size = size;
+                               int [] new_pieces = new int [new_size];
+                               Array.Copy (pieces, new_pieces, n_pieces);
+                               pieces = new_pieces;
+                       }
+               }
+
+               void AddFromReplacement (int start, int end)
+               {
+                       if (start == end)
+                               return;
+                       Ensure (n_pieces + 2);
+                       pieces [n_pieces++] = start;
+                       pieces [n_pieces++] = end - start;
+               }
 
-               private void Compile (string replacement) {
+               void AddInt (int i)
+               {
+                       Ensure (n_pieces + 1);
+                       pieces [n_pieces++] = i;
+               }
+
+               // private
+               private void Compile () {
                        replacement = Parser.Unescape (replacement);
-                       StringBuilder literal = new StringBuilder ();
 
-                       int ptr = 0;
+                       int anchor = 0, ptr = 0, saveptr;
                        char c;
-                       Term term = null;
                        while (ptr < replacement.Length) {
-                               c = replacement[ptr ++];
+                               c = replacement [ptr++];
 
-                               if (c != '$') {
-                                       literal.Append (c);
+                               if (c != '$')
                                        continue;
-                               }
 
                                // If the '$' was the last character, just emit it as is
-                               if (ptr == replacement.Length) {
-                                       literal.Append (c);
+                               if (ptr == replacement.Length)
                                        break;
-                               }
 
                                // If we saw a '$$'
-                               if (replacement[ptr] == '$') {
-                                       literal.Append (c);
-                                       ++ ptr;
+                               if (replacement [ptr] == '$') {
+                                       // Everthing from 'anchor' upto and including the first '$' is copied from the replacement string
+                                       AddFromReplacement (anchor, ptr);
+                                       // skip over the second '$'.
+                                       anchor = ++ptr;
                                        continue;
                                }
 
-                               int saveptr = ptr - 1;
+                               saveptr = ptr - 1;
 
-                               term = CompileTerm (replacement, ref ptr);
+                               int from_match = CompileTerm (ref ptr);
 
-                               if (term != null) {
-                                       term.Literal = literal.ToString ();
-                                       terms.Add (term);
+                               // We couldn't recognize the term following the '$'.  Just treat it as a literal.
+                               // 'ptr' has already been advanced, no need to rewind it back
+                               if (from_match >= 0)
+                                       continue;
 
-                                       term = null;
-                                       literal.Length = 0;
-                               } else {
-                                       // If 'CompileTerm' couldn't identify it, don't abort, simply copy it over.
-                                       literal.Append (replacement, saveptr, ptr - saveptr);
-                               }
+                               AddFromReplacement (anchor, saveptr);
+                               AddInt (from_match);
+                               anchor = ptr;
                        }
 
-                       if (term == null && literal.Length > 0) {
-                               terms.Add (new Term (literal.ToString ()));
-                       }
+                       // If we never needed to advance anchor, it means the result is the whole replacement string.
+                       // We optimize that case by never allocating the pieces array.
+                       if (anchor != 0)
+                               AddFromReplacement (anchor, ptr);
                }
 
-               private Term CompileTerm (string str, ref int ptr) {
-                       char c = str[ptr];
+               private int CompileTerm (ref int ptr) {
+                       char c = replacement [ptr];
 
                        if (Char.IsDigit (c)) {         // numbered group
-                               int n = Parser.ParseDecimal (str, ref ptr);
+                               int n = Parser.ParseDecimal (replacement, ref ptr);
                                if (n < 0 || n > regex.GroupCount)
-                                       return null;
+                                       return 0;
                                
-                               return new Term (TermOp.Match, n);
+                               return -n - 4;
                        }
                        
                        ++ ptr;
@@ -128,112 +184,53 @@ namespace System.Text.RegularExpressions {
                                        // The parser is written such that there are few explicit range checks
                                        // and depends on 'IndexOutOfRangeException' being thrown.
 
-                                       if (Char.IsDigit (str [ptr])) {
-                                               n = Parser.ParseDecimal (str, ref ptr);
+                                       if (Char.IsDigit (replacement [ptr])) {
+                                               n = Parser.ParseDecimal (replacement, ref ptr);
                                                name = "";
                                        } else {
-                                               name = Parser.ParseName (str, ref ptr);
+                                               name = Parser.ParseName (replacement, ref ptr);
                                        }
                                } catch (IndexOutOfRangeException) {
-                                       ptr = str.Length;
-                                       return null;
+                                       ptr = replacement.Length;
+                                       return 0;
                                }
 
-                               if (ptr == str.Length || str[ptr] != '}' || name == null)
-                                       return null;
+                               if (ptr == replacement.Length || replacement[ptr] != '}' || name == null)
+                                       return 0;
                                ++ptr;                  // Swallow the '}'
 
                                if (name != "")
                                        n = regex.GroupNumberFromName (name);
 
                                if (n < 0 || n > regex.GroupCount)
-                                       return null;
+                                       return 0;
 
-                               return new Term (TermOp.Match, n);
+                               return -n - 4;
                        }
 
-                       case '&':                       // entire match
-                               return new Term (TermOp.Match, 0);
+                       case '&':                       // entire match.  Value should be same as $0
+                               return -4;
 
                        case '`':                       // text before match
-                               return new Term (TermOp.PreMatch, 0);
+                               return -2;
 
                        case '\'':                      // text after match
-                               return new Term (TermOp.PostMatch, 0);
+                               return -3;
 
                        case '+':                       // last group
-                               return new Term (TermOp.Match, regex.GroupCount - 1);
+                               return -regex.GroupCount - 4;
 
                        case '_':                       // entire text
-                               return new Term (TermOp.All, 0);
+                               return -1;
 
                        default:
-                               return null;
+                               return 0;
                        }
                }
 
                private Regex regex;
-               private ArrayList terms;
-
-               private enum TermOp {
-                       None,                           // no action
-                       Match,                          // input within group
-                       PreMatch,                       // input before group
-                       PostMatch,                      // input after group
-                       All                             // entire input
-               }
-
-               private class Term {
-                       public Term (TermOp op, int arg) {
-                               this.op = op;
-                               this.arg = arg;
-                               this.literal = "";
-                       }
-
-                       public Term (string literal) {
-                               this.op = TermOp.None;
-                               this.arg = 0;
-                               this.literal = literal;
-                       }
-
-                       public string Literal {
-                               set { literal = value; }
-                       }
-
-                       public void AppendResult (Match match, StringBuilder sb) {
-                               Group group = match.Groups[arg];
-
-                               sb.Append (literal);
-                               switch (op) {
-                               case TermOp.None:
-                                       break;
-
-                               case TermOp.Match:
-                                       sb.Append (group.Text, group.Index, group.Length);
-                                       break;
-
-                               case TermOp.PreMatch:
-                                       sb.Append (group.Text, 0, group.Index);
-                                       break;
-
-                               case TermOp.PostMatch:
-                                       int matchend = group.Index + group.Length;
-                                       sb.Append (group.Text, matchend, group.Text.Length - matchend);
-                                       break;
-
-                               case TermOp.All:
-                                       sb.Append (group.Text);
-                                       break;
-                               }
-                       }
-               
-                       public TermOp op;               // term type
-                       public int arg;                 // group argument
-                       public string literal;          // literal to prepend
-
-                       public override string ToString () {
-                               return op.ToString () + "(" + arg + ") " + literal;
-                       }
-               }
+               int n_pieces;
+               private int [] pieces;
+               string replacement;
        }
 }