public ReplacementEvaluator (Regex regex, string replacement) {
this.regex = regex;
- terms = new ArrayList ();
- Compile (replacement);
+ this.replacement = replacement;
+ this.pieces = null;
+ this.n_pieces = 0;
+ Compile ();
}
- public string Evaluate (Match match) {
- StringBuilder result = new StringBuilder ();
- foreach (Term term in terms)
- term.AppendResult (match, result);
+ public string Evaluate (Match match)
+ {
+ StringBuilder sb = new StringBuilder ();
+ EvaluateAppend (match, sb);
+ return sb.ToString ();
+ }
+
+ public void EvaluateAppend (Match match, StringBuilder sb)
+ {
+ int i = 0, k, count;
+
+ if (n_pieces == 0) {
+ sb.Append (replacement);
+ return;
+ }
- return result.ToString ();
+ while (i < n_pieces) {
+ k = pieces [i++];
+ if (k >= 0) {
+ count = pieces [i++];
+ sb.Append (replacement, k, count);
+ } else if (k < -3) {
+ Group group = match.Groups [-(k + 4)];
+ sb.Append (group.Text, group.Index, group.Length);
+ } else if (k == -1) {
+ sb.Append (match.Text);
+ } else if (k == -2) {
+ sb.Append (match.Text, 0, match.Index);
+ } else { // k == -3
+ int matchend = match.Index + match.Length;
+ sb.Append (match.Text, matchend, match.Text.Length - matchend);
+ }
+ }
}
- // private
+ void Ensure (int size)
+ {
+ int new_size;
+ if (pieces == null) {
+ new_size = 4;
+ if (new_size < size)
+ new_size = size;
+ pieces = new int [new_size];
+ } else if (size >= pieces.Length) {
+ new_size = pieces.Length + (pieces.Length >> 1);
+ if (new_size < size)
+ new_size = size;
+ int [] new_pieces = new int [new_size];
+ Array.Copy (pieces, new_pieces, n_pieces);
+ pieces = new_pieces;
+ }
+ }
+
+ void AddFromReplacement (int start, int end)
+ {
+ if (start == end)
+ return;
+ Ensure (n_pieces + 2);
+ pieces [n_pieces++] = start;
+ pieces [n_pieces++] = end - start;
+ }
+
+ void AddInt (int i)
+ {
+ Ensure (n_pieces + 1);
+ pieces [n_pieces++] = i;
+ }
- private void Compile (string replacement) {
+ // private
+ private void Compile () {
replacement = Parser.Unescape (replacement);
- StringBuilder literal = new StringBuilder ();
- int ptr = 0;
+ int anchor = 0, ptr = 0, saveptr;
char c;
- Term term = null;
while (ptr < replacement.Length) {
- c = replacement[ptr ++];
+ c = replacement [ptr++];
- if (c == '$') {
- if (replacement[ptr] != '$')
- term = CompileTerm (replacement, ref ptr);
- else
- ++ ptr;
- }
+ if (c != '$')
+ continue;
- if (term != null) {
- term.Literal = literal.ToString ();
- terms.Add (term);
+ // If the '$' was the last character, just emit it as is
+ if (ptr == replacement.Length)
+ break;
- term = null;
- literal.Length = 0;
+ // If we saw a '$$'
+ if (replacement [ptr] == '$') {
+ // Everthing from 'anchor' upto and including the first '$' is copied from the replacement string
+ AddFromReplacement (anchor, ptr);
+ // skip over the second '$'.
+ anchor = ++ptr;
+ continue;
}
- else
- literal.Append (c);
- }
- if (term == null && literal.Length > 0) {
- terms.Add (new Term (literal.ToString ()));
+ saveptr = ptr - 1;
+
+ int from_match = CompileTerm (ref ptr);
+
+ // We couldn't recognize the term following the '$'. Just treat it as a literal.
+ // 'ptr' has already been advanced, no need to rewind it back
+ if (from_match >= 0)
+ continue;
+
+ AddFromReplacement (anchor, saveptr);
+ AddInt (from_match);
+ anchor = ptr;
}
+
+ // If we never needed to advance anchor, it means the result is the whole replacement string.
+ // We optimize that case by never allocating the pieces array.
+ if (anchor != 0)
+ AddFromReplacement (anchor, ptr);
}
- private Term CompileTerm (string str, ref int ptr) {
- char c = str[ptr];
+ private int CompileTerm (ref int ptr) {
+ char c = replacement [ptr];
if (Char.IsDigit (c)) { // numbered group
- int n = Parser.ParseDecimal (str, ref ptr);
+ int n = Parser.ParseDecimal (replacement, ref ptr);
if (n < 0 || n > regex.GroupCount)
- throw new ArgumentException ("Bad group number.");
+ return 0;
- return new Term (TermOp.Match, n);
+ return -n - 4;
}
++ ptr;
switch (c) {
case '{': { // named group
- string name = Parser.ParseName (str, ref ptr);
- if (str[ptr ++] != '}' || name == null)
- throw new ArgumentException ("Bad group name.");
-
- int n = regex.GroupNumberFromName (name);
-
- if (n < 0)
- throw new ArgumentException ("Bad group name.");
+ string name;
+ int n = -1;
+
+ try {
+ // The parser is written such that there are few explicit range checks
+ // and depends on 'IndexOutOfRangeException' being thrown.
+
+ if (Char.IsDigit (replacement [ptr])) {
+ n = Parser.ParseDecimal (replacement, ref ptr);
+ name = "";
+ } else {
+ name = Parser.ParseName (replacement, ref ptr);
+ }
+ } catch (IndexOutOfRangeException) {
+ ptr = replacement.Length;
+ return 0;
+ }
+
+ if (ptr == replacement.Length || replacement[ptr] != '}' || name == null)
+ return 0;
+ ++ptr; // Swallow the '}'
+
+ if (name != "")
+ n = regex.GroupNumberFromName (name);
- return new Term (TermOp.Match, n);
+ if (n < 0 || n > regex.GroupCount)
+ return 0;
+
+ return -n - 4;
}
- case '&': // entire match
- return new Term (TermOp.Match, 0);
+ case '&': // entire match. Value should be same as $0
+ return -4;
case '`': // text before match
- return new Term (TermOp.PreMatch, 0);
+ return -2;
case '\'': // text after match
- return new Term (TermOp.PostMatch, 0);
+ return -3;
case '+': // last group
- return new Term (TermOp.Match, regex.GroupCount - 1);
+ return -regex.GroupCount - 4;
case '_': // entire text
- return new Term (TermOp.All, 0);
+ return -1;
default:
- throw new ArgumentException ("Bad replacement pattern.");
+ return 0;
}
}
private Regex regex;
- private ArrayList terms;
-
- private enum TermOp {
- None, // no action
- Match, // input within group
- PreMatch, // input before group
- PostMatch, // input after group
- All // entire input
- }
-
- private class Term {
- public Term (TermOp op, int arg) {
- this.op = op;
- this.arg = arg;
- this.literal = "";
- }
-
- public Term (string literal) {
- this.op = TermOp.None;
- this.arg = 0;
- this.literal = literal;
- }
-
- public string Literal {
- set { literal = value; }
- }
-
- public void AppendResult (Match match, StringBuilder sb) {
- Group group = match.Groups[arg];
-
- sb.Append (literal);
- switch (op) {
- case TermOp.None:
- break;
-
- case TermOp.Match:
- sb.Append (group.Text, group.Index, group.Length);
- break;
-
- case TermOp.PreMatch:
- sb.Append (group.Text, 0, group.Index);
- break;
-
- case TermOp.PostMatch:
- int matchend = group.Index + group.Length;
- sb.Append (group.Text, matchend, group.Text.Length - matchend);
- break;
-
- case TermOp.All:
- sb.Append (group.Text);
- break;
- }
- }
-
- public TermOp op; // term type
- public int arg; // group argument
- public string literal; // literal to prepend
-
- public override string ToString () {
- return op.ToString () + "(" + arg + ") " + literal;
- }
- }
+ int n_pieces;
+ private int [] pieces;
+ string replacement;
}
}