1 //------------------------------------------------------------------------------
2 // <copyright file="RegexMatch.cs" company="Microsoft">
3 // Copyright (c) Microsoft Corporation. All rights reserved.
5 //------------------------------------------------------------------------------
7 // Match is the result class for a regex search.
8 // It returns the location, length, and substring for
9 // the entire match as well as every captured group.
11 // Match is also used during the search to keep track of each capture for each group. This is
12 // done using the "_matches" array. _matches[x] represents an array of the captures for group x.
13 // This array consists of start and length pairs, and may have empty entries at the end. _matchcount[x]
14 // stores how many captures group x has. Note that _matchcount[x]*2 is the number of valid
15 // values in _matches[x]. _matchcount[x]*2-2 is the index of the Start of the last capture, and _matchcount[x]*2-1 is the
16 // index of the Length of the last capture.
18 // For example, if group 2 has one capture starting at position 4 with length 6,
19 // _matchcount[2] == 1
20 // _matches[2][0] == 4
21 // _matches[2][1] == 6
23 // Values in the _matches array can also be negative. This happens when using the balanced match
24 // construct, "(?<start-end>...)". When the "end" group matches, a capture is added for both the "start"
25 // and "end" groups. The capture added for "start" receives the negative values, and these values point to
26 // the next capture to be balanced. They do NOT point to the capture that "end" just balanced out. The negative
27 // values are indices into the _matches array transformed by the formula -3-x. The formula is its own inverse: applying -3-x again recovers the index.
30 namespace System.Text.RegularExpressions {
32 using System.Collections;
33 using System.Collections.Generic;
34 using System.Diagnostics;
35 using System.Security.Permissions;
36 using System.Globalization;
41 /// the results from a single regular expression match.
47 public class Match : Group {
// Shared instance built with a null regex, a single group, and empty text.
48 internal static Match _empty = new Match(null, 1, String.Empty, 0, 0, 0);
// Cache for the Groups property; stays null until Groups is first read.
49 internal GroupCollection _groupcoll;
// The regex used by NextMatch() and Result() on this match.
52 internal Regex _regex;
// Text window bounds. The constructor asserts
// 0 <= _textbeg <= _textstart <= _textend <= _text.Length.
53 internal int _textbeg;
// Position handed to Regex.Run as the start position when NextMatch() continues the search.
54 internal int _textpos;
55 internal int _textend;
56 internal int _textstart;
58 // output from the match
// _matches[x] holds (start, length) pairs for group x; _matchcount[x] is how many pairs are valid.
59 internal int[][] _matches;
60 internal int[] _matchcount;
61 internal bool _balancing; // whether we've done any balancing with this match. If we
62 // have done balancing, we'll need to do extra work in Tidy().
66 /// Returns an empty Match object.
69 public static Match Empty {
76 * Nonpublic constructor
78 internal Match(Regex regex, int capcount, String text, int begpos, int len, int startpos)
80 : base(text, new int[2], 0) {
// NOTE(review): some assignments of this constructor are not visible in this view; the comments
// below describe only the statements that are.
// One capture counter per group.
83 _matchcount = new int[capcount];
// One array of (start, length) pairs per group; the inner arrays are not allocated here.
85 _matches = new int[capcount][];
// The text window ends len characters after begpos.
88 _textend = begpos + len;
89 _textstart = startpos;
92 // No need for an exception here. This is only called internally, so we'll use an Assert instead
// Debug-only check that 0 <= _textbeg <= _textstart <= _textend <= _text.Length.
93 System.Diagnostics.Debug.Assert(!(_textbeg < 0 || _textstart < _textbeg || _textend < _textstart || _text.Length < _textend),
94 "The parameters are out of range.");
99 * Nonpublic set-text method
101 internal virtual void Reset(Regex regex, String text, int textbeg, int textend, int textstart) {
// Stores the new search start position.
// NOTE(review): the assignments for the other parameters are not visible in this view.
106 _textstart = textstart;
// Visits every group's capture counter.
// NOTE(review): the loop body is not visible here; presumably it clears each count — confirm.
108 for (int i = 0; i < _matchcount.Length; i++) {
116 /// <para>Gets the collection of groups for this match; the collection is created on first access.</para>
118 public virtual GroupCollection Groups {
// Lazily builds the collection on first access and caches it in _groupcoll.
// The second constructor argument is null here; the MatchSparse override passes its _caps instead.
120 if (_groupcoll == null)
121 _groupcoll = new GroupCollection(this, null);
128 * Returns the next match
131 /// <para>Returns a new Match with the results for the next match, starting
132 /// at the position at which the last match ended (at the character beyond the last
133 /// matched character).</para>
135 public Match NextMatch() {
// Re-runs the owning regex over the same text window (_textbeg, with length _textend - _textbeg),
// starting at _textpos. This match's _length is passed as the second argument.
// NOTE(review): Regex.Run is not visible here; presumably it uses that length to step past an
// empty previous match — confirm against Regex.Run.
139 return _regex.Run(false, _length, _text, _textbeg, _textend - _textbeg, _textpos);
144 * Return the result string (using the replacement pattern)
148 /// Returns the expansion of the passed replacement pattern. For
149 /// example, if the replacement pattern is "$1$2", Result returns the concatenation
150 /// of Group(1).ToString() and Group(2).ToString().
153 public virtual String Result(String replacement) {
154 RegexReplacement repl;
// A null replacement pattern is rejected with ArgumentNullException.
156 if (replacement == null)
157 throw new ArgumentNullException("replacement");
// NOTE(review): the condition guarding this throw is not visible in this view; the resource
// name (NoResultOnFailed) suggests it applies to a failed match — confirm.
160 throw new NotSupportedException(SR.GetString(SR.NoResultOnFailed));
// Try the parsed replacement cached on the regex first.
162 repl = (RegexReplacement)_regex.replref.Get();
// Nothing cached, or cached for a different pattern: parse this pattern and cache the result.
164 if (repl == null || !repl.Pattern.Equals(replacement)) {
165 repl = RegexParser.ParseReplacement(replacement, _regex.caps, _regex.capsize, _regex.capnames, _regex.roptions);
166 _regex.replref.Cache(repl);
// Expand the parsed replacement against this match.
169 return repl.Replacement(this);
173 * Used by the replacement code
175 internal virtual String GroupToStringImpl(int groupnum) {
// c is the number of captures recorded for the group.
// NOTE(review): whatever handles c == 0 is not visible in this view.
176 int c = _matchcount[groupnum];
180 int [] matches = _matches[groupnum];
// The last capture's start is at index (c - 1) * 2 and its length at index c * 2 - 1.
182 return _text.Substring(matches[(c - 1) * 2], matches[(c * 2) - 1]);
186 * Used by the replacement code
188 internal String LastGroupToStringImpl() {
// Delegates to GroupToStringImpl with the highest group index (_matchcount.Length - 1).
189 return GroupToStringImpl(_matchcount.Length - 1);
194 * Convert to a thread-safe object by precomputing cache contents
198 /// Returns a Match instance equivalent to the one supplied that is safe to share
199 /// between multiple threads.
205 [HostProtection(Synchronization=true)]
207 static public Match Synchronized(Match inner) {
209 static internal Match Synchronized(Match inner) {
// NOTE(review): two alternative signatures appear back to back; presumably they are selected by
// conditional-compilation directives that are not visible in this view — confirm.
// NOTE(review): the condition guarding this throw is not visible; the exception names the
// "inner" parameter.
212 throw new ArgumentNullException("inner");
214 int numgroups = inner._matchcount.Length;
216 // Populate all groups by looking at each one
217 for (int i = 0; i < numgroups; i++) {
// Reading inner.Groups[i] retrieves the Group object for slot i.
218 Group group = inner.Groups[i];
220 // Depends on the fact that Group.Synchronized just
221 // operates on and returns the same instance
222 System.Text.RegularExpressions.Group.Synchronized(group);
229 * Nonpublic builder: add a capture to the group specified by "cap"
231 internal virtual void AddMatch(int cap, int start, int len) {
// Appends the pair (start, len) to group cap's capture list.
// First use of this group: allocate room for exactly one pair.
234 if (_matches[cap] == null)
235 _matches[cap] = new int[2];
237 capcount = _matchcount[cap];
// Grow when one more pair would not fit: allocate capcount * 8 ints and copy over the
// capcount * 2 values currently in use.
239 if (capcount * 2 + 2 > _matches[cap].Length) {
240 int[] oldmatches = _matches[cap];
241 int[] newmatches = new int[capcount * 8];
242 for (int j = 0; j < capcount * 2; j++)
243 newmatches[j] = oldmatches[j];
244 _matches[cap] = newmatches;
// Store the new pair in the next free slots and bump the group's capture count.
247 _matches[cap][capcount * 2] = start;
248 _matches[cap][capcount * 2 + 1] = len;
249 _matchcount[cap] = capcount + 1;
253 * Nonpublic builder: Add a capture to balance the specified group. This is used by the
254 balanced match construct. (?<foo-foo2>...)
256 If there were no such thing as backtracking, this would be as simple as calling RemoveMatch(cap).
257 However, since we have backtracking, we need to keep track of everything.
259 internal virtual void BalanceMatch(int cap) {
// Appends one entry to group cap via AddMatch: either a copy of an existing negative
// (reference) pair, or a new reference pair encoded as (-3 - target, -4 - target).
// NOTE(review): a few lines of this body are not visible in this view, including the statement
// that goes with "move back to the previous capture" and the branch keyword before the
// final AddMatch.
265 // we'll look at the last capture first
266 capcount = _matchcount[cap];
// Index of the start slot of the last recorded pair.
267 target = capcount * 2 - 2;
269 // first see if it is negative, and therefore is a reference to the next available
270 // capture group for balancing. If it is, we'll reset target to point to that capture.
271 if (_matches[cap][target] < 0)
272 target = -3 - _matches[cap][target];
274 // move back to the previous capture
277 // if the previous capture is a reference, just copy that reference to the end. Otherwise, point to it.
278 if (target >= 0 && _matches[cap][target] < 0)
279 AddMatch(cap, _matches[cap][target], _matches[cap][target+1]);
281 AddMatch(cap, -3 - target, -4 - target /* == -3 - (target + 1) */ );
286 * Nonpublic builder: removes a group match by capnum
288 internal virtual void RemoveMatch(int cap) {
293 * Nonpublic: tells if a group was matched by capnum
295 internal virtual bool IsMatched(int cap) {
// True when cap is a valid group index, the group has at least one recorded pair, and the
// length slot of its last pair is not -2 (written here as -3 + 1).
// NOTE(review): -2 is what BalanceMatch's "-4 - target" yields when target is -2; presumably
// it marks a group whose captures have all been balanced away — confirm.
296 return cap < _matchcount.Length && _matchcount[cap] > 0 && _matches[cap][_matchcount[cap] * 2 - 1] != (-3 + 1);
300 * Nonpublic: returns the index of the last specified matched group by capnum
302 internal virtual int MatchIndex(int cap) {
// i is the start slot of the group's last recorded pair.
303 int i = _matches[cap][_matchcount[cap] * 2 - 2];
// Decode i with -3 - i and read the slot it refers to.
// NOTE(review): the branch that handles a non-negative i is not visible in this view.
307 return _matches[cap][-3 - i];
311 * Nonpublic: returns the length of the last specified matched group by capnum
313 internal virtual int MatchLength(int cap) {
// i is the length slot of the group's last recorded pair.
314 int i = _matches[cap][_matchcount[cap] * 2 - 1];
// Decode i with -3 - i and read the slot it refers to.
// NOTE(review): the branch that handles a non-negative i is not visible in this view.
318 return _matches[cap][-3 - i];
322 * Nonpublic: tidy the match so that it can be used as an immutable result
324 internal virtual void Tidy(int textpos) {
// NOTE(review): several lines of this body (local declarations, the use of textpos, and the
// statements inside the inner branches) are not visible in this view.
// Publish group 0's first (start, length) pair as this match's _index and _length.
327 interval = _matches[0];
328 _index = interval[0];
329 _length = interval[1];
331 _capcount = _matchcount[0];
334 // The idea here is that we want to compact all of our unbalanced captures. To do that we
335 // use j basically as a count of how many unbalanced captures we have at any given time
336 // (really j is an index, but j/2 is the count). First we skip past all of the real captures
337 // until we find a balance capture. Then we check each subsequent entry. If it's a balance
338 // capture (it's negative), we decrement j. If it's a real capture, we increment j and copy
339 // it down to the last free position.
340 for (int cap = 0; cap < _matchcount.Length; cap++) {
// limit is the number of ints in use for this group (two per recorded pair).
344 limit = _matchcount[cap] * 2;
345 matcharray = _matches[cap];
// First pass: scan forward looking for the first negative entry.
350 for (i = 0; i < limit; i++) {
351 if (matcharray[i] < 0)
// Second pass: continue from that position, with j as the write index.
355 for (j = i; i < limit; i++) {
356 if (matcharray[i] < 0) {
357 // skip negative values
361 // but if we find something positive (an actual capture), copy it back to the last
362 // unbalanced position.
364 matcharray[j] = matcharray[i];
// j counts ints, so the number of pairs kept for this group is j / 2.
369 _matchcount[cap] = j / 2;
392 internal virtual void Dump() {
// Writes each group's recorded pairs to the debug output.
395 for (i = 0; i < _matchcount.Length; i++) {
396 System.Diagnostics.Debug.WriteLine("Capnum " + i.ToString(CultureInfo.InvariantCulture) + ":");
398 for (j = 0; j < _matchcount[i]; j++) {
// Only a non-negative start is used to extract a substring of _text.
401 if (_matches[i][j * 2] >= 0)
402 text = _text.Substring(_matches[i][j * 2], _matches[i][j * 2 + 1]);
// Emit the raw (start, length) values followed by the text.
404 System.Diagnostics.Debug.WriteLine(" (" + _matches[i][j * 2].ToString(CultureInfo.InvariantCulture) + "," + _matches[i][j * 2 + 1].ToString(CultureInfo.InvariantCulture) + ") " + text);
413 * MatchSparse is for handling the case where slots are
414 * sparsely arranged (e.g., if somebody says use slot 100000)
416 internal class MatchSparse : Match {
417 // the lookup hashtable
419 new internal Dictionary<Int32, Int32> _caps;
421 new internal Hashtable _caps;
425 * Nonpublic constructor
428 internal MatchSparse(Regex regex, Dictionary<Int32, Int32> caps, int capcount,
430 internal MatchSparse(Regex regex, Hashtable caps, int capcount,
432 String text, int begpos, int len, int startpos)
434 : base(regex, capcount, text, begpos, len, startpos) {
439 public override GroupCollection Groups {
// Lazily builds the collection on first access and caches it in _groupcoll, passing the
// _caps lookup table as the second constructor argument.
441 if (_groupcoll == null)
442 _groupcoll = new GroupCollection(this, _caps);
449 internal override void Dump() {
452 IEnumerator<Int32> e = _caps.Keys.GetEnumerator();
454 IEnumerator e = _caps.Keys.GetEnumerator();
456 while (e.MoveNext()) {
457 System.Diagnostics.Debug.WriteLine("Slot " + e.Current.ToString() + " -> " + _caps[e.Current].ToString());