// file: quicksearch.cs
//
// Authors: Dan Lewis (dlewis@gmx.co.uk)
-// Juraj Skripsky (juraj@hotfeet.ch)
+// Juraj Skripsky (juraj@hotfeet.ch)
//
// (c) 2002 Dan Lewis
-// (c) 2003 Juraj Skripsky
+// (c) 2007 Juraj Skripsky
+//
+
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the
+// "Software"), to deal in the Software without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to
+// the following conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
using System;
using System.Collections;
namespace System.Text.RegularExpressions {
- public class QuickSearch {
+ internal class QuickSearch {
// simplified boyer-moore for fast substring matching
// (for short strings, we use simple scans)
public QuickSearch (string str, bool ignore)
// private
private void SetupShiftTable () {
- shift = new Hashtable ();
- if (reverse)
- {
- for (int i = len ; i > 0; -- i)
- {
- char c = str[i -1];
- shift[GetChar(c)] = i;
- }
+ bool needsExtendedTable = len > (byte.MaxValue - 1);
+
+ byte maxLowChar = 0;
+ for (int i = 0; i < len; i++) {
+ char cur = str [i];
+ if (cur <= (char)byte.MaxValue) {
+ if ((byte)cur > maxLowChar)
+ maxLowChar = (byte)cur;
+ } else
+ needsExtendedTable = true;
}
- else
- {
- for (int i = 0; i < len; ++ i)
- {
- char c = str[i];
- shift[GetChar(c)] = len - i;
+
+ shift = new byte [maxLowChar + 1];
+ if (needsExtendedTable)
+ shiftExtended = new Hashtable ();
+
+ for (int i = 0, j = len; i < len; i++, j--) {
+ char c = str [(!reverse ? i : j - 1)];
+ if (c < shift.Length) {
+ if (j < byte.MaxValue) {
+ shift [c] = (byte)j;
+ continue;
+ } else {
+ shift [c] = byte.MaxValue;
+ }
}
+ shiftExtended [c] = j;
}
-
}
private int GetShiftDistance (char c) {
- if(shift == null)
+ if (shift == null)
return 1;
+
+ c = GetChar (c);
+ if (c < shift.Length) {
+ int dist = shift [c];
+ if (dist == 0) {
+ return len + 1;
+ } else {
+ if (dist != byte.MaxValue)
+ return dist;
+ }
+ } else {
+ if (c < (char)byte.MaxValue)
+ return len + 1;
+ }
+
+ if (shiftExtended == null)
+ return len + 1;
- object s = shift[c];
+ object s = shiftExtended [c];
return (s != null ? (int)s : len + 1);
}
private char GetChar(char c) {
return (!ignore ? c : Char.ToLower(c));
}
-
+
private string str;
private int len;
private bool ignore;
private bool reverse;
- private Hashtable shift;
+ // shift[idx] contains value x means
+ // x == 0 => shift dist. == len + 1
+ // x == byte.Maxvalue => shift dist. >= 255, look it up in shiftExtended
+ // otherwise => shift dist. == x
+ private byte[] shift;
+ private Hashtable shiftExtended;
private readonly static int THRESHOLD = 5;
}