2007-10-22 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / System / System.Text.RegularExpressions / quicksearch.cs
index 57d5b5fb67d23cb6fc24aeff0983bdafd591a140..165290741da8e7d7c870997d760849d7940d6227 100644 (file)
@@ -4,17 +4,38 @@
 // file:       quicksearch.cs
 //
 // Authors:    Dan Lewis (dlewis@gmx.co.uk)
-//             Juraj Skripsky (juraj@hotfeet.ch)
+//              Juraj Skripsky (juraj@hotfeet.ch)
 //
 // (c) 2002 Dan Lewis
-// (c) 2003 Juraj Skripsky
+// (c) 2007 Juraj Skripsky
+//
+
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the
+// "Software"), to deal in the Software without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to
+// the following conditions:
+// 
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 //
 
 using System;
 using System.Collections;
 
 namespace System.Text.RegularExpressions {
-       public class QuickSearch {
+       internal class QuickSearch {
                // simplified boyer-moore for fast substring matching
                // (for short strings, we use simple scans)
                public QuickSearch (string str, bool ignore) 
@@ -139,44 +160,76 @@ namespace System.Text.RegularExpressions {
                        // private
 
                private void SetupShiftTable () {
-                       shift = new Hashtable ();
-                       if (reverse)
-                       {
-                               for (int i = len ; i > 0; -- i) 
-                               {
-                                       char c = str[i -1];
-                                       shift[GetChar(c)] = i;
-                               }
+                       bool needsExtendedTable = len > (byte.MaxValue - 1);
+
+                       byte maxLowChar = 0;
+                       for (int i = 0; i < len; i++) {
+                               char cur = str [i];
+                               if (cur <= (char)byte.MaxValue) {
+                                       if ((byte)cur > maxLowChar)
+                                               maxLowChar = (byte)cur;
+                               } else
+                                       needsExtendedTable = true;
                        }
-                       else
-                       {
-                               for (int i = 0; i < len; ++ i) 
-                               {
-                                       char c = str[i];
-                                       shift[GetChar(c)] = len - i;
+                       
+                       shift = new byte [maxLowChar + 1];
+                       if (needsExtendedTable)
+                               shiftExtended = new Hashtable ();
+
+                       for (int i = 0, j = len; i < len; i++, j--) {
+                               char c = str [(!reverse ? i : j - 1)];
+                               if (c < shift.Length) {
+                                       if (j < byte.MaxValue) {
+                                               shift [c] = (byte)j;
+                                               continue;
+                                       } else {
+                                               shift [c] = byte.MaxValue;
+                                       }
                                }
+                               shiftExtended [c] = j;
                        }
-                       
                }
            
                private int GetShiftDistance (char c) {
-                       if(shift == null)
+                       if (shift == null)
                                return 1;
+                               
+                       c = GetChar (c);
+                       if (c < shift.Length) {
+                               int dist = shift [c];
+                               if (dist == 0) {
+                                       return len + 1;
+                               } else {
+                                       if (dist != byte.MaxValue)
+                                               return dist;
+                               }
+                       } else {
+                               if (c < (char)byte.MaxValue)    
+                                       return len + 1;
+                       }
+
+                       if (shiftExtended == null)
+                               return len + 1;
 
-                       object s = shift[c];
+                       object s = shiftExtended [c];
                        return (s != null ? (int)s : len + 1);
                }
 
                private char GetChar(char c) {
                        return (!ignore ? c : Char.ToLower(c));
                }
-               
+
                private string str;
                private int len;
                private bool ignore;
                private bool reverse;
 
-               private Hashtable shift;
+               // shift[idx] contains value x  means 
+               //  x == 0             => shift dist. == len + 1
+               //  x == byte.Maxvalue => shift dist. >= 255, look it up in shiftExtended
+               //  otherwise          => shift dist. == x 
+               private byte[] shift;
+               private Hashtable shiftExtended;
                private readonly static int THRESHOLD = 5;
        }