Merge pull request #2419 from esdrubal/xmlsubclassel
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / SimpleCollator.cs
index 6c187cd9d2ea33a604921c3943a3ef00c03c4107..47ab8cdd9138056f73d8d8cd8a9b0498e1eed3a7 100644 (file)
@@ -86,7 +86,7 @@ namespace Mono.Globalization.Unicode
 
                unsafe internal struct Context
                {
-                       public Context (CompareOptions opt, byte* alwaysMatchFlags, byte* neverMatchFlags, byte* buffer1, byte* buffer2, byte* prev1, bool quickCheckPossible)
+                       public Context (CompareOptions opt, byte* alwaysMatchFlags, byte* neverMatchFlags, byte* buffer1, byte* buffer2, byte* prev1/*, bool quickCheckPossible*/)
                        {
                                Option = opt;
                                AlwaysMatchFlags = alwaysMatchFlags;
@@ -95,7 +95,7 @@ namespace Mono.Globalization.Unicode
                                Buffer2 = buffer2;
                                PrevSortKey = prev1;
                                PrevCode = -1;
-                               QuickCheckPossible = quickCheckPossible;
+//                             QuickCheckPossible = quickCheckPossible;
                        }
 
                        public readonly CompareOptions Option;
@@ -105,7 +105,7 @@ namespace Mono.Globalization.Unicode
                        public byte* Buffer2;
                        public int PrevCode;
                        public byte* PrevSortKey;
-                       public readonly bool QuickCheckPossible;
+//                     public readonly bool QuickCheckPossible;
 
                        public void ClearPrevInfo ()
                        {
@@ -139,20 +139,21 @@ namespace Mono.Globalization.Unicode
                        new SimpleCollator (CultureInfo.InvariantCulture);
 
                readonly TextInfo textInfo; // for ToLower().
-               readonly bool frenchSort;
-               unsafe readonly byte* cjkCatTable;
-               unsafe readonly byte* cjkLv1Table;
                readonly CodePointIndexer cjkIndexer;
-               unsafe readonly byte* cjkLv2Table;
-               readonly CodePointIndexer cjkLv2Indexer;
-               readonly int lcid;
                readonly Contraction [] contractions;
                readonly Level2Map [] level2Maps;
-
                // This flag marks characters as "unsafe", where the character
                // could be used as part of a contraction (whose length > 1).
                readonly byte [] unsafeFlags;
 
+               unsafe readonly byte* cjkCatTable;
+               unsafe readonly byte* cjkLv1Table;
+               unsafe readonly byte* cjkLv2Table;
+               readonly CodePointIndexer cjkLv2Indexer;
+               readonly int lcid;
+               readonly bool frenchSort;
+
+
                const int UnsafeFlagLength = 0x300 / 8;
 
 //             readonly byte [] contractionFlags = new byte [16];
@@ -541,7 +542,7 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                {
                        byte* prevbuf = stackalloc byte [4];
                        ClearBuffer (prevbuf, 4);
-                       Context ctx = new Context (opt, null, null, null, null, prevbuf, false);
+                       Context ctx = new Context (opt, null, null, null, null, prevbuf);
 
                        for (int n = start; n < end; n++) {
                                int i = s [n];
@@ -683,14 +684,9 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
 
                public int Compare (string s1, string s2)
                {
-                       return Compare (s1, s2, CompareOptions.None);
-               }
-
-               public int Compare (string s1, string s2, CompareOptions options)
-               {
-                       return Compare (s1, 0, s1.Length, s2, 0, s2.Length, options);
+                       return Compare (s1, 0, s1.Length, s2, 0, s2.Length, CompareOptions.None);
                }
-
+/*
                private int CompareOrdinal (string s1, int idx1, int len1,
                        string s2, int idx2, int len2)
                {
@@ -757,19 +753,10 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        return len1 == len2 ? 0 :
                                len1 == min ? - 1 : 1;
                }
-
-               public unsafe int Compare (string s1, int idx1, int len1,
+*/
+               internal unsafe int Compare (string s1, int idx1, int len1,
                        string s2, int idx2, int len2, CompareOptions options)
                {
-                       // quick equality check
-                       if (idx1 == idx2 && len1 == len2 &&
-                               Object.ReferenceEquals (s1, s2))
-                               return 0;
-                       if (options == CompareOptions.Ordinal)
-                               return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
-                       if (options == CompareOptions.OrdinalIgnoreCase)
-                               return CompareOrdinalIgnoreCase (s1, idx1, len1, s2, idx2, len2);
-
 #if false // stable easy version, depends on GetSortKey().
                        SortKey sk1 = GetSortKey (s1, idx1, len1, options);
                        SortKey sk2 = GetSortKey (s2, idx2, len2, options);
@@ -785,8 +772,8 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        byte* sk2 = stackalloc byte [4];
                        ClearBuffer (sk1, 4);
                        ClearBuffer (sk2, 4);
-                       Context ctx = new Context (options, null, null, sk1, sk2, null,
-                               QuickCheckPossible (s1, idx1, idx1 + len1, s2, idx2, idx2 + len2));
+                       Context ctx = new Context (options, null, null, sk1, sk2, null);
+                       //      QuickCheckPossible (s1, idx1, idx1 + len1, s2, idx2, idx2 + len2));
 
                        bool dummy, dummy2;
                        int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, out dummy, out dummy2, true, false, ref ctx);
@@ -800,6 +787,7 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                buffer [i] = 0;
                }
 
+/*
                bool QuickCheckPossible (string s1, int idx1, int end1,
                        string s2, int idx2, int end2)
                {
@@ -820,6 +808,7 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        return true;
 #endif
                }
+*/
 
                unsafe int CompareInternal (string s1, int idx1, int len1, string s2,
                        int idx2, int len2,
@@ -836,8 +825,8 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        sourceConsumed = false;
                        PreviousInfo prev2 = new PreviousInfo (false);
 
-                       if (opt == CompareOptions.None && ctx.QuickCheckPossible)
-                               return CompareQuick (s1, idx1, len1, s2, idx2, len2, out sourceConsumed, out targetConsumed, immediateBreakup);
+//                     if (opt == CompareOptions.None && ctx.QuickCheckPossible)
+//                             return CompareQuick (s1, idx1, len1, s2, idx2, len2, out sourceConsumed, out targetConsumed, immediateBreakup);
 
                        // It holds final result that comes from the comparison
                        // at level 2 or lower. Even if Compare() found the
@@ -1274,8 +1263,8 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        byte* sk2 = stackalloc byte [4];
                        ClearBuffer (sk1, 4);
                        ClearBuffer (sk2, 4);
-                       Context ctx = new Context (opt, null, null, sk1, sk2, null,
-                               QuickCheckPossible (s, start, start + length, target, 0, target.Length));
+                       Context ctx = new Context (opt, null, null, sk1, sk2, null); 
+                               //QuickCheckPossible (s, start, start + length, target, 0, target.Length));
                        return IsPrefix (s, target, start, length, true, ref ctx);
                }
 
@@ -1431,7 +1420,8 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
                                bool no = false;
                                for (int j = 0; j < target.Length; j++) {
                                        if (testedTargetPos < j) {
-                                               if (target [j] >= 0x80) {
+                                               char c = target [j];
+                                               if (c == 0 || c >= 0x80) {
                                                        testWasUnable = true;
                                                        return -1;
                                                }
@@ -1439,7 +1429,8 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
                                                        testedTargetPos = j;
                                        }
                                        if (testedSourcePos < i + j) {
-                                               if (s [i + j] >= 0x80) {
+                                               char c = s [i + j];
+                                               if (c == 0 || c >= 0x80) {
                                                        testWasUnable = true;
                                                        return -1;
                                                }
@@ -1461,9 +1452,9 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
                public unsafe int IndexOf (string s, string target, int start, int length, CompareOptions opt)
                {
                        if (opt == CompareOptions.Ordinal)
-                               return IndexOfOrdinal (s, target, start, length);
+                               throw new NotSupportedException ("Should not be reached");
                        if (opt == CompareOptions.OrdinalIgnoreCase)
-                               return IndexOfOrdinalIgnoreCase (s, target, start, length);
+                               throw new NotSupportedException ("Should not be reached");
                        if (opt == CompareOptions.None) {
                                bool testWasUnable;
                                int ret = QuickIndexOf (s, target, start, length, out testWasUnable);
@@ -1481,7 +1472,7 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
                        ClearBuffer (targetSortKey, 4);
                        ClearBuffer (sk1, 4);
                        ClearBuffer (sk2, 4);
-                       Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
+                       Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null);
 
                        return IndexOf (s, target, start, length,
                                targetSortKey, ref ctx);
@@ -1512,30 +1503,6 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
                        return -1;
                }
 
-               int IndexOfOrdinalIgnoreCase (string s, string target, int start, int length)
-               {
-                       if (target.Length == 0)
-                               return 0;
-                       else if (target.Length > length)
-                               return -1;
-
-                       int end = start + length - target.Length + 1;
-                       for (int i = start; i < end; i++) {
-                               bool no = false;
-                               for (int j = 0; j < target.Length; j++) {
-                                       // I think almost all text has more lower letters than upper ones. Thus with this invariant comparison ToLower() should be faster since it costs less operations.
-                                       if (textInfo.ToLower (s [i + j]) != textInfo.ToLower (target [j])) {
-                                               no = true;
-                                               break;
-                                       }
-                               }
-                               if (no)
-                                       continue;
-                               return i;
-                       }
-                       return -1;
-               }
-
                // char
 
                public int IndexOf (string s, char target, CompareOptions opt)
@@ -1546,9 +1513,9 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
                public unsafe int IndexOf (string s, char target, int start, int length, CompareOptions opt)
                {
                        if (opt == CompareOptions.Ordinal)
-                               return IndexOfOrdinal (s, target, start, length);
+                               throw new NotSupportedException ("Should not be reached");                      
                        if (opt == CompareOptions.OrdinalIgnoreCase)
-                               return IndexOfOrdinalIgnoreCase (s, target, start, length);
+                               throw new NotSupportedException ("Should not be reached");
                        byte* alwaysMatchFlags = stackalloc byte [16];
                        byte* neverMatchFlags = stackalloc byte [16];
                        byte* targetSortKey = stackalloc byte [4];
@@ -1559,7 +1526,7 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
                        ClearBuffer (targetSortKey, 4);
                        ClearBuffer (sk1, 4);
                        ClearBuffer (sk2, 4);
-                       Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
+                       Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null);
 
                        // If target is contraction, then use string search.
                        Contraction ct = GetContraction (target);
@@ -1597,16 +1564,6 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
                        return -1;
                }
 
-               int IndexOfOrdinalIgnoreCase (string s, char target, int start, int length)
-               {
-                       int end = start + length;
-                       target = textInfo.ToLower (target);
-                       for (int i = start; i < end; i++)
-                               if (textInfo.ToLower (s [i]) == target)
-                                       return i;
-                       return -1;
-               }
-
                // Searches target byte[] keydata
                unsafe int IndexOfSortKey (string s, int start, int length, byte* sortkey, char target, int ti, bool noLv4, ref Context ctx)
                {
@@ -1631,7 +1588,8 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
                                if (!IsIgnorable (target [tidx], opt))
                                        break;
                        if (tidx == target.Length)
-                               return start;
+                               // FIXME: this is likely a hack. A string that consists of \0 differs from strings of other ignorable characters.
+                               return IndexOfOrdinal (target, '\0', 0, target.Length) >= 0 ? IndexOfOrdinal (s, target, start, length) : start;
                        Contraction ct = GetContraction (target, tidx, target.Length - tidx);
                        string replace = ct != null ? ct.Replacement : null;
                        byte* sk = replace == null ? targetSortKey : null;
@@ -1703,7 +1661,7 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
                        if (opt == CompareOptions.Ordinal)
                                return LastIndexOfOrdinal (s, target, start, length);
                        if (opt == CompareOptions.OrdinalIgnoreCase)
-                               return LastIndexOfOrdinalIgnoreCase (s, target, start, length);
+                               throw new NotSupportedException ("Should not be reached");
                        byte* alwaysMatchFlags = stackalloc byte [16];
                        byte* neverMatchFlags = stackalloc byte [16];
                        byte* targetSortKey = stackalloc byte [4];
@@ -1715,7 +1673,7 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
                        ClearBuffer (sk1, 4);
                        ClearBuffer (sk2, 4);
                        // For some unknown reason CompareQuick() does not work fine w/ LastIndexOf().
-                       Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
+                       Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null);
                        return LastIndexOf (s, target, start, length,
                                targetSortKey, ref ctx);
                }
@@ -1723,7 +1681,7 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
                int LastIndexOfOrdinal (string s, string target, int start, int length)
                {
                        if (target.Length == 0)
-                               return 0;
+                               return start;
                        if (s.Length < target.Length || target.Length > length)
                                return -1;
                        int end = start - length + target.Length -1;
@@ -1748,34 +1706,6 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
                        return -1;
                }
 
-               int LastIndexOfOrdinalIgnoreCase (string s, string target, int start, int length)
-               {
-                       if (target.Length == 0)
-                               return 0;
-                       if (s.Length < length || target.Length > length)
-                               return -1;
-                       int end = start - length + target.Length - 1;
-                       char tail = textInfo.ToLower (target [target.Length - 1]);
-                       for (int i = start; i > end;) {
-                               if (textInfo.ToLower (s [i]) != tail) {
-                                       i--;
-                                       continue;
-                               }
-                               int x = i - target.Length + 1;
-                               i--;
-                               bool mismatch = false;
-                               for (int j = target.Length - 2; j >= 0; j--)
-                                       if (textInfo.ToLower (s [x + j]) != textInfo.ToLower (target [j])) {
-                                               mismatch = true;
-                                               break;
-                                       }
-                               if (mismatch)
-                                       continue;
-                               return x;
-                       }
-                       return -1;
-               }
-
                // char
 
                public int LastIndexOf (string s, char target, CompareOptions opt)
@@ -1786,9 +1716,9 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
                public unsafe int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
                {
                        if (opt == CompareOptions.Ordinal)
-                               return LastIndexOfOrdinal (s, target, start, length);
+                               throw new NotSupportedException ();
                        if (opt == CompareOptions.OrdinalIgnoreCase)
-                               return LastIndexOfOrdinalIgnoreCase (s, target, start, length);
+                               throw new NotSupportedException ();                     
                        byte* alwaysMatchFlags = stackalloc byte [16];
                        byte* neverMatchFlags = stackalloc byte [16];
                        byte* targetSortKey = stackalloc byte [4];
@@ -1799,7 +1729,7 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
                        ClearBuffer (targetSortKey, 4);
                        ClearBuffer (sk1, 4);
                        ClearBuffer (sk2, 4);
-                       Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
+                       Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null);
 
                        // If target is a replacement contraction, then use 
                        // string search.
@@ -1831,29 +1761,6 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
                        }
                }
 
-               int LastIndexOfOrdinal (string s, char target, int start, int length)
-               {
-                       if (s.Length == 0)
-                               return -1;
-                       int end = start - length;
-                       for (int i = start; i > end; i--)
-                               if (s [i] == target)
-                                       return i;
-                       return -1;
-               }
-
-               int LastIndexOfOrdinalIgnoreCase (string s, char target, int start, int length)
-               {
-                       if (s.Length == 0)
-                               return -1;
-                       int end = start - length;
-                       char c = textInfo.ToUpper (target);
-                       for (int i = start; i > end; i--)
-                               if (textInfo.ToUpper (s [i]) == c)
-                                       return i;
-                       return -1;
-               }
-
                // Searches target byte[] keydata
                unsafe int LastIndexOfSortKey (string s, int start, int orgStart, int length, byte* sortkey, int ti, bool noLv4, ref Context ctx)
                {
@@ -1879,7 +1786,8 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
                                if (!IsIgnorable (target [tidx], opt))
                                        break;
                        if (tidx == target.Length)
-                               return start;
+                               // FIXME: this is likely a hack. A string that consists of \0 differs from strings of other ignorable characters.
+                               return IndexOfOrdinal (target, '\0', 0, target.Length) >= 0 ? LastIndexOfOrdinal (s, target, start, length) : start;
                        Contraction ct = GetContraction (target, tidx, target.Length - tidx);
                        string replace = ct != null ? ct.Replacement : null;
                        byte* sk = replace == null ? targetSortKey : null;
@@ -2098,7 +2006,7 @@ Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, leng
                        // check next _primary_ character.
                        if (ext != ExtenderType.None) {
                                byte diacritical = 0;
-                               for (int tmp = 0; ; tmp--) {
+                               for (int tmp = idx; ; tmp--) {
                                        if (tmp < 0) // heading extender
                                                return false;
                                        if (IsIgnorable (s [tmp], opt))