2005-07-26 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / SimpleCollator.cs
index ba195f0fc1a13e331aea5d0070766703f0201727..cbe4d904eb17cdb8836bd2eb0ef914f839aff62b 100644 (file)
 // other character than the identical sequence (depending on CompareOptions
 // and of course, defined sortkey value itself), comparison cannot be done
 // at source char[] level.
-//
-// (to be continued.)
-//
-
 //
 // In IndexOf(), the first character in the target string or the target char
 // itself is turned into sortkey bytes. If the character has a contraction and
@@ -37,7 +33,6 @@
 // tries IsPrefix() from that location. If it returns true, then it returns
 // the index.
 //
-
 // LAMESPEC: IndexOf() is lame as a whole API. It never matches in the middle
 // of expansion and there is no proper way to return such indexes within
 // a single int return value.
@@ -62,30 +57,6 @@ namespace Mono.Globalization.Unicode
                static SimpleCollator invariant =
                        new SimpleCollator (CultureInfo.InvariantCulture);
 
-               internal static readonly byte [] ignorableFlags =
-                       Uni.ignorableFlags;
-               internal static readonly byte [] categories =
-                       Uni.categories;
-               internal static readonly byte [] level1 =
-                       Uni.level1;
-               internal static readonly byte [] level2 =
-                       Uni.level2;
-               internal static readonly byte [] level3 =
-                       Uni.level3;
-               internal static readonly ushort [] widthCompat =
-                       Uni.widthCompat;
-               internal static readonly CodePointIndexer categoryIndexer =
-                       UUtil.Category;
-               internal static readonly CodePointIndexer lv1Indexer =
-                       UUtil.Level1;
-               internal static readonly CodePointIndexer lv2Indexer =
-                       UUtil.Level2;
-               internal static readonly CodePointIndexer lv3Indexer =
-                       UUtil.Level3;
-               internal static readonly CodePointIndexer widthIndexer =
-                       UUtil.WidthCompat;
-
-
                SortKeyBuffer buf;
                // CompareOptions expanded.
                bool ignoreNonSpace; // used in IndexOf()
@@ -95,92 +66,26 @@ namespace Mono.Globalization.Unicode
                bool ignoreKanaType;
                TextInfo textInfo; // for ToLower().
                bool frenchSort;
-               readonly ushort [] cjkTable;
+               unsafe readonly byte* cjkCatTable;
+               unsafe readonly byte* cjkLv1Table;
                readonly CodePointIndexer cjkIndexer;
-               readonly byte [] cjkLv2Table;
+               unsafe readonly byte* cjkLv2Table;
                readonly CodePointIndexer cjkLv2Indexer;
                readonly int lcid;
                readonly Contraction [] contractions;
                readonly Level2Map [] level2Maps;
 
+               // This flag marks characters as "unsafe", where the character
+               // could be used as part of a contraction (whose length > 1).
+               readonly bool [] unsafeFlags;
+
+               const int UnsafeFlagLength = 0x300;
+
                // temporary sortkey buffer for index search/comparison
                byte [] charSortKey = new byte [4];
                byte [] charSortKey2 = new byte [4];
                byte [] charSortKeyIndexTarget = new byte [4];
 
-               #region Tailoring support classes
-               // Possible mapping types are:
-               //
-               //      - string to string (ReplacementMap)
-               //      - string to SortKey (SortKeyMap)
-               //      - diacritical byte to byte (DiacriticalMap)
-               //
-               // There could be mapping from string to sortkeys, but
-               // for now there is none as such.
-               //
-               internal class Contraction
-               {
-                       public readonly char [] Source;
-                       // only either of them is used.
-                       public readonly string Replacement;
-                       public readonly byte [] SortKey;
-
-                       public Contraction (char [] source,
-                               string replacement, byte [] sortkey)
-                       {
-                               Source = source;
-                               Replacement = replacement;
-                               SortKey = sortkey;
-                       }
-               }
-
-               internal class ContractionComparer : IComparer
-               {
-                       public static readonly ContractionComparer Instance =
-                               new ContractionComparer ();
-
-                       public int Compare (object o1, object o2)
-                       {
-                               Contraction c1 = (Contraction) o1;
-                               Contraction c2 = (Contraction) o2;
-                               char [] a1 = c1.Source;
-                               char [] a2 = c2.Source;
-                               int min = a1.Length > a2.Length ?
-                                       a2.Length : a1.Length;
-                               for (int i = 0; i < min; i++)
-                                       if (a1 [i] != a2 [i])
-                                               return a1 [i] - a2 [i];
-                               return a1.Length - a2.Length;
-                       }
-               }
-
-               internal class Level2Map
-               {
-                       public byte Source;
-                       public byte Replace;
-
-                       public Level2Map (byte source, byte replace)
-                       {
-                               Source = source;
-                               Replace = replace;
-                       }
-               }
-
-               internal class Level2MapComparer : IComparer
-               {
-                       public static readonly Level2MapComparer Instance =
-                               new Level2MapComparer ();
-
-                       public int Compare (object o1, object o2)
-                       {
-                               Level2Map m1 = (Level2Map) o1;
-                               Level2Map m2 = (Level2Map) o2;
-                               return (m1.Source - m2.Source);
-                       }
-               }
-
-               #endregion
-
                #region .ctor() and split functions
 
                public SimpleCollator (CultureInfo culture)
@@ -189,8 +94,11 @@ namespace Mono.Globalization.Unicode
                        textInfo = culture.TextInfo;
                        buf = new SortKeyBuffer (culture.LCID);
 
-                       SetCJKTable (culture, ref cjkTable, ref cjkIndexer,
-                               ref cjkLv2Table, ref cjkLv2Indexer);
+                       unsafe {
+                               SetCJKTable (culture, ref cjkIndexer,
+                                       ref cjkCatTable, ref cjkLv1Table,
+                                       ref cjkLv2Indexer, ref cjkLv2Table);
+                       }
 
                        // Get tailoring info
                        TailoringInfo t = null;
@@ -203,8 +111,14 @@ namespace Mono.Globalization.Unicode
                                t = Uni.GetTailoringInfo (127);
 
                        frenchSort = t.FrenchSort;
-                       BuildTailoringTables (culture, t, ref contractions,
+                       Uni.BuildTailoringTables (culture, t, ref contractions,
                                ref level2Maps);
+                       unsafeFlags = new bool [UnsafeFlagLength];
+                       foreach (Contraction c in contractions)
+                               if (c.Source.Length > 1)
+                                       foreach (char ch in c.Source)
+                                               unsafeFlags [(int) ch] = true;
+
                        // FIXME: Since tailorings are mostly for latin
                        // (and in some cases Cyrillic) characters, it would
                        // be much better for performance to store "start 
@@ -222,97 +136,15 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
 */
                }
 
-               private void BuildTailoringTables (CultureInfo culture,
-                       TailoringInfo t,
-                       ref Contraction [] contractions,
-                       ref Level2Map [] diacriticals)
-               {
-                       // collect tailoring entries.
-                       ArrayList cmaps = new ArrayList ();
-                       ArrayList dmaps = new ArrayList ();
-                       char [] tarr = Uni.TailoringValues;
-                       int idx = t.TailoringIndex;
-                       int end = idx + t.TailoringCount;
-                       while (idx < end) {
-                               int ss = idx + 1;
-                               char [] src = null;
-                               switch (tarr [idx]) {
-                               case '\x1': // SortKeyMap
-                                       idx++;
-                                       while (tarr [ss] != 0)
-                                               ss++;
-                                       src = new char [ss - idx];
-                                       Array.Copy (tarr, idx, src, 0, ss - idx);
-                                       byte [] sortkey = new byte [4];
-                                       for (int i = 0; i < 4; i++)
-                                               sortkey [i] = (byte) tarr [ss + 1 + i];
-                                       cmaps.Add (new Contraction (
-                                               src, null, sortkey));
-                                       // it ends with 0
-                                       idx = ss + 6;
-                                       break;
-                               case '\x2': // DiacriticalMap
-                                       dmaps.Add (new Level2Map (
-                                               (byte) tarr [idx + 1],
-                                               (byte) tarr [idx + 2]));
-                                       idx += 3;
-                                       break;
-                               case '\x3': // ReplacementMap
-                                       idx++;
-                                       while (tarr [ss] != 0)
-                                               ss++;
-                                       src = new char [ss - idx];
-                                       Array.Copy (tarr, idx, src, 0, ss - idx);
-                                       ss++;
-                                       int l = ss;
-                                       while (tarr [l] != 0)
-                                               l++;
-                                       string r = new string (tarr, ss, l - ss);
-                                       cmaps.Add (new Contraction (
-                                               src, r, null));
-                                       idx = l + 1;
-                                       break;
-                               default:
-                                       throw new NotImplementedException (String.Format ("Mono INTERNAL ERROR (Should not happen): Collation tailoring table is broken for culture {0} ({1}) at 0x{2:X}", culture.LCID, culture.Name, idx));
-                               }
-                       }
-                       cmaps.Sort (ContractionComparer.Instance);
-                       dmaps.Sort (Level2MapComparer.Instance);
-                       contractions = cmaps.ToArray (typeof (Contraction))
-                               as Contraction [];
-                       diacriticals = dmaps.ToArray (typeof (Level2Map))
-                               as Level2Map [];
-               }
-
-               private void SetCJKTable (CultureInfo culture,
-                       ref ushort [] cjkTable, ref CodePointIndexer cjkIndexer,
-                       ref byte [] cjkLv2Table, ref CodePointIndexer cjkLv2Indexer)
+               unsafe private void SetCJKTable (
+                       CultureInfo culture, ref CodePointIndexer cjkIndexer,
+                       ref byte* catTable, ref byte* lv1Table,
+                       ref CodePointIndexer lv2Indexer, ref byte* lv2Table)
                {
                        string name = GetNeutralCulture (culture).Name;
 
-                       Uni.FillCJK (name);
-
-                       // custom CJK table support.
-                       switch (name) {
-                       case "zh-CHS":
-                               cjkTable = Uni.CjkCHS;
-                               cjkIndexer = UUtil.CjkCHS;
-                               break;
-                       case "zh-CHT":
-                               cjkTable = Uni.CjkCHT;
-                               cjkIndexer = UUtil.Cjk;
-                               break;
-                       case "ja":
-                               cjkTable = Uni.CjkJA;
-                               cjkIndexer = UUtil.Cjk;
-                               break;
-                       case "ko":
-                               cjkTable = Uni.CjkKO;
-                               cjkLv2Table = Uni.CjkKOLv2;
-                               cjkIndexer = UUtil.Cjk;
-                               cjkLv2Indexer = UUtil.Cjk;
-                               break;
-                       }
+                       Uni.FillCJK (name, ref cjkIndexer, ref catTable,
+                               ref lv1Table, ref lv2Indexer, ref lv2Table);
                }
 
                static CultureInfo GetNeutralCulture (CultureInfo info)
@@ -325,27 +157,23 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
 
                #endregion
 
-               byte Category (int cp)
+               unsafe byte Category (int cp)
                {
-                       if (cp < 0x3000 || cjkTable == null)
-                               return categories [categoryIndexer.ToIndex (cp)];
+                       if (cp < 0x3000 || cjkCatTable == null)
+                               return Uni.Category (cp);
                        int idx = cjkIndexer.ToIndex (cp);
-                       ushort cjk = idx < 0 ? (ushort) 0 : cjkTable [idx];
-                       return cjk != 0 ? (byte) ((cjk & 0xFF00) >> 8) :
-                               categories [categoryIndexer.ToIndex (cp)];
+                       return idx < 0 ? Uni.Category (cp) : cjkCatTable [idx];
                }
 
-               byte Level1 (int cp)
+               unsafe byte Level1 (int cp)
                {
-                       if (cp < 0x3000 || cjkTable == null)
-                               return level1 [lv1Indexer.ToIndex (cp)];
+                       if (cp < 0x3000 || cjkLv1Table == null)
+                               return Uni.Level1 (cp);
                        int idx = cjkIndexer.ToIndex (cp);
-                       ushort cjk = idx < 0 ? (ushort) 0 : cjkTable [idx];
-                       return cjk != 0 ? (byte) (cjk & 0xFF) :
-                               level1 [lv1Indexer.ToIndex (cp)];
+                       return idx < 0 ? Uni.Level1 (cp) : cjkLv1Table [idx];
                }
 
-               byte Level2 (int cp, ExtenderType ext)
+               unsafe byte Level2 (int cp, ExtenderType ext)
                {
                        if (ext == ExtenderType.Buggy)
                                return 5;
@@ -353,12 +181,12 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                return 0;
 
                        if (cp < 0x3000 || cjkLv2Table == null)
-                               return level2 [lv2Indexer.ToIndex (cp)];
+                               return Uni.Level2 (cp);
                        int idx = cjkLv2Indexer.ToIndex (cp);
                        byte ret = idx < 0 ? (byte) 0 : cjkLv2Table [idx];
                        if (ret != 0)
                                return ret;
-                       ret = level2 [lv2Indexer.ToIndex (cp)];
+                       ret = Uni.Level2 (cp);
                        if (level2Maps.Length == 0)
                                return ret;
                        for (int i = 0; i < level2Maps.Length; i++) {
@@ -475,7 +303,7 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                int FilterOptions (int i)
                {
                        if (ignoreWidth) {
-                               int x = widthCompat [widthIndexer.ToIndex (i)];
+                               int x = Uni.ToWidthCompat (i);
                                if (x != 0)
                                        i = x;
                        }
@@ -507,6 +335,11 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        // Windows also expects true for U+3031 and U+3032,
                        // but they should *never* repeat one character.
 
+                       // U+2015 becomes an extender only when it is Japanese
+                       if (i == 0x2015)
+                               return lcid == 16 ? ExtenderType.Conditional :
+                                       ExtenderType.None;
+
                        if (i < 0x3005 || i > 0xFF70)
                                return ExtenderType.None;
                        if (i >= 0xFE7C) {
@@ -584,6 +417,10 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                ignoreNonSpace && Uni.IsIgnorableNonSpacing (i);
                }
 
+               bool IsSafe (int i)
+               {
+                       return i >= unsafeFlags.Length ? true : !unsafeFlags [i];
+               }
 
                #region GetSortKey()
 
@@ -623,7 +460,7 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                                        b [0],
                                                        b [1],
                                                        b [2] != 1 ? b [2] : Level2 (i, ext),
-                                                       b [3] != 1 ? b [3] : level3 [lv3Indexer.ToIndex (i)]);
+                                                       b [3] != 1 ? b [3] : Uni.Level3 (i));
                                        }
                                        // otherwise do nothing.
                                        // (if the extender is the first char
@@ -645,7 +482,7 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                                        b [0],
                                                        b [1],
                                                        b [2] != 1 ? b [2] : Level2 (i, ext),
-                                                       b [3] != 1 ? b [3] : level3 [lv3Indexer.ToIndex (i)]);
+                                                       b [3] != 1 ? b [3] : Uni.Level3 (i));
                                                previousSortKey = b;
                                                previousChar = -1;
                                        }
@@ -761,6 +598,7 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        public int Index;
                        public int Start;
                        public int End;
+                       public int Optional;
                }
 
                // Those instances are reused not to invoke instantiation
@@ -855,6 +693,9 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        ExtenderType ext1 = ExtenderType.None;
                        ExtenderType ext2 = ExtenderType.None;
 
+                       int quickCheckPos1 = idx1;
+                       int quickCheckPos2 = idx2;
+
                        while (true) {
                                for (; idx1 < end1; idx1++)
                                        if (!IsIgnorable (s1 [idx1]))
@@ -870,6 +711,7 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                        start1 = escape1.Start;
                                        idx1 = escape1.Index;
                                        end1 = escape1.End;
+                                       quickCheckPos1 = escape1.Optional;
                                        escape1.Source = null;
                                        continue;
                                }
@@ -880,21 +722,48 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                        start2 = escape2.Start;
                                        idx2 = escape2.Index;
                                        end2 = escape2.End;
+                                       quickCheckPos2 = escape2.Optional;
                                        escape2.Source = null;
                                        continue;
                                }
-#if false
-// FIXME: optimization could be done here.
+#if true
+// If comparison is unstable, then this part is one of the most doubtful part.
+// Here we run quick codepoint comparison and run back to "stable" area to
+// compare characters.
+
+                               // Strictly to say, even the first character
+                               // could be compared here, but it messed
+                               // backward step, so I just avoided mess.
+                               if (quickCheckPos1 < idx1 && quickCheckPos2 < idx2) {
+                                       while (idx1 < end1 && idx2 < end2 &&
+                                               s1 [idx1] == s2 [idx2]) {
+                                               idx1++;
+                                               idx2++;
+                                       }
+                                       if (idx1 == end1 || idx2 == end2)
+                                               continue; // check replacement
 
-                               if (s1 [idx1] == s2 [idx2]) {
-                                       idx1++;
-                                       idx2++;
-                                       continue;
+                                       int backwardEnd1 = quickCheckPos1;
+                                       int backwardEnd2 = quickCheckPos2;
+                                       quickCheckPos1 = idx1;
+                                       quickCheckPos2 = idx2;
+
+                                       idx1--;
+                                       idx2--;
+
+                                       for (;idx1 > backwardEnd1; idx1--)
+                                               if (Category (s1 [idx1]) != 1)
+                                                       break;
+                                       for (;idx2 > backwardEnd2; idx2--)
+                                               if (Category (s2 [idx2]) != 1)
+                                                       break;
+                                       for (;idx1 > backwardEnd1; idx1--)
+                                               if (IsSafe (s1 [idx1]))
+                                                       break;
+                                       for (;idx2 > backwardEnd2; idx2--)
+                                               if (IsSafe (s2 [idx2]))
+                                                       break;
                                }
-//                             while (idx1 >= start1 && !IsSafe ((int) s [idx1]))
-//                                     idx1--;
-//                             while (idx2 >= start2 && !IsSafe ((int) s [idx2]))
-//                                     idx2--;
 #endif
 
                                int cur1 = idx1;
@@ -987,10 +856,12 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                                escape1.Start = start1;
                                                escape1.Index = cur1 + ct1.Source.Length;
                                                escape1.End = end1;
+                                               escape1.Optional = quickCheckPos1;
                                                s1 = ct1.Replacement;
                                                idx1 = 0;
                                                start1 = 0;
                                                end1 = s1.Length;
+                                               quickCheckPos1 = 0;
                                                continue;
                                        }
                                }
@@ -1028,10 +899,12 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                                escape2.Start = start2;
                                                escape2.Index = cur2 + ct2.Source.Length;
                                                escape2.End = end2;
+                                               escape2.Optional = quickCheckPos2;
                                                s2 = ct2.Replacement;
                                                idx2 = 0;
                                                start2 = 0;
                                                end2 = s2.Length;
+                                               quickCheckPos2 = 0;
                                                continue;
                                        }
                                }
@@ -1148,6 +1021,14 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                        ext2 = ExtenderType.None;
                                }
                        }
+                       if (currentLevel == 1 && finalResult != 0) {
+                               while (idx1 < end1)
+                                       if (Uni.IsIgnorableNonSpacing (s1 [idx1]))
+                                               idx1++;
+                               while (idx2 < end2)
+                                       if (Uni.IsIgnorableNonSpacing (s2 [idx2]))
+                                               idx2++;
+                       }
                        // we still have to handle level 5
                        if (finalResult == 0) {
                                finalResult = lv5At1 - lv5At2;
@@ -1163,6 +1044,11 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        return idx1 != end1 ? 1 : idx2 == end2 ? finalResult : -1;
                }
 
+               int CompareFlagPair (bool b1, bool b2)
+               {
+                       return b1 == b2 ? 0 : b1 ? 1 : -1;
+               }
+
                #endregion
 
                #region IsPrefix() and IsSuffix()
@@ -1213,7 +1099,7 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                (opt & CompareOptions.StringSort) != 0);
                }
 
-               bool New_IsSuffix (string s, string t, int start, int length,
+               bool IsSuffix (string s, string t, int start, int length,
                        bool stringSort)
                {
                        int tstart = 0;
@@ -1223,199 +1109,81 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        if (tstart == t.Length)
                                return true; // as if target is String.Empty.
 
-                       // FIXME: could be more efficient.
-                       bool consumed, dummy;
-                       for (int i = 0; i < length; i++) {
-                               int ret = CompareInternal (s, start - i, i + 1,
-                                       t, tstart, t.Length, stringSort,
-                                       out dummy, out consumed, true);
-                               if (ret == 0)
-                                       return true;
-                       }
-                       return false;
-               }
-
-               // FIXME: This code has a fatal problem that it cannot handle
-               // extenders :(
-               bool IsSuffix (string s, string t, int start, int length,
-                       bool stringSort)
-               {
+#if false
+// This is still not working. If it is not likely to get working, then just remove it.
                        int si = start;
+                       int send = start - length;
                        int ti = t.Length - 1;
-                       int end = start - length + 1;
-                       int tend = 0;
+                       int tend = -1;
 
+                       int sStep = start + 1;
+                       int tStep = t.Length;
+                       bool sourceConsumed, targetConsumed;
                        while (true) {
-                               if (ti < tend) {
-                                       if (escape2.Source == null)
+                               for (; send < si; si--)
+                                       if (!IsIgnorable (s [si]))
                                                break;
-                                       t = escape2.Source;
-                                       ti = escape2.Index;
-                                       tend = escape2.End;
-                                       escape2.Source = null;
-                                       continue;
-                               }
-                               if (IsIgnorable (t [ti])) {
-                                       ti--;
-                                       continue;
-                               }
-                               if (si < end) {
-                                       if (escape1.Source == null)
+                               for (; tend < ti; ti--)
+                                       if (!IsIgnorable (t [ti]))
                                                break;
-                                       s = escape1.Source;
-                                       si = escape1.Index;
-                                       end = escape1.End;
-                                       escape1.Source = null;
-                                       continue;
-                               }
-                               if (IsIgnorable (s [si])) {
-                                       si--;
-                                       continue;
-                               }
-
-                               ExtenderType ext1 = GetExtenderType (s [si]);
-                               ExtenderType ext2 = GetExtenderType (t [ti]);
-                               int vt = -1;
-                               byte [] sk = null;
-
-                               // Check contraction for target.
-                               Contraction ctt = null;
-                               if (ext2 != ExtenderType.None) {
-                                       if (previousChar2 < 0)
-                                               sk = previousSortKey2;
-                                       else
-                                               vt = FilterExtender (previousChar2, ext2);
-                               }
-                               else if (escape2.Source == null)
-                                       ctt = GetTailContraction (t, ti, 0);
-                               if (ctt != null) {
-                                       if (ctt.SortKey != null) {
-                                               ti -= ctt.Source.Length;
-                                               for (int i = 0; i < ctt.SortKey.Length; i++)
-                                                       charSortKey2 [i] = ctt.SortKey [i];
-                                               previousChar2 = -1;
-                                               previousSortKey2 = charSortKey2;
-                                       } else {
-                                               escape2.Source = t;
-                                               escape2.Index = ti - ctt.Source.Length;
-                                               escape2.End = tend;
-                                               t = ctt.Replacement;
-                                               ti = ctt.Replacement.Length - 1;
-                                               tend = 0;
-                                               continue;
-                                       }
-                               } else if (sk == null) {
-                                       if (vt < 0)
-                                               vt = FilterOptions (t [ti]);
-                                       sk = charSortKey2;
-                                       sk [0] = Category (vt);
-                                       sk [1] = Level1 (vt);
-                                       if (!ignoreNonSpace)
-                                               // FIXME: pass extender type
-                                               sk [2] = Level2 (vt, ext2);
-                                       sk [3] = Uni.Level3 (vt);
-                                       if (sk [1] != 1)
-                                               previousChar2 = vt;
-                                       ti--;
-                               }
-                               if (!MatchesBackward (ref s, ref si, ref end, vt, sk, !Uni.HasSpecialWeight ((char) vt)))
+                               if (tend == ti)
+                                       break;
+                               for (; send < si; si--)
+                                       if (IsSafe (s [si]))
+                                               break;
+                               for (; tend < ti; ti--)
+                                       if (IsSafe (t [ti]))
+                                               break;
+Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, length, t, ti, tstart, sStep - si, tStep - ti);
+                               if (CompareInternal (s, si, sStep - si,
+                                       t, ti, tStep - ti, stringSort,
+                                       out targetConsumed, out sourceConsumed,
+                                       true) != 0)
                                        return false;
-                       }
-                       if (si < end) {
-                               // All codepoints in the compared range
-                               // matches. In that case, what matters 
-                               // is whether the remaining part of 
-                               // "target" is ignorable or not.
-                               while (ti >= 0)
-                                       if (!IsIgnorable (t [ti--]))
-                                               return false;
-                               return true;
+                               if (send == si)
+                                       return false;
+                               sStep = si;
+                               tStep = ti;
+                               si--;
+                               ti--;
                        }
                        return true;
-               }
+#else
+                       // FIXME: it is not efficient for very long target.
+                       bool sourceConsumed, targetConsumed;
+                       int mismatchCount = 0;
+                       for (int i = 0; i < length; i++) {
+                               escape1.Source = escape2.Source = null;
+                               previousSortKey = previousSortKey2 = null;
+                               previousChar = previousChar2 = -1;
 
-               // WARNING: Don't invoke it outside IsSuffix().
-               // returns consumed source length (mostly 1, source length in case of contraction)
-               bool MatchesBackward (ref string s, ref int idx, ref int end, int it, byte [] sortkey, bool noLv4)
-               {
-                       Contraction ct = null;
-                       // If there is already expansion, then it should not
-                       // process further expansions.
-                       if (escape1.Source == null)
-                               ct = GetTailContraction (s, idx, end);
-                       if (ct != null) {
-                               if (ct.SortKey != null) {
-                                       if (!noLv4)
-                                               return false;
-                                       for (int i = 0; i < ct.SortKey.Length; i++)
-                                               if (sortkey [i] != ct.SortKey [i])
-                                                       return false;
-                                       idx -= ct.Source.Length;
+                               int ret = CompareInternal (s, start - i, i + 1,
+                                       t, tstart, t.Length - tstart,
+                                       stringSort, out targetConsumed,
+                                       out sourceConsumed, true);
+                               if (ret == 0)
                                        return true;
-                               } else {
-                                       escape1.Source = s;
-                                       escape1.Index = idx - ct.Source.Length;
-                                       escape1.End = end;
-                                       s = ct.Replacement;
-                                       idx = s.Length - 1;
-                                       end = 0;
-                                       return MatchesBackward (ref s, ref idx, ref end, it, sortkey, noLv4);
-                               }
-                       } else {
-                               // primitive comparison
-                               if (!MatchesPrimitive (s [idx], it, sortkey))
+                               if (!sourceConsumed && targetConsumed)
                                        return false;
-                               idx--;
-                               return true;
+                               if (!targetConsumed) {
+                                       mismatchCount++;
+                                       if (mismatchCount > Uni.MaxExpansionLength)
+                                               // The largest length of an
+                                               // expansion is 3, so if the
+                                               // target was not consumed more
+                                               // than 3 times, then the tail
+                                               // character does not match.
+                                               return false;
+                               }
                        }
-               }
-
-               // Common use methods 
-
-               // returns comparison result.
-               private bool MatchesPrimitive (char src, int ct, byte [] sortkey)
-               {
-                       // char-by-char comparison.
-                       int cs = FilterOptions (src);
-                       if (cs == ct)
-                               return true;
-                       // lv.1 to 3
-                       if (Category (cs) != sortkey [0] ||
-                               Level1 (cs) != sortkey [1] ||
-                               !ignoreNonSpace && Level2 (cs, ExtenderType.None) != sortkey [2] ||
-                               Uni.Level3 (cs) != sortkey [3])
-                               return false;
-                       // lv.4 (only when required). No need to check cj coz
-                       // there is no pair of characters that has the same
-                       // primary level and differs here.
-                       if (!Uni.HasSpecialWeight (src))
-                               return true;
-                       char target = (char) ct;
-                       return Uni.IsJapaneseSmallLetter (src) !=
-                               Uni.IsJapaneseSmallLetter (target) ||
-                               Uni.GetJapaneseDashType (src) !=
-                               Uni.GetJapaneseDashType (target) ||
-                               Uni.IsHiragana (src) !=
-                               Uni.IsHiragana (target) ||
-                               IsHalfKana (src) != IsHalfKana (target);
-               }
-
-               int CompareFlagPair (bool b1, bool b2)
-               {
-                       return b1 == b2 ? 0 : b1 ? 1 : -1;
+                       return false;
+#endif
                }
 
                #endregion
 
                #region IndexOf() / LastIndexOf()
 
-               // IndexOf (string, string, CompareOptions)
-               // IndexOf (string, string, int, int, CompareOptions)
-               // IndexOf (string, char, int, int, CompareOptions)
-               // IndexOfPrimitiveChar (string, int, int, char)
-               // IndexOfSortKey (string, int, int, byte[], char, int, bool)
-               // IndexOf (string, string, int, int)
-
                public int IndexOf (string s, string target, CompareOptions opt)
                {
                        return IndexOf (s, target, 0, s.Length, opt);
@@ -1445,21 +1213,18 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                                (opt & CompareOptions.StringSort) != 0);
                                else
                                        return IndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
+                       } else {
+                               int ti = FilterOptions ((int) target);
+                               charSortKeyIndexTarget [0] = Category (ti);
+                               charSortKeyIndexTarget [1] = Level1 (ti);
+                               if (!ignoreNonSpace)
+                                       charSortKeyIndexTarget [2] =
+                                               Level2 (ti, ExtenderType.None);
+                               charSortKeyIndexTarget [3] = Uni.Level3 (ti);
+                               return IndexOfSortKey (s, start, length,
+                                       charSortKeyIndexTarget, target, ti,
+                                       !Uni.HasSpecialWeight ((char) ti));
                        }
-                       else
-                               return IndexOfPrimitiveChar (s, start, length, target);
-               }
-
-               // Searches target char w/o checking contractions
-               int IndexOfPrimitiveChar (string s, int start, int length, char target)
-               {
-                       int ti = FilterOptions ((int) target);
-                       charSortKeyIndexTarget [0] = Category (ti);
-                       charSortKeyIndexTarget [1] = Level1 (ti);
-                       if (!ignoreNonSpace)
-                               charSortKeyIndexTarget [2] = Level2 (ti, ExtenderType.None);
-                       charSortKeyIndexTarget [3] = Uni.Level3 (ti);
-                       return IndexOfSortKey (s, start, length, charSortKeyIndexTarget, target, ti, !Uni.HasSpecialWeight ((char) ti));
                }
 
                // Searches target byte[] keydata
@@ -1570,33 +1335,27 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                if (ct.Replacement != null)
                                        return LastIndexOf (s, ct.Replacement, start, length, (opt & CompareOptions.StringSort) != 0);
                                else
-                                       return LastIndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
+                                       return LastIndexOfSortKey (s, start, start, length, ct.SortKey, char.MinValue, -1, true);
+                       }
+                       else {
+                               int ti = FilterOptions ((int) target);
+                               charSortKeyIndexTarget [0] = Category (ti);
+                               charSortKeyIndexTarget [1] = Level1 (ti);
+                               if (!ignoreNonSpace)
+                                       charSortKeyIndexTarget [2] = Level2 (ti, ExtenderType.None);
+                               charSortKeyIndexTarget [3] = Uni.Level3 (ti);
+                               return LastIndexOfSortKey (s, start, start, length, charSortKeyIndexTarget, target, ti, !Uni.HasSpecialWeight ((char) ti));
                        }
-                       else
-                               return LastIndexOfPrimitiveChar (s, start, length, target);
-               }
-
-               // Searches target char w/o checking contractions
-               int LastIndexOfPrimitiveChar (string s, int start, int length, char target)
-               {
-                       int ti = FilterOptions ((int) target);
-                       charSortKey [0] = Category (ti);
-                       charSortKey [1] = Level1 (ti);
-                       if (!ignoreNonSpace)
-                               // FIXME: pass ExtenderType
-                               charSortKey [2] = Level2 (ti, ExtenderType.None);
-                       charSortKey [3] = Uni.Level3 (ti);
-                       return LastIndexOfSortKey (s, start, length, charSortKey, target, ti, !Uni.HasSpecialWeight ((char) ti));
                }
 
                // Searches target byte[] keydata
-               int LastIndexOfSortKey (string s, int start, int length, byte [] sortkey, char target, int ti, bool noLv4)
+               int LastIndexOfSortKey (string s, int start, int orgStart, int length, byte [] sortkey, char target, int ti, bool noLv4)
                {
                        int end = start - length;
-
-                       for (int idx = start; idx > end; idx--) {
+                       int idx = start;
+                       while (idx > end) {
                                int cur = idx;
-                               if (Matches (s, ref idx, end, ti, target, sortkey, noLv4, true))
+                               if (MatchesBackward (s, ref idx, end, orgStart, ti, sortkey, noLv4))
                                        return cur;
                        }
                        return -1;
@@ -1614,20 +1373,47 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        if (tidx == target.Length)
                                return start;
                        Contraction ct = GetContraction (target, tidx, target.Length - tidx);
-                       byte [] sortkey = ct != null ? ct.SortKey : null;
                        string replace = ct != null ? ct.Replacement : null;
+                       byte [] sk = replace == null ? charSortKeyIndexTarget : null;
+
+                       bool noLv4 = true;
+                       char tc = char.MinValue;
+                       int ti = -1;
+                       if (ct != null && sk != null) {
+                               for (int i = 0; i < ct.SortKey.Length; i++)
+                                       sk [i] = ct.SortKey [i];
+                       } else if (sk != null) {
+                               tc = target [tidx];
+                               ti = FilterOptions (target [tidx]);
+                               sk [0] = Category (ti);
+                               sk [1] = Level1 (ti);
+                               if (!ignoreNonSpace)
+                                       sk [2] = Level2 (ti, ExtenderType.None);
+                               sk [3] = Uni.Level3 (ti);
+                               noLv4 = !Uni.HasSpecialWeight ((char) ti);
+                       }
+                       if (sk != null) {
+                               for (tidx++; tidx < target.Length; tidx++) {
+                                       if (Category (target [tidx]) != 1)
+                                               break;
+                                       if (sk [2] == 0)
+                                               sk [2] = 2;
+                                       sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
+                               }
+                       }
 
                        do {
                                int idx = 0;
-                               if (sortkey != null)
-                                       idx = LastIndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
-                               else if (replace != null)
+
+                               if (replace != null)
                                        idx = LastIndexOf (s, replace, start, length, stringSort);
                                else
-                                       idx = LastIndexOfPrimitiveChar (s, start, length, target [tidx]);
-
+                                       idx = LastIndexOfSortKey (s, start, orgStart, length, sk, tc, ti, noLv4);
                                if (idx < 0)
                                        return -1;
+                               length -= start - idx;
+                               start = idx;
+
                                if (IsPrefix (s, target, idx, orgStart - idx + 1, stringSort, false)) {
                                        for (;idx < orgStart; idx++)
                                                if (!IsIgnorable (s [idx]))
@@ -1646,62 +1432,6 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        return -1;
                }
 
-               private bool Matches (string s, ref int idx, int end, int ti, char target, byte [] sortkey, bool noLv4, bool lastIndexOf)
-               {
-                       switch (char.GetUnicodeCategory (s [idx])) {
-                       case UnicodeCategory.PrivateUse:
-                       case UnicodeCategory.Surrogate:
-                               if (s [idx] != target)
-                                       return false;
-                               return true;
-                       }
-
-                       char sc = char.MinValue;
-                       Contraction ct = GetContraction (s, idx, end);
-                       // if lv4 exists, it never matches contraction
-                       if (ct != null && noLv4) {
-                               if (lastIndexOf)
-                                       idx -= ct.Source.Length - 1;
-                               else
-                                       idx += ct.Source.Length - 1;
-                               if (ct.SortKey != null) {
-                                       for (int i = 0; i < sortkey.Length; i++)
-                                               if (ct.SortKey [i] != sortkey [i])
-                                                       return false;
-                                       return true;
-                               }
-                               // Here is the core of LAMESPEC
-                               // described at the top of the source.
-                               sc = ct.Replacement [0];
-                       }
-                       else
-                               sc = s [idx];
-
-                       if (sc == target)
-                               return true;
-                       int si = FilterOptions ((int) sc);
-                       if (Category (si) != sortkey [0] ||
-                               Level1 (si) != sortkey [1] ||
-                               // FIXME: pass ExtenderType
-                               !ignoreNonSpace && Level2 (si, ExtenderType.None) != sortkey [2] ||
-                               Uni.Level3 (si) != sortkey [3])
-                               return false;
-                       if (noLv4 && !Uni.HasSpecialWeight ((char) si))
-                               return true;
-                       else if (noLv4)
-                               return false;
-                       if (Uni.IsJapaneseSmallLetter ((char) si) !=
-                               Uni.IsJapaneseSmallLetter ((char) ti) ||
-                               Uni.GetJapaneseDashType ((char) si) !=
-                               Uni.GetJapaneseDashType ((char) ti) ||
-                               !Uni.IsHiragana ((char) si) !=
-                               !Uni.IsHiragana ((char) ti) ||
-                               IsHalfKana ((char) si) !=
-                               IsHalfKana ((char) ti))
-                               return false;
-                       return true;
-               }
-
                private bool MatchesForward (string s, ref int idx, int end, int ti, byte [] sortkey, bool noLv4)
                {
                        int si = -1;
@@ -1709,8 +1439,13 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        Contraction ct = null;
                        if (ext == ExtenderType.None)
                                ct = GetContraction (s, idx, end);
-                       else if (previousChar < 0)
+                       else if (previousChar < 0) {
+                               if (previousSortKey == null) {
+                                       idx++;
+                                       return false;
+                               }
                                charSortKey = previousSortKey;
+                       }
                        else
                                si = FilterExtender (previousChar, ext);
                        // if lv4 exists, it never matches contraction
@@ -1753,6 +1488,11 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                                + Level2 (s [idx], ExtenderType.None));
                        }
 
+                       return MatchesPrimitive (charSortKey, si, ext, sortkey, ti, noLv4);
+               }
+
+               private bool MatchesPrimitive (byte [] charSortKey, int si, ExtenderType ext, byte [] sortkey, int ti, bool noLv4)
+               {
                        if (charSortKey [0] != sortkey [0] ||
                                charSortKey [1] != sortkey [1] ||
                                (!ignoreNonSpace && charSortKey [2] != sortkey [2]) ||
@@ -1768,8 +1508,9 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                return false;
                        if (Uni.IsJapaneseSmallLetter ((char) si) !=
                                Uni.IsJapaneseSmallLetter ((char) ti) ||
-                               Uni.GetJapaneseDashType ((char) si) !=
-                               Uni.GetJapaneseDashType ((char) ti) ||
+                               ToDashTypeValue (ext) !=
+                               // FIXME: we will have to specify correct value for target
+                               ToDashTypeValue (ExtenderType.None) ||
                                !Uni.IsHiragana ((char) si) !=
                                !Uni.IsHiragana ((char) ti) ||
                                IsHalfKana ((char) si) !=
@@ -1777,6 +1518,89 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                return false;
                        return true;
                }
+
+               private bool MatchesBackward (string s, ref int idx, int end, int orgStart, int ti, byte [] sortkey, bool noLv4)
+               {
+                       int cur = idx;
+                       int si = -1;
+                       ExtenderType ext = GetExtenderType (s [idx]);
+                       // To handle extenders in source, we need to
+                       // check next _primary_ character.
+                       if (ext != ExtenderType.None) {
+                               byte diacritical = 0;
+                               for (int tmp = 0; ; tmp--) {
+                                       if (tmp < 0) // heading extender
+                                               return false;
+                                       if (IsIgnorable (s [tmp]))
+                                               continue;
+                                       int tmpi = FilterOptions (s [tmp]);
+                                       byte category = Category (tmpi);
+                                       if (category == 1) {
+                                               diacritical = Level2 (tmpi, ExtenderType.None);
+                                               continue;
+                                       }
+                                       si = FilterExtender (tmpi, ext);
+                                       charSortKey [0] = category;
+                                       charSortKey [1] = Level1 (si);
+                                       if (!ignoreNonSpace)
+                                               charSortKey [2] = Level2 (si, ext);
+                                       charSortKey [3] = Uni.Level3 (si);
+                                       if (ext != ExtenderType.Conditional &&
+                                               diacritical != 0)
+                                               charSortKey [2] =
+                                                       (charSortKey [2] == 0) ?
+                                                       (byte) (diacritical + 2) :
+                                                       diacritical;
+                                       break;
+                               }
+                               idx--;
+                       }
+                       Contraction ct = null;
+                       if (ext == ExtenderType.None)
+                               ct = GetContraction (s, idx, end);
+                       // if lv4 exists, it never matches contraction
+                       if (ct != null) {
+                               idx -= ct.Source.Length;
+                               if (!noLv4)
+                                       return false;
+                               if (ct.SortKey != null) {
+                                       for (int i = 0; i < sortkey.Length; i++)
+                                               charSortKey [i] = sortkey [i];
+                                       previousChar = -1;
+                                       previousSortKey = charSortKey;
+                               } else {
+                                       // Here is the core of LAMESPEC
+                                       // described at the top of the source.
+                                       int dummy = ct.Replacement.Length - 1;
+                                       return MatchesBackward (ct.Replacement, 
+                                               ref dummy, dummy, -1, ti, sortkey, noLv4);
+                               }
+                       } else if (ext == ExtenderType.None) {
+                               if (si < 0)
+                                       si = FilterOptions (s [idx]);
+                               charSortKey [0] = Category (si);
+                               charSortKey [1] = Level1 (si);
+                               if (!ignoreNonSpace)
+                                       charSortKey [2] = Level2 (si, ext);
+                               charSortKey [3] = Uni.Level3 (si);
+                               if (charSortKey [0] != 1)
+                                       previousChar = si;
+                               idx--;
+                       }
+                       if (ext == ExtenderType.None) {
+                               for (int tmp = cur + 1; tmp < orgStart; tmp++) {
+                                       if (Category (s [tmp]) != 1)
+                                               break;
+                                       if (ignoreNonSpace)
+                                               continue;
+                                       if (charSortKey [2] == 0)
+                                                       charSortKey [2] = 2;
+                                       charSortKey [2] = (byte) (charSortKey [2]
+                                               + Level2 (s [tmp], ExtenderType.None));
+                               }
+                       }
+                       return MatchesPrimitive (charSortKey, si, ext, sortkey, ti, noLv4);
+               }
                #endregion
        }
 }