2005-07-26 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / SimpleCollator.cs
index 85b165880aac793153070e3b5bd434d83de1ed55..cbe4d904eb17cdb8836bd2eb0ef914f839aff62b 100644 (file)
 // other character than the identical sequence (depending on CompareOptions
 // and of course, defined sortkey value itself), comparison cannot be done
 // at source char[] level.
-//
-// (to be continued.)
-//
-
 //
 // In IndexOf(), the first character in the target string or the target char
 // itself is turned into sortkey bytes. If the character has a contraction and
@@ -37,7 +33,6 @@
 // tries IsPrefix() from that location. If it returns true, then it returns
 // the index.
 //
-
 // LAMESPEC: IndexOf() is lame as a whole API. It never matches in the middle
 // of expansion and there is no proper way to return such indexes within
 // a single int return value.
@@ -62,30 +57,6 @@ namespace Mono.Globalization.Unicode
                static SimpleCollator invariant =
                        new SimpleCollator (CultureInfo.InvariantCulture);
 
-               internal static readonly byte [] ignorableFlags =
-                       Uni.ignorableFlags;
-               internal static readonly byte [] categories =
-                       Uni.categories;
-               internal static readonly byte [] level1 =
-                       Uni.level1;
-               internal static readonly byte [] level2 =
-                       Uni.level2;
-               internal static readonly byte [] level3 =
-                       Uni.level3;
-               internal static readonly ushort [] widthCompat =
-                       Uni.widthCompat;
-               internal static readonly CodePointIndexer categoryIndexer =
-                       UUtil.Category;
-               internal static readonly CodePointIndexer lv1Indexer =
-                       UUtil.Level1;
-               internal static readonly CodePointIndexer lv2Indexer =
-                       UUtil.Level2;
-               internal static readonly CodePointIndexer lv3Indexer =
-                       UUtil.Level3;
-               internal static readonly CodePointIndexer widthIndexer =
-                       UUtil.WidthCompat;
-
-
                SortKeyBuffer buf;
                // CompareOptions expanded.
                bool ignoreNonSpace; // used in IndexOf()
@@ -95,92 +66,25 @@ namespace Mono.Globalization.Unicode
                bool ignoreKanaType;
                TextInfo textInfo; // for ToLower().
                bool frenchSort;
-               readonly ushort [] cjkTable;
+               unsafe readonly byte* cjkCatTable;
+               unsafe readonly byte* cjkLv1Table;
                readonly CodePointIndexer cjkIndexer;
-               readonly byte [] cjkLv2Table;
+               unsafe readonly byte* cjkLv2Table;
                readonly CodePointIndexer cjkLv2Indexer;
                readonly int lcid;
                readonly Contraction [] contractions;
                readonly Level2Map [] level2Maps;
 
+               // This flag marks characters as "unsafe", where the character
+               // could be used as part of a contraction (whose length > 1).
+               readonly bool [] unsafeFlags;
+
+               const int UnsafeFlagLength = 0x300;
+
                // temporary sortkey buffer for index search/comparison
                byte [] charSortKey = new byte [4];
                byte [] charSortKey2 = new byte [4];
-               // temporary expansion store for IsPrefix/Suffix
-               int escapedSourceIndex;
-
-               #region Tailoring support classes
-               // Possible mapping types are:
-               //
-               //      - string to string (ReplacementMap)
-               //      - string to SortKey (SortKeyMap)
-               //      - diacritical byte to byte (DiacriticalMap)
-               //
-               // There could be mapping from string to sortkeys, but
-               // for now there is none as such.
-               //
-               internal class Contraction
-               {
-                       public readonly char [] Source;
-                       // only either of them is used.
-                       public readonly string Replacement;
-                       public readonly byte [] SortKey;
-
-                       public Contraction (char [] source,
-                               string replacement, byte [] sortkey)
-                       {
-                               Source = source;
-                               Replacement = replacement;
-                               SortKey = sortkey;
-                       }
-               }
-
-               internal class ContractionComparer : IComparer
-               {
-                       public static readonly ContractionComparer Instance =
-                               new ContractionComparer ();
-
-                       public int Compare (object o1, object o2)
-                       {
-                               Contraction c1 = (Contraction) o1;
-                               Contraction c2 = (Contraction) o2;
-                               char [] a1 = c1.Source;
-                               char [] a2 = c2.Source;
-                               int min = a1.Length > a2.Length ?
-                                       a2.Length : a1.Length;
-                               for (int i = 0; i < min; i++)
-                                       if (a1 [i] != a2 [i])
-                                               return a1 [i] - a2 [i];
-                               return a1.Length - a2.Length;
-                       }
-               }
-
-               internal class Level2Map
-               {
-                       public byte Source;
-                       public byte Replace;
-
-                       public Level2Map (byte source, byte replace)
-                       {
-                               Source = source;
-                               Replace = replace;
-                       }
-               }
-
-               internal class Level2MapComparer : IComparer
-               {
-                       public static readonly Level2MapComparer Instance =
-                               new Level2MapComparer ();
-
-                       public int Compare (object o1, object o2)
-                       {
-                               Level2Map m1 = (Level2Map) o1;
-                               Level2Map m2 = (Level2Map) o2;
-                               return (m1.Source - m2.Source);
-                       }
-               }
-
-               #endregion
+               byte [] charSortKeyIndexTarget = new byte [4];
 
                #region .ctor() and split functions
 
@@ -190,8 +94,11 @@ namespace Mono.Globalization.Unicode
                        textInfo = culture.TextInfo;
                        buf = new SortKeyBuffer (culture.LCID);
 
-                       SetCJKTable (culture, ref cjkTable, ref cjkIndexer,
-                               ref cjkLv2Table, ref cjkLv2Indexer);
+                       unsafe {
+                               SetCJKTable (culture, ref cjkIndexer,
+                                       ref cjkCatTable, ref cjkLv1Table,
+                                       ref cjkLv2Indexer, ref cjkLv2Table);
+                       }
 
                        // Get tailoring info
                        TailoringInfo t = null;
@@ -204,8 +111,14 @@ namespace Mono.Globalization.Unicode
                                t = Uni.GetTailoringInfo (127);
 
                        frenchSort = t.FrenchSort;
-                       BuildTailoringTables (culture, t, ref contractions,
+                       Uni.BuildTailoringTables (culture, t, ref contractions,
                                ref level2Maps);
+                       unsafeFlags = new bool [UnsafeFlagLength];
+                       foreach (Contraction c in contractions)
+                               if (c.Source.Length > 1)
+                                       foreach (char ch in c.Source)
+                                               unsafeFlags [(int) ch] = true;
+
                        // FIXME: Since tailorings are mostly for latin
                        // (and in some cases Cyrillic) characters, it would
                        // be much better for performance to store "start 
@@ -223,97 +136,15 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
 */
                }
 
-               private void BuildTailoringTables (CultureInfo culture,
-                       TailoringInfo t,
-                       ref Contraction [] contractions,
-                       ref Level2Map [] diacriticals)
-               {
-                       // collect tailoring entries.
-                       ArrayList cmaps = new ArrayList ();
-                       ArrayList dmaps = new ArrayList ();
-                       char [] tarr = Uni.TailoringValues;
-                       int idx = t.TailoringIndex;
-                       int end = idx + t.TailoringCount;
-                       while (idx < end) {
-                               int ss = idx + 1;
-                               char [] src = null;
-                               switch (tarr [idx]) {
-                               case '\x1': // SortKeyMap
-                                       idx++;
-                                       while (tarr [ss] != 0)
-                                               ss++;
-                                       src = new char [ss - idx];
-                                       Array.Copy (tarr, idx, src, 0, ss - idx);
-                                       byte [] sortkey = new byte [4];
-                                       for (int i = 0; i < 4; i++)
-                                               sortkey [i] = (byte) tarr [ss + 1 + i];
-                                       cmaps.Add (new Contraction (
-                                               src, null, sortkey));
-                                       // it ends with 0
-                                       idx = ss + 6;
-                                       break;
-                               case '\x2': // DiacriticalMap
-                                       dmaps.Add (new Level2Map (
-                                               (byte) tarr [idx + 1],
-                                               (byte) tarr [idx + 2]));
-                                       idx += 3;
-                                       break;
-                               case '\x3': // ReplacementMap
-                                       idx++;
-                                       while (tarr [ss] != 0)
-                                               ss++;
-                                       src = new char [ss - idx];
-                                       Array.Copy (tarr, idx, src, 0, ss - idx);
-                                       ss++;
-                                       int l = ss;
-                                       while (tarr [l] != 0)
-                                               l++;
-                                       string r = new string (tarr, ss, l - ss);
-                                       cmaps.Add (new Contraction (
-                                               src, r, null));
-                                       idx = l + 1;
-                                       break;
-                               default:
-                                       throw new NotImplementedException (String.Format ("Mono INTERNAL ERROR (Should not happen): Collation tailoring table is broken for culture {0} ({1}) at 0x{2:X}", culture.LCID, culture.Name, idx));
-                               }
-                       }
-                       cmaps.Sort (ContractionComparer.Instance);
-                       dmaps.Sort (Level2MapComparer.Instance);
-                       contractions = cmaps.ToArray (typeof (Contraction))
-                               as Contraction [];
-                       diacriticals = dmaps.ToArray (typeof (Level2Map))
-                               as Level2Map [];
-               }
-
-               private void SetCJKTable (CultureInfo culture,
-                       ref ushort [] cjkTable, ref CodePointIndexer cjkIndexer,
-                       ref byte [] cjkLv2Table, ref CodePointIndexer cjkLv2Indexer)
+               unsafe private void SetCJKTable (
+                       CultureInfo culture, ref CodePointIndexer cjkIndexer,
+                       ref byte* catTable, ref byte* lv1Table,
+                       ref CodePointIndexer lv2Indexer, ref byte* lv2Table)
                {
                        string name = GetNeutralCulture (culture).Name;
 
-                       Uni.FillCJK (name);
-
-                       // custom CJK table support.
-                       switch (name) {
-                       case "zh-CHS":
-                               cjkTable = Uni.CjkCHS;
-                               cjkIndexer = UUtil.CjkCHS;
-                               break;
-                       case "zh-CHT":
-                               cjkTable = Uni.CjkCHT;
-                               cjkIndexer = UUtil.Cjk;
-                               break;
-                       case "ja":
-                               cjkTable = Uni.CjkJA;
-                               cjkIndexer = UUtil.Cjk;
-                               break;
-                       case "ko":
-                               cjkTable = Uni.CjkKO;
-                               cjkLv2Table = Uni.CjkKOLv2;
-                               cjkIndexer = UUtil.Cjk;
-                               cjkLv2Indexer = UUtil.Cjk;
-                               break;
-                       }
+                       Uni.FillCJK (name, ref cjkIndexer, ref catTable,
+                               ref lv1Table, ref lv2Indexer, ref lv2Table);
                }
 
                static CultureInfo GetNeutralCulture (CultureInfo info)
@@ -326,27 +157,23 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
 
                #endregion
 
-               byte Category (int cp)
+               unsafe byte Category (int cp)
                {
-                       if (cp < 0x3000 || cjkTable == null)
-                               return categories [categoryIndexer.ToIndex (cp)];
+                       if (cp < 0x3000 || cjkCatTable == null)
+                               return Uni.Category (cp);
                        int idx = cjkIndexer.ToIndex (cp);
-                       ushort cjk = idx < 0 ? (ushort) 0 : cjkTable [idx];
-                       return cjk != 0 ? (byte) ((cjk & 0xFF00) >> 8) :
-                               categories [categoryIndexer.ToIndex (cp)];
+                       return idx < 0 ? Uni.Category (cp) : cjkCatTable [idx];
                }
 
-               byte Level1 (int cp)
+               unsafe byte Level1 (int cp)
                {
-                       if (cp < 0x3000 || cjkTable == null)
-                               return level1 [lv1Indexer.ToIndex (cp)];
+                       if (cp < 0x3000 || cjkLv1Table == null)
+                               return Uni.Level1 (cp);
                        int idx = cjkIndexer.ToIndex (cp);
-                       ushort cjk = idx < 0 ? (ushort) 0 : cjkTable [idx];
-                       return cjk != 0 ? (byte) (cjk & 0xFF) :
-                               level1 [lv1Indexer.ToIndex (cp)];
+                       return idx < 0 ? Uni.Level1 (cp) : cjkLv1Table [idx];
                }
 
-               byte Level2 (int cp, ExtenderType ext)
+               unsafe byte Level2 (int cp, ExtenderType ext)
                {
                        if (ext == ExtenderType.Buggy)
                                return 5;
@@ -354,12 +181,12 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                return 0;
 
                        if (cp < 0x3000 || cjkLv2Table == null)
-                               return level2 [lv2Indexer.ToIndex (cp)];
+                               return Uni.Level2 (cp);
                        int idx = cjkLv2Indexer.ToIndex (cp);
                        byte ret = idx < 0 ? (byte) 0 : cjkLv2Table [idx];
                        if (ret != 0)
                                return ret;
-                       ret = level2 [lv2Indexer.ToIndex (cp)];
+                       ret = Uni.Level2 (cp);
                        if (level2Maps.Length == 0)
                                return ret;
                        for (int i = 0; i < level2Maps.Length; i++) {
@@ -383,6 +210,9 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        this.ignoreWidth = (options & CompareOptions.IgnoreWidth) != 0;
                        this.ignoreCase = (options & CompareOptions.IgnoreCase) != 0;
                        this.ignoreKanaType = (options & CompareOptions.IgnoreKanaType) != 0;
+                       previousChar = previousChar2 = -1;
+                       previousSortKey = previousSortKey2 = null;
+                       escape1.Source = escape2.Source = null;
                }
 
                Contraction GetContraction (string s, int start, int end)
@@ -473,7 +303,7 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                int FilterOptions (int i)
                {
                        if (ignoreWidth) {
-                               int x = widthCompat [widthIndexer.ToIndex (i)];
+                               int x = Uni.ToWidthCompat (i);
                                if (x != 0)
                                        i = x;
                        }
@@ -505,12 +335,25 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        // Windows also expects true for U+3031 and U+3032,
                        // but they should *never* repeat one character.
 
+                       // U+2015 becomes an extender only when it is Japanese
+                       if (i == 0x2015)
+                               return lcid == 16 ? ExtenderType.Conditional :
+                                       ExtenderType.None;
+
                        if (i < 0x3005 || i > 0xFF70)
                                return ExtenderType.None;
-                       if (i == 0xFE7C || i == 0xFE7D)
-                               return ExtenderType.Simple;
-                       if (i == 0xFF70)
-                               return ExtenderType.Conditional;
+                       if (i >= 0xFE7C) {
+                               switch (i) {
+                               case 0xFE7C:
+                               case 0xFE7D:
+                                       return ExtenderType.Simple;
+                               case 0xFF70:
+                                       return ExtenderType.Conditional;
+                               case 0xFF9E:
+                               case 0xFF9F:
+                                       return ExtenderType.Voiced;
+                               }
+                       }
                        if (i > 0x30FE)
                                return ExtenderType.None;
                        switch (i) {
@@ -574,6 +417,10 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                ignoreNonSpace && Uni.IsIgnorableNonSpacing (i);
                }
 
+               bool IsSafe (int i)
+               {
+                       return i >= unsafeFlags.Length ? true : !unsafeFlags [i];
+               }
 
                #region GetSortKey()
 
@@ -593,7 +440,6 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
 
                        buf.Initialize (options, lcid, s, frenchSort);
                        int end = start + length;
-                       previousChar = -1;
                        GetSortKey (s, start, end);
                        return buf.GetResultAndReset ();
                }
@@ -614,7 +460,7 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                                        b [0],
                                                        b [1],
                                                        b [2] != 1 ? b [2] : Level2 (i, ext),
-                                                       b [3] != 1 ? b [3] : level3 [lv3Indexer.ToIndex (i)]);
+                                                       b [3] != 1 ? b [3] : Uni.Level3 (i));
                                        }
                                        // otherwise do nothing.
                                        // (if the extender is the first char
@@ -636,7 +482,7 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                                        b [0],
                                                        b [1],
                                                        b [2] != 1 ? b [2] : Level2 (i, ext),
-                                                       b [3] != 1 ? b [3] : level3 [lv3Indexer.ToIndex (i)]);
+                                                       b [3] != 1 ? b [3] : Uni.Level3 (i));
                                                previousSortKey = b;
                                                previousChar = -1;
                                        }
@@ -752,6 +598,7 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        public int Index;
                        public int Start;
                        public int End;
+                       public int Optional;
                }
 
                // Those instances are reused not to invoke instantiation
@@ -800,25 +647,23 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        return d1.Length == d2.Length ? 0 : d1.Length < d2.Length ? -1 : 1;
 #else
                        SetOptions (options);
-                       escape1.Source = null;
-                       escape2.Source = null;
-                       previousSortKey= previousSortKey2 = null;
-                       previousChar = previousChar2 = -1;
-                       bool dummy;
-                       int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, (options & CompareOptions.StringSort) != 0, out dummy);
+                       bool dummy, dummy2;
+                       int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, (options & CompareOptions.StringSort) != 0, out dummy, out dummy2, true);
                        return ret == 0 ? 0 : ret < 0 ? -1 : 1;
 #endif
                }
 
                int CompareInternal (string s1, int idx1, int len1, string s2,
                        int idx2, int len2, bool stringSort,
-                       out bool targetConsumed)
+                       out bool targetConsumed, out bool sourceConsumed,
+                       bool skipHeadingExtenders)
                {
                        int start1 = idx1;
                        int start2 = idx2;
                        int end1 = idx1 + len1;
                        int end2 = idx2 + len2;
                        targetConsumed = false;
+                       sourceConsumed = false;
 
                        // It holds final result that comes from the comparison
                        // at level 2 or lower. Even if Compare() found the
@@ -836,16 +681,21 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        int lv5Value2 = 0;
 
                        // Skip heading extenders
-                       for (; idx1 < end1; idx1++)
-                               if (GetExtenderType (s1 [idx1]) == ExtenderType.None)
-                                       break;
-                       for (; idx2 < end2; idx2++)
-                               if (GetExtenderType (s2 [idx2]) == ExtenderType.None)
-                                       break;
+                       if (skipHeadingExtenders) {
+                               for (; idx1 < end1; idx1++)
+                                       if (GetExtenderType (s1 [idx1]) == ExtenderType.None)
+                                               break;
+                               for (; idx2 < end2; idx2++)
+                                       if (GetExtenderType (s2 [idx2]) == ExtenderType.None)
+                                               break;
+                       }
 
                        ExtenderType ext1 = ExtenderType.None;
                        ExtenderType ext2 = ExtenderType.None;
 
+                       int quickCheckPos1 = idx1;
+                       int quickCheckPos2 = idx2;
+
                        while (true) {
                                for (; idx1 < end1; idx1++)
                                        if (!IsIgnorable (s1 [idx1]))
@@ -861,6 +711,7 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                        start1 = escape1.Start;
                                        idx1 = escape1.Index;
                                        end1 = escape1.End;
+                                       quickCheckPos1 = escape1.Optional;
                                        escape1.Source = null;
                                        continue;
                                }
@@ -871,21 +722,48 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                        start2 = escape2.Start;
                                        idx2 = escape2.Index;
                                        end2 = escape2.End;
+                                       quickCheckPos2 = escape2.Optional;
                                        escape2.Source = null;
                                        continue;
                                }
-#if false
-// FIXME: optimization could be done here.
+#if true
+// If comparison is unstable, then this part is one of the most doubtful part.
+// Here we run quick codepoint comparison and run back to "stable" area to
+// compare characters.
+
+                               // Strictly to say, even the first character
+                               // could be compared here, but it messed
+                               // backward step, so I just avoided mess.
+                               if (quickCheckPos1 < idx1 && quickCheckPos2 < idx2) {
+                                       while (idx1 < end1 && idx2 < end2 &&
+                                               s1 [idx1] == s2 [idx2]) {
+                                               idx1++;
+                                               idx2++;
+                                       }
+                                       if (idx1 == end1 || idx2 == end2)
+                                               continue; // check replacement
 
-                               if (s1 [idx1] == s2 [idx2]) {
-                                       idx1++;
-                                       idx2++;
-                                       continue;
+                                       int backwardEnd1 = quickCheckPos1;
+                                       int backwardEnd2 = quickCheckPos2;
+                                       quickCheckPos1 = idx1;
+                                       quickCheckPos2 = idx2;
+
+                                       idx1--;
+                                       idx2--;
+
+                                       for (;idx1 > backwardEnd1; idx1--)
+                                               if (Category (s1 [idx1]) != 1)
+                                                       break;
+                                       for (;idx2 > backwardEnd2; idx2--)
+                                               if (Category (s2 [idx2]) != 1)
+                                                       break;
+                                       for (;idx1 > backwardEnd1; idx1--)
+                                               if (IsSafe (s1 [idx1]))
+                                                       break;
+                                       for (;idx2 > backwardEnd2; idx2--)
+                                               if (IsSafe (s2 [idx2]))
+                                                       break;
                                }
-//                             while (idx1 >= start1 && !IsSafe ((int) s [idx1]))
-//                                     idx1--;
-//                             while (idx2 >= start2 && !IsSafe ((int) s [idx2]))
-//                                     idx2--;
 #endif
 
                                int cur1 = idx1;
@@ -978,10 +856,12 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                                escape1.Start = start1;
                                                escape1.Index = cur1 + ct1.Source.Length;
                                                escape1.End = end1;
+                                               escape1.Optional = quickCheckPos1;
                                                s1 = ct1.Replacement;
                                                idx1 = 0;
                                                start1 = 0;
                                                end1 = s1.Length;
+                                               quickCheckPos1 = 0;
                                                continue;
                                        }
                                }
@@ -1019,10 +899,12 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                                escape2.Start = start2;
                                                escape2.Index = cur2 + ct2.Source.Length;
                                                escape2.End = end2;
+                                               escape2.Optional = quickCheckPos2;
                                                s2 = ct2.Replacement;
                                                idx2 = 0;
                                                start2 = 0;
                                                end2 = s2.Length;
+                                               quickCheckPos2 = 0;
                                                continue;
                                        }
                                }
@@ -1139,17 +1021,34 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                        ext2 = ExtenderType.None;
                                }
                        }
+                       if (currentLevel == 1 && finalResult != 0) {
+                               while (idx1 < end1)
+                                       if (Uni.IsIgnorableNonSpacing (s1 [idx1]))
+                                               idx1++;
+                               while (idx2 < end2)
+                                       if (Uni.IsIgnorableNonSpacing (s2 [idx2]))
+                                               idx2++;
+                       }
                        // we still have to handle level 5
                        if (finalResult == 0) {
                                finalResult = lv5At1 - lv5At2;
                                if (finalResult == 0)
                                        finalResult = lv5Value1 - lv5Value2;
                        }
-                       if (finalResult == 0 && idx2 == end2)
-                               targetConsumed = true;
+                       if (finalResult == 0) {
+                               if (idx2 == end2)
+                                       targetConsumed = true;
+                               if (idx1 == end1)
+                                       sourceConsumed = true;
+                       }
                        return idx1 != end1 ? 1 : idx2 == end2 ? finalResult : -1;
                }
 
+               int CompareFlagPair (bool b1, bool b2)
+               {
+                       return b1 == b2 ? 0 : b1 ? 1 : -1;
+               }
+
                #endregion
 
                #region IsPrefix() and IsSuffix()
@@ -1163,164 +1062,18 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                {
                        SetOptions (opt);
                        return IsPrefix (s, target, start, length, 
-                               (opt & CompareOptions.StringSort) != 0);
+                               (opt & CompareOptions.StringSort) != 0, true);
                }
 
-               public bool IsPrefix (string s, string target, int start, int length, bool stringSort)
+               public bool IsPrefix (string s, string target, int start, int length, bool stringSort, bool skipHeadingExtenders)
                {
-                       bool consumed;
-                       escape1.Source = null;
-                       escape2.Source = null;
-                       previousSortKey= previousSortKey2 = null;
-                       previousChar = previousChar2 = -1;
+                       bool consumed, dummy;
                        int ret = CompareInternal (s, start, length,
                                target, 0, target.Length, stringSort,
-                               out consumed);
+                               out consumed, out dummy, skipHeadingExtenders);
                        return consumed;
                }
 
-/*
-               public bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
-               {
-                       SetOptions (opt);
-                       return IsPrefix (s, target, start, length);
-               }
-
-               // returns the consumed length in positive number, or -1 if
-               // target was not a prefix.
-               bool IsPrefix (string s, string target, int start, int length)
-               {
-                       // quick check : simple codepoint comparison
-                       if (s.Length >= target.Length) {
-                               int si = start;
-                               for (int i = 0; si < length && i < target.Length; i++, si++)
-                                       if (s [si] != target [i])
-                                               break;
-                               if (si == start + target.Length)
-                                       return true;
-                       }
-
-                       escapedSourceIndex = -1;
-                       return IsPrefixInternal (s, target, start, length);
-               }
-
-               bool IsPrefixInternal (string s, string target, int start, int length)
-               {
-                       int si = start;
-                       int end = start + length;
-                       int ti = 0;
-                       string source = s;
-
-                       while (ti < target.Length) {
-                               if (IsIgnorable (target [ti])) {
-                                       ti++;
-                                       continue;
-                               }
-                               if (si >= end) {
-                                       if (s == source)
-                                               break;
-                                       s = source;
-                                       si = escapedSourceIndex;
-                                       end = start + length;
-                                       escapedSourceIndex = -1;
-                                       continue;
-                               }
-                               if (IsIgnorable (s [si])) {
-                                       si++;
-                                       continue;
-                               }
-
-                               // Check contraction for target.
-                               Contraction ctt = GetContraction (target, ti, target.Length);
-                               if (ctt != null) {
-                                       ti += ctt.Source.Length;
-                                       if (ctt.SortKey != null) {
-                                               int ret = GetMatchLength (ref s, ref si, ref end, -1, ctt.SortKey, true);
-                                               if (ret < 0)
-                                                       return false;
-                                               si += ret;
-                                       } else {
-                                               string r = ctt.Replacement;
-                                               int i = 0;
-                                               while (i < r.Length && si < end) {
-                                                       int ret = GetMatchLength (ref s, ref si, ref end, r [i]);
-                                                       if (ret < 0)
-                                                               return false;
-                                                       si += ret;
-                                                       i++;
-                                               }
-                                               if (i < r.Length && si >= end)
-                                                       return false;
-                                       }
-                               }
-                               else {
-                                       int ret = GetMatchLength (ref s, ref si, ref end, target [ti]);
-                                       if (ret < 0)
-                                               return false;
-                                       si += ret;
-                                       ti++;
-                               }
-                       }
-                       if (si == end) {
-                               // All codepoints in the compared range
-                               // matches. In that case, what matters 
-                               // is whether the remaining part of 
-                               // "target" is ignorable or not.
-                               while (ti < target.Length)
-                                       if (!IsIgnorable (target [ti++]))
-                                               return false;
-                               return true;
-                       }
-                       return true;
-               }
-*/
-
-               // WARNING: Don't invoke it outside IsPrefix().
-               int GetMatchLength (ref string s, ref int idx, ref int end, char target)
-               {
-                       int it = FilterOptions ((int) target);
-                       charSortKey [0] = Category (it);
-                       charSortKey [1] = Level1 (it);
-                       if (!ignoreNonSpace)
-                               // FIXME: pass ExtenderType
-                               charSortKey [2] = Level2 (it, ExtenderType.None);
-                       charSortKey [3] = Uni.Level3 (it);
-
-                       return GetMatchLength (ref s, ref idx, ref end, it, charSortKey, !Uni.HasSpecialWeight ((char) it));
-               }
-
-               // WARNING: Don't invoke it outside IsPrefix().
-               // returns consumed source length (mostly 1, source length in case of contraction)
-               int GetMatchLength (ref string s, ref int idx, ref int end, int it, byte [] sortkey, bool noLv4)
-               {
-                       Contraction ct = null;
-                       // If there is already expansion, then it should not
-                       // process further expansions.
-                       if (escapedSourceIndex < 0)
-                               ct = GetContraction (s, idx, end);
-                       if (ct != null) {
-                               if (ct.SortKey != null) {
-                                       if (!noLv4)
-                                               return -1;
-                                       for (int i = 0; i < ct.SortKey.Length; i++)
-                                               if (sortkey [i] != ct.SortKey [i])
-                                                       return -1;
-                                       return ct.Source.Length;
-                               } else {
-                                       escapedSourceIndex = idx + ct.Source.Length;
-                                       s = ct.Replacement;
-                                       idx = 0;
-                                       end = s.Length;
-                                       return GetMatchLength (ref s, ref idx, ref end, it, sortkey, noLv4);
-                               }
-                       } else {
-                               // primitive comparison
-                               if (Compare (s [idx], it, sortkey) != 0)
-                                       return -1;
-                               return 1;
-                       }
-               }
-
                // IsSuffix()
 
                public bool IsSuffix (string src, string target, CompareOptions opt)
@@ -1330,8 +1083,6 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
 
                public bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
                {
-                       SetOptions (opt);
-
                        // quick check : simple codepoint comparison
                        if (s.Length >= target.Length) {
                                int si = start;
@@ -1343,190 +1094,96 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                                        return true;
                        }
 
-                       escapedSourceIndex = -1;
-                       return IsSuffix (s, target, start, length);
+                       SetOptions (opt);
+                       return IsSuffix (s, target, start, length,
+                               (opt & CompareOptions.StringSort) != 0);
                }
 
-               bool IsSuffix (string s, string target, int start, int length)
+               bool IsSuffix (string s, string t, int start, int length,
+                       bool stringSort)
                {
+                       int tstart = 0;
+                       for (;tstart < t.Length; tstart++)
+                               if (!IsIgnorable (t [tstart]))
+                                       break;
+                       if (tstart == t.Length)
+                               return true; // as if target is String.Empty.
+
+#if false
+// This is still not working. If it is not likely to get working, then just remove it.
                        int si = start;
-                       int ti = target.Length - 1;
-                       string source = s;
-                       int end = start - length + 1;
+                       int send = start - length;
+                       int ti = t.Length - 1;
+                       int tend = -1;
 
-                       while (ti >= 0) {
-                               if (IsIgnorable (target [ti])) {
-                                       ti--;
-                                       continue;
-                               }
-                               if (si < 0) {
-                                       if (s == source)
+                       int sStep = start + 1;
+                       int tStep = t.Length;
+                       bool sourceConsumed, targetConsumed;
+                       while (true) {
+                               for (; send < si; si--)
+                                       if (!IsIgnorable (s [si]))
                                                break;
-                                       s = source;
-                                       si = escapedSourceIndex;
-                                       escapedSourceIndex = -1;
-                                       continue;
-                               }
-                               if (IsIgnorable (s [si])) {
-                                       si--;
-                                       continue;
-                               }
-
-                               // Check contraction for target.
-                               Contraction ctt = GetTailContraction (target, ti, 0);
-                               if (ctt != null) {
-                                       ti -= ctt.Source.Length;
-                                       if (ctt.SortKey != null) {
-                                               int ret = GetMatchLengthBack (ref s, ref si, ref end, -1, ctt.SortKey, true);
-                                               if (ret < 0)
-                                                       return false;
-                                               si -= ret;
-                                       } else {
-                                               string r = ctt.Replacement;
-                                               int i = r.Length - 1;
-                                               while (i >= 0 && si >= end) {
-                                                       int ret = GetMatchLengthBack (ref s, ref si, ref end, r [i]);
-                                                       if (ret < 0)
-                                                               return false;
-                                                       si -= ret;
-                                                       i--;
-                                               }
-                                               if (i >= 0 && si < end)
-                                                       return false;
-                                       }
-                               }
-                               else {
-                                       int ret = GetMatchLengthBack (ref s, ref si, ref end, target [ti]);
-                                       if (ret < 0)
-                                               return false;
-                                       si -= ret;
-                                       ti--;
-                               }
-                       }
-                       if (si < end) {
-                               // All codepoints in the compared range
-                               // matches. In that case, what matters 
-                               // is whether the remaining part of 
-                               // "target" is ignorable or not.
-                               while (ti >= 0)
-                                       if (!IsIgnorable (target [ti--]))
-                                               return false;
-                               return true;
+                               for (; tend < ti; ti--)
+                                       if (!IsIgnorable (t [ti]))
+                                               break;
+                               if (tend == ti)
+                                       break;
+                               for (; send < si; si--)
+                                       if (IsSafe (s [si]))
+                                               break;
+                               for (; tend < ti; ti--)
+                                       if (IsSafe (t [ti]))
+                                               break;
+Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, length, t, ti, tstart, sStep - si, tStep - ti);
+                               if (CompareInternal (s, si, sStep - si,
+                                       t, ti, tStep - ti, stringSort,
+                                       out targetConsumed, out sourceConsumed,
+                                       true) != 0)
+                                       return false;
+                               if (send == si)
+                                       return false;
+                               sStep = si;
+                               tStep = ti;
+                               si--;
+                               ti--;
                        }
                        return true;
-               }
-
-               // WARNING: Don't invoke it outside IsSuffix().
-               int GetMatchLengthBack (ref string s, ref int idx, ref int end, char target)
-               {
-                       int it = FilterOptions ((int) target);
-                       charSortKey [0] = Category (it);
-                       charSortKey [1] = Level1 (it);
-                       if (!ignoreNonSpace)
-                               // FIXME: pass extender type
-                               charSortKey [2] = Level2 (it, ExtenderType.None);
-                       charSortKey [3] = Uni.Level3 (it);
-
-                       return GetMatchLengthBack (ref s, ref idx, ref end, it, charSortKey, !Uni.HasSpecialWeight ((char) it));
-               }
-
-               // WARNING: Don't invoke it outside IsSuffix().
-               // returns consumed source length (mostly 1, source length in case of contraction)
-               int GetMatchLengthBack (ref string s, ref int idx, ref int end, int it, byte [] sortkey, bool noLv4)
-               {
-                       Contraction ct = null;
-                       // If there is already expansion, then it should not
-                       // process further expansions.
-                       if (escapedSourceIndex < 0)
-                               ct = GetTailContraction (s, idx, end);
-                       if (ct != null) {
-                               if (ct.SortKey != null) {
-                                       if (!noLv4)
-                                               return -1;
-                                       for (int i = 0; i < ct.SortKey.Length; i++)
-                                               if (sortkey [i] != ct.SortKey [i])
-                                                       return -1;
-                                       return ct.Source.Length;
-                               } else {
-                                       escapedSourceIndex = idx - ct.Source.Length;
-                                       s = ct.Replacement;
-                                       idx = s.Length - 1;
-                                       end = 0;
-                                       return GetMatchLength (ref s, ref idx, ref end, it, sortkey, noLv4);
+#else
+                       // FIXME: it is not efficient for very long target.
+                       bool sourceConsumed, targetConsumed;
+                       int mismatchCount = 0;
+                       for (int i = 0; i < length; i++) {
+                               escape1.Source = escape2.Source = null;
+                               previousSortKey = previousSortKey2 = null;
+                               previousChar = previousChar2 = -1;
+
+                               int ret = CompareInternal (s, start - i, i + 1,
+                                       t, tstart, t.Length - tstart,
+                                       stringSort, out targetConsumed,
+                                       out sourceConsumed, true);
+                               if (ret == 0)
+                                       return true;
+                               if (!sourceConsumed && targetConsumed)
+                                       return false;
+                               if (!targetConsumed) {
+                                       mismatchCount++;
+                                       if (mismatchCount > Uni.MaxExpansionLength)
+                                               // The largest length of an
+                                               // expansion is 3, so if the
+                                               // target was not consumed more
+                                               // than 3 times, then the tail
+                                               // character does not match.
+                                               return false;
                                }
-                       } else {
-                               // primitive comparison
-                               if (Compare (s [idx], it, sortkey) != 0)
-                                       return -1;
-                               return 1;
                        }
-               }
-
-               // Common use methods 
-
-               // returns comparison result.
-               private int Compare (char src, int ct, byte [] sortkey)
-               {
-                       // char-by-char comparison.
-                       int cs = FilterOptions (src);
-                       if (cs == ct)
-                               return 0;
-                       // lv.1 to 3
-                       int ret = Category (cs) - Category (ct);
-                       if (ret != 0)
-                               return ret;
-                       ret = Level1 (cs) - Level1 (ct);
-                       if (ret != 0)
-                               return ret;
-                       if (!ignoreNonSpace) {
-                               // FIXME: pass ExtenderType
-                               ret = Level2 (cs, ExtenderType.None) - Level2 (ct, ExtenderType.None);
-                               if (ret != 0)
-                                       return ret;
-                       }
-                       ret = Uni.Level3 (cs) - Uni.Level3 (ct);
-                       if (ret != 0)
-                               return ret;
-                       // lv.4 (only when required). No need to check cj coz
-                       // there is no pair of characters that has the same
-                       // primary level and differs here.
-                       if (!Uni.HasSpecialWeight (src))
-                               return 0;
-                       char target = (char) ct;
-                       ret = CompareFlagPair (
-                               !Uni.IsJapaneseSmallLetter (src),
-                               !Uni.IsJapaneseSmallLetter (target));
-                       if (ret != 0)
-                               return ret;
-                       ret = Uni.GetJapaneseDashType (src) -
-                               Uni.GetJapaneseDashType (target);
-                       if (ret != 0)
-                               return ret;
-                       ret = CompareFlagPair (Uni.IsHiragana (src),
-                               Uni.IsHiragana (target));
-                       if (ret != 0)
-                               return ret;
-                       ret = CompareFlagPair (!IsHalfKana (src),
-                               !IsHalfKana (target));
-                       return ret;
-               }
-
-               int CompareFlagPair (bool b1, bool b2)
-               {
-                       return b1 == b2 ? 0 : b1 ? 1 : -1;
+                       return false;
+#endif
                }
 
                #endregion
 
                #region IndexOf() / LastIndexOf()
 
-               // IndexOf (string, string, CompareOptions)
-               // IndexOf (string, string, int, int, CompareOptions)
-               // IndexOf (string, char, int, int, CompareOptions)
-               // IndexOfPrimitiveChar (string, int, int, char)
-               // IndexOfSortKey (string, int, int, byte[], char, int, bool)
-               // IndexOf (string, string, int, int)
-
                public int IndexOf (string s, string target, CompareOptions opt)
                {
                        return IndexOf (s, target, 0, s.Length, opt);
@@ -1552,34 +1209,32 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        Contraction ct = GetContraction (target);
                        if (ct != null) {
                                if (ct.Replacement != null)
-                                       return IndexOfPrimitiveChar (s, start, length, ct.Replacement [0]);
+                                       return IndexOf (s, ct.Replacement, start, length,
+                                               (opt & CompareOptions.StringSort) != 0);
                                else
                                        return IndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
+                       } else {
+                               int ti = FilterOptions ((int) target);
+                               charSortKeyIndexTarget [0] = Category (ti);
+                               charSortKeyIndexTarget [1] = Level1 (ti);
+                               if (!ignoreNonSpace)
+                                       charSortKeyIndexTarget [2] =
+                                               Level2 (ti, ExtenderType.None);
+                               charSortKeyIndexTarget [3] = Uni.Level3 (ti);
+                               return IndexOfSortKey (s, start, length,
+                                       charSortKeyIndexTarget, target, ti,
+                                       !Uni.HasSpecialWeight ((char) ti));
                        }
-                       else
-                               return IndexOfPrimitiveChar (s, start, length, target);
-               }
-
-               // Searches target char w/o checking contractions
-               int IndexOfPrimitiveChar (string s, int start, int length, char target)
-               {
-                       int ti = FilterOptions ((int) target);
-                       charSortKey [0] = Category (ti);
-                       charSortKey [1] = Level1 (ti);
-                       if (!ignoreNonSpace)
-                               // FIXME: pass ExtenderType
-                               charSortKey [2] = Level2 (ti, ExtenderType.None);
-                       charSortKey [3] = Uni.Level3 (ti);
-                       return IndexOfSortKey (s, start, length, charSortKey, target, ti, !Uni.HasSpecialWeight ((char) ti));
                }
 
                // Searches target byte[] keydata
                int IndexOfSortKey (string s, int start, int length, byte [] sortkey, char target, int ti, bool noLv4)
                {
                        int end = start + length;
-                       for (int idx = start; idx < end; idx++) {
+                       int idx = start;
+                       while (idx < end) {
                                int cur = idx;
-                               if (Matches (s, ref idx, end, ti, target, sortkey, noLv4, false))
+                               if (MatchesForward (s, ref idx, end, ti, sortkey, noLv4))
                                        return cur;
                        }
                        return -1;
@@ -1596,21 +1251,47 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        if (tidx == target.Length)
                                return start;
                        Contraction ct = GetContraction (target, tidx, target.Length - tidx);
-                       byte [] sortkey = ct != null ? ct.SortKey : null;
                        string replace = ct != null ? ct.Replacement : null;
+                       byte [] sk = replace == null ? charSortKeyIndexTarget : null;
+                       bool noLv4 = true;
+                       char tc = char.MinValue;
+                       int ti = -1;
+                       if (ct != null && sk != null) {
+                               for (int i = 0; i < ct.SortKey.Length; i++)
+                                       sk [i] = ct.SortKey [i];
+                       } else if (sk != null) {
+                               tc = target [tidx];
+                               ti = FilterOptions (target [tidx]);
+                               sk [0] = Category (ti);
+                               sk [1] = Level1 (ti);
+                               if (!ignoreNonSpace)
+                                       sk [2] = Level2 (ti, ExtenderType.None);
+                               sk [3] = Uni.Level3 (ti);
+                               noLv4 = !Uni.HasSpecialWeight ((char) ti);
+                       }
+                       if (sk != null) {
+                               for (tidx++; tidx < target.Length; tidx++) {
+                                       if (Category (target [tidx]) != 1)
+                                               break;
+                                       if (sk [2] == 0)
+                                               sk [2] = 2;
+                                       sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
+                               }
+                       }
+
                        do {
                                int idx = 0;
-                               if (sortkey != null)
-                                       idx = IndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
-                               else if (replace != null)
+                               if (replace != null)
                                        idx = IndexOf (s, replace, start, length, stringSort);
                                else
-                                       idx = IndexOfPrimitiveChar (s, start, length, target [tidx]);
+                                       idx = IndexOfSortKey (s, start, length, sk, tc, ti, noLv4);
                                if (idx < 0)
                                        return -1;
-                               if (IsPrefix (s, target, idx, length - (idx - start), stringSort))
+                               length -= idx - start;
+                               start = idx;
+                               if (IsPrefix (s, target, start, length, stringSort, false))
                                        return idx;
-                               Contraction cts = GetContraction (s, idx, length - (idx - start));
+                               Contraction cts = GetContraction (s, start, length);
                                if (cts != null) {
                                        start += cts.Source.Length;
                                        length -= cts.Source.Length;
@@ -1652,35 +1333,29 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        Contraction ct = GetContraction (target);
                        if (ct != null) {
                                if (ct.Replacement != null)
-                                       return LastIndexOfPrimitiveChar (s, start, length, ct.Replacement [0]);
+                                       return LastIndexOf (s, ct.Replacement, start, length, (opt & CompareOptions.StringSort) != 0);
                                else
-                                       return LastIndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
+                                       return LastIndexOfSortKey (s, start, start, length, ct.SortKey, char.MinValue, -1, true);
+                       }
+                       else {
+                               int ti = FilterOptions ((int) target);
+                               charSortKeyIndexTarget [0] = Category (ti);
+                               charSortKeyIndexTarget [1] = Level1 (ti);
+                               if (!ignoreNonSpace)
+                                       charSortKeyIndexTarget [2] = Level2 (ti, ExtenderType.None);
+                               charSortKeyIndexTarget [3] = Uni.Level3 (ti);
+                               return LastIndexOfSortKey (s, start, start, length, charSortKeyIndexTarget, target, ti, !Uni.HasSpecialWeight ((char) ti));
                        }
-                       else
-                               return LastIndexOfPrimitiveChar (s, start, length, target);
-               }
-
-               // Searches target char w/o checking contractions
-               int LastIndexOfPrimitiveChar (string s, int start, int length, char target)
-               {
-                       int ti = FilterOptions ((int) target);
-                       charSortKey [0] = Category (ti);
-                       charSortKey [1] = Level1 (ti);
-                       if (!ignoreNonSpace)
-                               // FIXME: pass ExtenderType
-                               charSortKey [2] = Level2 (ti, ExtenderType.None);
-                       charSortKey [3] = Uni.Level3 (ti);
-                       return LastIndexOfSortKey (s, start, length, charSortKey, target, ti, !Uni.HasSpecialWeight ((char) ti));
                }
 
                // Searches target byte[] keydata
-               int LastIndexOfSortKey (string s, int start, int length, byte [] sortkey, char target, int ti, bool noLv4)
+               int LastIndexOfSortKey (string s, int start, int orgStart, int length, byte [] sortkey, char target, int ti, bool noLv4)
                {
                        int end = start - length;
-
-                       for (int idx = start; idx > end; idx--) {
+                       int idx = start;
+                       while (idx > end) {
                                int cur = idx;
-                               if (Matches (s, ref idx, end, ti, target, sortkey, noLv4, true))
+                               if (MatchesBackward (s, ref idx, end, orgStart, ti, sortkey, noLv4))
                                        return cur;
                        }
                        return -1;
@@ -1698,21 +1373,48 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        if (tidx == target.Length)
                                return start;
                        Contraction ct = GetContraction (target, tidx, target.Length - tidx);
-                       byte [] sortkey = ct != null ? ct.SortKey : null;
                        string replace = ct != null ? ct.Replacement : null;
+                       byte [] sk = replace == null ? charSortKeyIndexTarget : null;
+
+                       bool noLv4 = true;
+                       char tc = char.MinValue;
+                       int ti = -1;
+                       if (ct != null && sk != null) {
+                               for (int i = 0; i < ct.SortKey.Length; i++)
+                                       sk [i] = ct.SortKey [i];
+                       } else if (sk != null) {
+                               tc = target [tidx];
+                               ti = FilterOptions (target [tidx]);
+                               sk [0] = Category (ti);
+                               sk [1] = Level1 (ti);
+                               if (!ignoreNonSpace)
+                                       sk [2] = Level2 (ti, ExtenderType.None);
+                               sk [3] = Uni.Level3 (ti);
+                               noLv4 = !Uni.HasSpecialWeight ((char) ti);
+                       }
+                       if (sk != null) {
+                               for (tidx++; tidx < target.Length; tidx++) {
+                                       if (Category (target [tidx]) != 1)
+                                               break;
+                                       if (sk [2] == 0)
+                                               sk [2] = 2;
+                                       sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
+                               }
+                       }
 
                        do {
                                int idx = 0;
-                               if (sortkey != null)
-                                       idx = LastIndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
-                               else if (replace != null)
+
+                               if (replace != null)
                                        idx = LastIndexOf (s, replace, start, length, stringSort);
                                else
-                                       idx = LastIndexOfPrimitiveChar (s, start, length, target [tidx]);
-
+                                       idx = LastIndexOfSortKey (s, start, orgStart, length, sk, tc, ti, noLv4);
                                if (idx < 0)
                                        return -1;
-                               if (IsPrefix (s, target, idx, orgStart - idx + 1, stringSort)) {
+                               length -= start - idx;
+                               start = idx;
+
+                               if (IsPrefix (s, target, idx, orgStart - idx + 1, stringSort, false)) {
                                        for (;idx < orgStart; idx++)
                                                if (!IsIgnorable (s [idx]))
                                                        break;
@@ -1730,54 +1432,85 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        return -1;
                }
 
-               private bool Matches (string s, ref int idx, int end, int ti, char target, byte [] sortkey, bool noLv4, bool lastIndexOf)
+               private bool MatchesForward (string s, ref int idx, int end, int ti, byte [] sortkey, bool noLv4)
                {
-                       switch (char.GetUnicodeCategory (s [idx])) {
-                       case UnicodeCategory.PrivateUse:
-                       case UnicodeCategory.Surrogate:
-                               if (s [idx] != target)
+                       int si = -1;
+                       ExtenderType ext = GetExtenderType (s [idx]);
+                       Contraction ct = null;
+                       if (ext == ExtenderType.None)
+                               ct = GetContraction (s, idx, end);
+                       else if (previousChar < 0) {
+                               if (previousSortKey == null) {
+                                       idx++;
                                        return false;
-                               return true;
+                               }
+                               charSortKey = previousSortKey;
                        }
-
-                       char sc = char.MinValue;
-                       Contraction ct = GetContraction (s, idx, end);
+                       else
+                               si = FilterExtender (previousChar, ext);
                        // if lv4 exists, it never matches contraction
-                       if (ct != null && noLv4) {
-                               if (lastIndexOf)
-                                       idx -= ct.Source.Length - 1;
-                               else
-                                       idx += ct.Source.Length - 1;
+                       if (ct != null) {
+                               idx += ct.Source.Length;
+                               if (!noLv4)
+                                       return false;
                                if (ct.SortKey != null) {
                                        for (int i = 0; i < sortkey.Length; i++)
-                                               if (ct.SortKey [i] != sortkey [i])
-                                                       return false;
-                                       return true;
+                                               charSortKey [i] = sortkey [i];
+                                       previousChar = -1;
+                                       previousSortKey = charSortKey;
+                               } else {
+                                       // Here is the core of LAMESPEC
+                                       // described at the top of the source.
+                                       int dummy = 0;
+                                       return MatchesForward (ct.Replacement, ref dummy,
+                                               ct.Replacement.Length, ti, sortkey, noLv4);
                                }
-                               // Here is the core of LAMESPEC
-                               // described at the top of the source.
-                               sc = ct.Replacement [0];
+                       } else {
+                               if (si < 0)
+                                       si = FilterOptions (s [idx]);
+                               charSortKey [0] = Category (si);
+                               charSortKey [1] = Level1 (si);
+                               if (!ignoreNonSpace)
+                                       charSortKey [2] = Level2 (si, ext);
+                               charSortKey [3] = Uni.Level3 (si);
+                               if (charSortKey [0] != 1)
+                                       previousChar = si;
+                               idx++;
+                       }
+                       for (; idx < end; idx++) {
+                               if (Category (s [idx]) != 1)
+                                       break;
+                               if (ignoreNonSpace)
+                                       continue;
+                               if (charSortKey [2] == 0)
+                                               charSortKey [2] = 2;
+                                       charSortKey [2] = (byte) (charSortKey [2]
+                                               + Level2 (s [idx], ExtenderType.None));
                        }
-                       else
-                               sc = s [idx];
 
-                       if (sc == target)
-                               return true;
-                       int si = FilterOptions ((int) sc);
-                       if (Category (si) != sortkey [0] ||
-                               Level1 (si) != sortkey [1] ||
-                               // FIXME: pass ExtenderType
-                               !ignoreNonSpace && Level2 (si, ExtenderType.None) != sortkey [2] ||
-                               Uni.Level3 (si) != sortkey [3])
+                       return MatchesPrimitive (charSortKey, si, ext, sortkey, ti, noLv4);
+               }
+
+               private bool MatchesPrimitive (byte [] charSortKey, int si, ExtenderType ext, byte [] sortkey, int ti, bool noLv4)
+               {
+                       if (charSortKey [0] != sortkey [0] ||
+                               charSortKey [1] != sortkey [1] ||
+                               (!ignoreNonSpace && charSortKey [2] != sortkey [2]) ||
+                               charSortKey [3] != sortkey [3])
                                return false;
-                       if (noLv4 && !Uni.HasSpecialWeight ((char) si))
+                       if (noLv4 && (si < 0 || !Uni.HasSpecialWeight ((char) si)))
                                return true;
                        else if (noLv4)
                                return false;
+                       // Since target can never be an extender, if the source
+                       // is an expander and it matters, then they never match.
+                       if (!ignoreNonSpace && ext == ExtenderType.Conditional)
+                               return false;
                        if (Uni.IsJapaneseSmallLetter ((char) si) !=
                                Uni.IsJapaneseSmallLetter ((char) ti) ||
-                               Uni.GetJapaneseDashType ((char) si) !=
-                               Uni.GetJapaneseDashType ((char) ti) ||
+                               ToDashTypeValue (ext) !=
+                               // FIXME: we will have to specify correct value for target
+                               ToDashTypeValue (ExtenderType.None) ||
                                !Uni.IsHiragana ((char) si) !=
                                !Uni.IsHiragana ((char) ti) ||
                                IsHalfKana ((char) si) !=
@@ -1786,6 +1519,88 @@ Console.WriteLine (" -> '{0}'", c.Replacement);
                        return true;
                }
 
+               private bool MatchesBackward (string s, ref int idx, int end, int orgStart, int ti, byte [] sortkey, bool noLv4)
+               {
+                       int cur = idx;
+                       int si = -1;
+                       ExtenderType ext = GetExtenderType (s [idx]);
+                       // To handle extenders in source, we need to
+                       // check next _primary_ character.
+                       if (ext != ExtenderType.None) {
+                               byte diacritical = 0;
+                               for (int tmp = 0; ; tmp--) {
+                                       if (tmp < 0) // heading extender
+                                               return false;
+                                       if (IsIgnorable (s [tmp]))
+                                               continue;
+                                       int tmpi = FilterOptions (s [tmp]);
+                                       byte category = Category (tmpi);
+                                       if (category == 1) {
+                                               diacritical = Level2 (tmpi, ExtenderType.None);
+                                               continue;
+                                       }
+                                       si = FilterExtender (tmpi, ext);
+                                       charSortKey [0] = category;
+                                       charSortKey [1] = Level1 (si);
+                                       if (!ignoreNonSpace)
+                                               charSortKey [2] = Level2 (si, ext);
+                                       charSortKey [3] = Uni.Level3 (si);
+                                       if (ext != ExtenderType.Conditional &&
+                                               diacritical != 0)
+                                               charSortKey [2] =
+                                                       (charSortKey [2] == 0) ?
+                                                       (byte) (diacritical + 2) :
+                                                       diacritical;
+                                       break;
+                               }
+                               idx--;
+                       }
+                       Contraction ct = null;
+                       if (ext == ExtenderType.None)
+                               ct = GetContraction (s, idx, end);
+                       // if lv4 exists, it never matches contraction
+                       if (ct != null) {
+                               idx -= ct.Source.Length;
+                               if (!noLv4)
+                                       return false;
+                               if (ct.SortKey != null) {
+                                       for (int i = 0; i < sortkey.Length; i++)
+                                               charSortKey [i] = sortkey [i];
+                                       previousChar = -1;
+                                       previousSortKey = charSortKey;
+                               } else {
+                                       // Here is the core of LAMESPEC
+                                       // described at the top of the source.
+                                       int dummy = ct.Replacement.Length - 1;
+                                       return MatchesBackward (ct.Replacement, 
+                                               ref dummy, dummy, -1, ti, sortkey, noLv4);
+                               }
+                       } else if (ext == ExtenderType.None) {
+                               if (si < 0)
+                                       si = FilterOptions (s [idx]);
+                               charSortKey [0] = Category (si);
+                               charSortKey [1] = Level1 (si);
+                               if (!ignoreNonSpace)
+                                       charSortKey [2] = Level2 (si, ext);
+                               charSortKey [3] = Uni.Level3 (si);
+                               if (charSortKey [0] != 1)
+                                       previousChar = si;
+                               idx--;
+                       }
+                       if (ext == ExtenderType.None) {
+                               for (int tmp = cur + 1; tmp < orgStart; tmp++) {
+                                       if (Category (s [tmp]) != 1)
+                                               break;
+                                       if (ignoreNonSpace)
+                                               continue;
+                                       if (charSortKey [2] == 0)
+                                                       charSortKey [2] = 2;
+                                       charSortKey [2] = (byte) (charSortKey [2]
+                                               + Level2 (s [tmp], ExtenderType.None));
+                               }
+                       }
+                       return MatchesPrimitive (charSortKey, si, ext, sortkey, ti, noLv4);
+               }
                #endregion
        }
 }