2005-07-20 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / SimpleCollator.cs
1 //
2 // SimpleCollator.cs
3 //
4 // This class will demonstrate CompareInfo functionality that will just work.
5 //
6
7 //
8 // Here's a summary for supporting contractions, expansions and diacritical 
9 // remappings.
10 //
11 // Diacritical mapping is a simple tailoring rule that "remaps" diacritical
12 // weight value from one to another. For now it applies to all range of
13 // characters, but at some stage we might need to limit the remapping ranges.
14 //
15 // A Contraction consists of a string (actually char[]) and a sortkey entry
16 // (i.e. byte[]). It indicates that a sequence of characters is interpreted
17 // as a single character that has the mapped sortkey value. There is no
18 // character which goes across "special rules". When the collator encountered
19 // such a sequence of characters, it just returns the sortkey value without
20 // further replacement.
21 //
22 // Since it is still likely to happen that a contraction sequence matches
23 // other character than the identical sequence (depending on CompareOptions
24 // and of course, defined sortkey value itself), comparison cannot be done
25 // at source char[] level.
26 //
27 // (to be continued.)
28 //
29
30 //
31 // In IndexOf(), the first character in the target string or the target char
32 // itself is turned into sortkey bytes. If the character has a contraction and
33 // that is sortkey map, then it is used instead. If the contraction exists and
34 // that is replacement map, then the first character of the replacement string
35 // is searched instead. IndexOf() always searches only for the top character,
36 // and if it returned negative value, then it returns -1. Otherwise, it then
37 // tries IsPrefix() from that location. If it returns true, then it returns
38 // the index.
39 //
40
41 // LAMESPEC: IndexOf() is lame as a whole API. It never matches in the middle
42 // of expansion and there is no proper way to return such indexes within
43 // a single int return value.
44 //
45 // For example, try below in .NET:
46 //      IndexOf("\u00E6", "a")
47 //      IndexOf("\u00E6", "e")
48 //
49
50
51 using System;
52 using System.Collections;
53 using System.Globalization;
54
55 using Uni = Mono.Globalization.Unicode.MSCompatUnicodeTable;
56 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
57
58 namespace Mono.Globalization.Unicode
59 {
60         internal class SimpleCollator
61         {
62                 static SimpleCollator invariant =
63                         new SimpleCollator (CultureInfo.InvariantCulture);
64
65                 internal static readonly byte [] ignorableFlags =
66                         Uni.ignorableFlags;
67                 internal static readonly byte [] categories =
68                         Uni.categories;
69                 internal static readonly byte [] level1 =
70                         Uni.level1;
71                 internal static readonly byte [] level2 =
72                         Uni.level2;
73                 internal static readonly byte [] level3 =
74                         Uni.level3;
75                 internal static readonly ushort [] widthCompat =
76                         Uni.widthCompat;
77                 internal static readonly CodePointIndexer categoryIndexer =
78                         UUtil.Category;
79                 internal static readonly CodePointIndexer lv1Indexer =
80                         UUtil.Level1;
81                 internal static readonly CodePointIndexer lv2Indexer =
82                         UUtil.Level2;
83                 internal static readonly CodePointIndexer lv3Indexer =
84                         UUtil.Level3;
85                 internal static readonly CodePointIndexer widthIndexer =
86                         UUtil.WidthCompat;
87
88
89                 SortKeyBuffer buf;
90                 // CompareOptions expanded.
91                 bool ignoreNonSpace; // used in IndexOf()
92                 bool ignoreSymbols;
93                 bool ignoreWidth;
94                 bool ignoreCase;
95                 bool ignoreKanaType;
96                 TextInfo textInfo; // for ToLower().
97                 bool frenchSort;
98                 readonly ushort [] cjkTable;
99                 readonly CodePointIndexer cjkIndexer;
100                 readonly byte [] cjkLv2Table;
101                 readonly CodePointIndexer cjkLv2Indexer;
102                 readonly int lcid;
103                 readonly Contraction [] contractions;
104                 readonly Level2Map [] level2Maps;
105
106                 // temporary sortkey buffer for index search/comparison
107                 byte [] charSortKey = new byte [4];
108                 byte [] charSortKey2 = new byte [4];
109                 byte [] charSortKeyIndexTarget = new byte [4];
110
111                 #region Tailoring support classes
112                 // Possible mapping types are:
113                 //
114                 //      - string to string (ReplacementMap)
115                 //      - string to SortKey (SortKeyMap)
116                 //      - diacritical byte to byte (DiacriticalMap)
117                 //
118                 // There could be mapping from string to sortkeys, but
119                 // for now there is none as such.
120                 //
121                 internal class Contraction
122                 {
123                         public readonly char [] Source;
124                         // only either of them is used.
125                         public readonly string Replacement;
126                         public readonly byte [] SortKey;
127
128                         public Contraction (char [] source,
129                                 string replacement, byte [] sortkey)
130                         {
131                                 Source = source;
132                                 Replacement = replacement;
133                                 SortKey = sortkey;
134                         }
135                 }
136
137                 internal class ContractionComparer : IComparer
138                 {
139                         public static readonly ContractionComparer Instance =
140                                 new ContractionComparer ();
141
142                         public int Compare (object o1, object o2)
143                         {
144                                 Contraction c1 = (Contraction) o1;
145                                 Contraction c2 = (Contraction) o2;
146                                 char [] a1 = c1.Source;
147                                 char [] a2 = c2.Source;
148                                 int min = a1.Length > a2.Length ?
149                                         a2.Length : a1.Length;
150                                 for (int i = 0; i < min; i++)
151                                         if (a1 [i] != a2 [i])
152                                                 return a1 [i] - a2 [i];
153                                 return a1.Length - a2.Length;
154                         }
155                 }
156
157                 internal class Level2Map
158                 {
159                         public byte Source;
160                         public byte Replace;
161
162                         public Level2Map (byte source, byte replace)
163                         {
164                                 Source = source;
165                                 Replace = replace;
166                         }
167                 }
168
169                 internal class Level2MapComparer : IComparer
170                 {
171                         public static readonly Level2MapComparer Instance =
172                                 new Level2MapComparer ();
173
174                         public int Compare (object o1, object o2)
175                         {
176                                 Level2Map m1 = (Level2Map) o1;
177                                 Level2Map m2 = (Level2Map) o2;
178                                 return (m1.Source - m2.Source);
179                         }
180                 }
181
182                 #endregion
183
184                 #region .ctor() and split functions
185
186                 public SimpleCollator (CultureInfo culture)
187                 {
188                         lcid = culture.LCID;
189                         textInfo = culture.TextInfo;
190                         buf = new SortKeyBuffer (culture.LCID);
191
192                         SetCJKTable (culture, ref cjkTable, ref cjkIndexer,
193                                 ref cjkLv2Table, ref cjkLv2Indexer);
194
195                         // Get tailoring info
196                         TailoringInfo t = null;
197                         for (CultureInfo ci = culture; ci.LCID != 127; ci = ci.Parent) {
198                                 t = Uni.GetTailoringInfo (ci.LCID);
199                                 if (t != null)
200                                         break;
201                         }
202                         if (t == null) // then use invariant
203                                 t = Uni.GetTailoringInfo (127);
204
205                         frenchSort = t.FrenchSort;
206                         BuildTailoringTables (culture, t, ref contractions,
207                                 ref level2Maps);
208                         // FIXME: Since tailorings are mostly for latin
209                         // (and in some cases Cyrillic) characters, it would
210                         // be much better for performance to store "start 
211                         // indexes" for > 370 (culture-specific letters).
212
213 /*
214 // dump tailoring table
215 Console.WriteLine ("******** building table for {0} : c - {1} d - {2}",
216 culture.LCID, contractions.Length, level2Maps.Length);
217 foreach (Contraction c in contractions) {
218 foreach (char cc in c.Source)
219 Console.Write ("{0:X4} ", (int) cc);
220 Console.WriteLine (" -> '{0}'", c.Replacement);
221 }
222 */
223                 }
224
225                 private void BuildTailoringTables (CultureInfo culture,
226                         TailoringInfo t,
227                         ref Contraction [] contractions,
228                         ref Level2Map [] diacriticals)
229                 {
230                         // collect tailoring entries.
231                         ArrayList cmaps = new ArrayList ();
232                         ArrayList dmaps = new ArrayList ();
233                         char [] tarr = Uni.TailoringValues;
234                         int idx = t.TailoringIndex;
235                         int end = idx + t.TailoringCount;
236                         while (idx < end) {
237                                 int ss = idx + 1;
238                                 char [] src = null;
239                                 switch (tarr [idx]) {
240                                 case '\x1': // SortKeyMap
241                                         idx++;
242                                         while (tarr [ss] != 0)
243                                                 ss++;
244                                         src = new char [ss - idx];
245                                         Array.Copy (tarr, idx, src, 0, ss - idx);
246                                         byte [] sortkey = new byte [4];
247                                         for (int i = 0; i < 4; i++)
248                                                 sortkey [i] = (byte) tarr [ss + 1 + i];
249                                         cmaps.Add (new Contraction (
250                                                 src, null, sortkey));
251                                         // it ends with 0
252                                         idx = ss + 6;
253                                         break;
254                                 case '\x2': // DiacriticalMap
255                                         dmaps.Add (new Level2Map (
256                                                 (byte) tarr [idx + 1],
257                                                 (byte) tarr [idx + 2]));
258                                         idx += 3;
259                                         break;
260                                 case '\x3': // ReplacementMap
261                                         idx++;
262                                         while (tarr [ss] != 0)
263                                                 ss++;
264                                         src = new char [ss - idx];
265                                         Array.Copy (tarr, idx, src, 0, ss - idx);
266                                         ss++;
267                                         int l = ss;
268                                         while (tarr [l] != 0)
269                                                 l++;
270                                         string r = new string (tarr, ss, l - ss);
271                                         cmaps.Add (new Contraction (
272                                                 src, r, null));
273                                         idx = l + 1;
274                                         break;
275                                 default:
276                                         throw new NotImplementedException (String.Format ("Mono INTERNAL ERROR (Should not happen): Collation tailoring table is broken for culture {0} ({1}) at 0x{2:X}", culture.LCID, culture.Name, idx));
277                                 }
278                         }
279                         cmaps.Sort (ContractionComparer.Instance);
280                         dmaps.Sort (Level2MapComparer.Instance);
281                         contractions = cmaps.ToArray (typeof (Contraction))
282                                 as Contraction [];
283                         diacriticals = dmaps.ToArray (typeof (Level2Map))
284                                 as Level2Map [];
285                 }
286
287                 private void SetCJKTable (CultureInfo culture,
288                         ref ushort [] cjkTable, ref CodePointIndexer cjkIndexer,
289                         ref byte [] cjkLv2Table, ref CodePointIndexer cjkLv2Indexer)
290                 {
291                         string name = GetNeutralCulture (culture).Name;
292
293                         Uni.FillCJK (name);
294
295                         // custom CJK table support.
296                         switch (name) {
297                         case "zh-CHS":
298                                 cjkTable = Uni.CjkCHS;
299                                 cjkIndexer = UUtil.CjkCHS;
300                                 break;
301                         case "zh-CHT":
302                                 cjkTable = Uni.CjkCHT;
303                                 cjkIndexer = UUtil.Cjk;
304                                 break;
305                         case "ja":
306                                 cjkTable = Uni.CjkJA;
307                                 cjkIndexer = UUtil.Cjk;
308                                 break;
309                         case "ko":
310                                 cjkTable = Uni.CjkKO;
311                                 cjkLv2Table = Uni.CjkKOLv2;
312                                 cjkIndexer = UUtil.Cjk;
313                                 cjkLv2Indexer = UUtil.Cjk;
314                                 break;
315                         }
316                 }
317
318                 static CultureInfo GetNeutralCulture (CultureInfo info)
319                 {
320                         CultureInfo ret = info;
321                         while (ret.Parent != null && ret.Parent.LCID != 127)
322                                 ret = ret.Parent;
323                         return ret;
324                 }
325
326                 #endregion
327
328                 byte Category (int cp)
329                 {
330                         if (cp < 0x3000 || cjkTable == null)
331                                 return categories [categoryIndexer.ToIndex (cp)];
332                         int idx = cjkIndexer.ToIndex (cp);
333                         ushort cjk = idx < 0 ? (ushort) 0 : cjkTable [idx];
334                         return cjk != 0 ? (byte) ((cjk & 0xFF00) >> 8) :
335                                 categories [categoryIndexer.ToIndex (cp)];
336                 }
337
338                 byte Level1 (int cp)
339                 {
340                         if (cp < 0x3000 || cjkTable == null)
341                                 return level1 [lv1Indexer.ToIndex (cp)];
342                         int idx = cjkIndexer.ToIndex (cp);
343                         ushort cjk = idx < 0 ? (ushort) 0 : cjkTable [idx];
344                         return cjk != 0 ? (byte) (cjk & 0xFF) :
345                                 level1 [lv1Indexer.ToIndex (cp)];
346                 }
347
348                 byte Level2 (int cp, ExtenderType ext)
349                 {
350                         if (ext == ExtenderType.Buggy)
351                                 return 5;
352                         else if (ext == ExtenderType.Conditional)
353                                 return 0;
354
355                         if (cp < 0x3000 || cjkLv2Table == null)
356                                 return level2 [lv2Indexer.ToIndex (cp)];
357                         int idx = cjkLv2Indexer.ToIndex (cp);
358                         byte ret = idx < 0 ? (byte) 0 : cjkLv2Table [idx];
359                         if (ret != 0)
360                                 return ret;
361                         ret = level2 [lv2Indexer.ToIndex (cp)];
362                         if (level2Maps.Length == 0)
363                                 return ret;
364                         for (int i = 0; i < level2Maps.Length; i++) {
365                                 if (level2Maps [i].Source == ret)
366                                         return level2Maps [i].Replace;
367                                 else if (level2Maps [i].Source > ret)
368                                         break;
369                         }
370                         return ret;
371                 }
372
373                 bool IsHalfKana (int cp)
374                 {
375                         return ignoreWidth || Uni.IsHalfWidthKana ((char) cp);
376                 }
377
378                 void SetOptions (CompareOptions options)
379                 {
380                         this.ignoreNonSpace = (options & CompareOptions.IgnoreNonSpace) != 0;
381                         this.ignoreSymbols = (options & CompareOptions.IgnoreSymbols) != 0;
382                         this.ignoreWidth = (options & CompareOptions.IgnoreWidth) != 0;
383                         this.ignoreCase = (options & CompareOptions.IgnoreCase) != 0;
384                         this.ignoreKanaType = (options & CompareOptions.IgnoreKanaType) != 0;
385                         previousChar = previousChar2 = -1;
386                         previousSortKey = previousSortKey2 = null;
387                         escape1.Source = escape2.Source = null;
388                 }
389
390                 Contraction GetContraction (string s, int start, int end)
391                 {
392                         Contraction c = GetContraction (s, start, end, contractions);
393                         if (c != null || lcid == 127)
394                                 return c;
395                         return GetContraction (s, start, end, invariant.contractions);
396                 }
397
398                 Contraction GetContraction (string s, int start, int end, Contraction [] clist)
399                 {
400                         for (int i = 0; i < clist.Length; i++) {
401                                 Contraction ct = clist [i];
402                                 int diff = ct.Source [0] - s [start];
403                                 if (diff > 0)
404                                         return null; // it's already sorted
405                                 else if (diff < 0)
406                                         continue;
407                                 char [] chars = ct.Source;
408                                 if (end - start < chars.Length)
409                                         continue;
410                                 bool match = true;
411                                 for (int n = 0; n < chars.Length; n++)
412                                         if (s [start + n] != chars [n]) {
413                                                 match = false;
414                                                 break;
415                                         }
416                                 if (match)
417                                         return ct;
418                         }
419                         return null;
420                 }
421
422                 Contraction GetTailContraction (string s, int start, int end)
423                 {
424                         Contraction c = GetTailContraction (s, start, end, contractions);
425                         if (c != null || lcid == 127)
426                                 return c;
427                         return GetTailContraction (s, start, end, invariant.contractions);
428                 }
429
430                 Contraction GetTailContraction (string s, int start, int end, Contraction [] clist)
431                 {
432                         for (int i = 0; i < clist.Length; i++) {
433                                 Contraction ct = clist [i];
434                                 int diff = ct.Source [0] - s [end];
435                                 if (diff > 0)
436                                         return null; // it's already sorted
437                                 else if (diff < 0)
438                                         continue;
439                                 char [] chars = ct.Source;
440                                 if (start - end + 1 < chars.Length)
441                                         continue;
442                                 bool match = true;
443                                 int offset = start - chars.Length + 1;
444                                 for (int n = 0; n < chars.Length; n++)
445                                         if (s [offset + n] != chars [n]) {
446                                                 match = false;
447                                                 break;
448                                         }
449                                 if (match)
450                                         return ct;
451                         }
452                         return null;
453                 }
454
455                 Contraction GetContraction (char c)
456                 {
457                         Contraction ct = GetContraction (c, contractions);
458                         if (ct != null || lcid == 127)
459                                 return ct;
460                         return GetContraction (c, invariant.contractions);
461                 }
462
463                 Contraction GetContraction (char c, Contraction [] clist)
464                 {
465                         for (int i = 0; i < clist.Length; i++) {
466                                 Contraction ct = clist [i];
467                                 if (ct.Source [0] > c)
468                                         return null; // it's already sorted
469                                 if (ct.Source [0] == c && ct.Source.Length == 1)
470                                         return ct;
471                         }
472                         return null;
473                 }
474
475                 int FilterOptions (int i)
476                 {
477                         if (ignoreWidth) {
478                                 int x = widthCompat [widthIndexer.ToIndex (i)];
479                                 if (x != 0)
480                                         i = x;
481                         }
482                         if (ignoreCase)
483                                 i = textInfo.ToLower ((char) i);
484                         if (ignoreKanaType)
485                                 i = Uni.ToKanaTypeInsensitive (i);
486                         return i;
487                 }
488
489                 int previousChar = -1;
490                 byte [] previousSortKey = null;
491                 int previousChar2 = -1;
492                 byte [] previousSortKey2 = null;
493
494                 enum ExtenderType {
495                         None,
496                         Simple,
497                         Voiced,
498                         Conditional,
499                         Buggy,
500                 }
501
502                 ExtenderType GetExtenderType (int i)
503                 {
504                         // LAMESPEC: Windows expects true for U+3005, but 
505                         // sometimes it does not represent to repeat just
506                         // one character.
507                         // Windows also expects true for U+3031 and U+3032,
508                         // but they should *never* repeat one character.
509
510                         if (i < 0x3005 || i > 0xFF70)
511                                 return ExtenderType.None;
512                         if (i >= 0xFE7C) {
513                                 switch (i) {
514                                 case 0xFE7C:
515                                 case 0xFE7D:
516                                         return ExtenderType.Simple;
517                                 case 0xFF70:
518                                         return ExtenderType.Conditional;
519                                 case 0xFF9E:
520                                 case 0xFF9F:
521                                         return ExtenderType.Voiced;
522                                 }
523                         }
524                         if (i > 0x30FE)
525                                 return ExtenderType.None;
526                         switch (i) {
527                         case 0x3005: // LAMESPEC: see above.
528                                 return ExtenderType.Buggy;
529                         case 0x3031: // LAMESPEC: see above.
530                         case 0x3032: // LAMESPEC: see above.
531                         case 0x309D:
532                         case 0x30FD:
533                                 return ExtenderType.Simple;
534                         case 0x309E:
535                         case 0x30FE:
536                                 return ExtenderType.Voiced;
537                         case 0x30FC:
538                                 return ExtenderType.Conditional;
539                         default:
540                                 return ExtenderType.None;
541                         }
542                 }
543
544                 byte ToDashTypeValue (ExtenderType ext)
545                 {
546                         if (ignoreNonSpace) // LAMESPEC: huh, why?
547                                 return 3;
548                         switch (ext) {
549                         case ExtenderType.None:
550                                 return 3;
551                         case ExtenderType.Conditional:
552                                 return 5;
553                         default:
554                                 return 4;
555                         }
556                 }
557
558                 int FilterExtender (int i, ExtenderType ext)
559                 {
560                         if (ext == ExtenderType.Conditional &&
561                                 Uni.HasSpecialWeight ((char) i)) {
562                                 bool half = IsHalfKana ((char) i);
563                                 bool katakana = !Uni.IsHiragana ((char) i);
564                                 switch (Level1 (i) & 7) {
565                                 case 2:
566                                         return half ? 0xFF71 : katakana ? 0x30A2 : 0x3042;
567                                 case 3:
568                                         return half ? 0xFF72 : katakana ? 0x30A4 : 0x3044;
569                                 case 4:
570                                         return half ? 0xFF73 : katakana ? 0x30A6 : 0x3046;
571                                 case 5:
572                                         return half ? 0xFF74 : katakana ? 0x30A8 : 0x3048;
573                                 case 6:
574                                         return half ? 0xFF75 : katakana ? 0x30AA : 0x304A;
575                                 }
576                         }
577                         return i;
578                 }
579
580                 bool IsIgnorable (int i)
581                 {
582                         return Uni.IsIgnorable (i) ||
583                                 ignoreSymbols && Uni.IsIgnorableSymbol (i) ||
584                                 ignoreNonSpace && Uni.IsIgnorableNonSpacing (i);
585                 }
586
587
588                 #region GetSortKey()
589
590                 public SortKey GetSortKey (string s)
591                 {
592                         return GetSortKey (s, CompareOptions.None);
593                 }
594
595                 public SortKey GetSortKey (string s, CompareOptions options)
596                 {
597                         return GetSortKey (s, 0, s.Length, options);
598                 }
599
600                 public SortKey GetSortKey (string s, int start, int length, CompareOptions options)
601                 {
602                         SetOptions (options);
603
604                         buf.Initialize (options, lcid, s, frenchSort);
605                         int end = start + length;
606                         GetSortKey (s, start, end);
607                         return buf.GetResultAndReset ();
608                 }
609
610                 void GetSortKey (string s, int start, int end)
611                 {
612                         for (int n = start; n < end; n++) {
613                                 int i = s [n];
614
615                                 ExtenderType ext = GetExtenderType (i);
616                                 if (ext != ExtenderType.None) {
617                                         i = FilterExtender (previousChar, ext);
618                                         if (i >= 0)
619                                                 FillSortKeyRaw (i, ext);
620                                         else if (previousSortKey != null) {
621                                                 byte [] b = previousSortKey;
622                                                 buf.AppendNormal (
623                                                         b [0],
624                                                         b [1],
625                                                         b [2] != 1 ? b [2] : Level2 (i, ext),
626                                                         b [3] != 1 ? b [3] : level3 [lv3Indexer.ToIndex (i)]);
627                                         }
628                                         // otherwise do nothing.
629                                         // (if the extender is the first char
630                                         // in the string, then just ignore.)
631                                         continue;
632                                 }
633
634                                 if (IsIgnorable (i))
635                                         continue;
636                                 i = FilterOptions (i);
637
638                                 Contraction ct = GetContraction (s, n, end);
639                                 if (ct != null) {
640                                         if (ct.Replacement != null) {
641                                                 GetSortKey (ct.Replacement, 0, ct.Replacement.Length);
642                                         } else {
643                                                 byte [] b = ct.SortKey;
644                                                 buf.AppendNormal (
645                                                         b [0],
646                                                         b [1],
647                                                         b [2] != 1 ? b [2] : Level2 (i, ext),
648                                                         b [3] != 1 ? b [3] : level3 [lv3Indexer.ToIndex (i)]);
649                                                 previousSortKey = b;
650                                                 previousChar = -1;
651                                         }
652                                         n += ct.Source.Length - 1;
653                                 }
654                                 else {
655                                         if (!Uni.IsIgnorableNonSpacing (i))
656                                                 previousChar = i;
657                                         FillSortKeyRaw (i, ExtenderType.None);
658                                 }
659                         }
660                 }
661
662                 void FillSortKeyRaw (int i, ExtenderType ext)
663                 {
664                         if (0x3400 <= i && i <= 0x4DB5) {
665                                 int diff = i - 0x3400;
666                                 buf.AppendCJKExtension (
667                                         (byte) (0x10 + diff / 254),
668                                         (byte) (diff % 254 + 2));
669                                 return;
670                         }
671
672                         UnicodeCategory uc = char.GetUnicodeCategory ((char) i);
673                         switch (uc) {
674                         case UnicodeCategory.PrivateUse:
675                                 int diff = i - 0xE000;
676                                 buf.AppendNormal (
677                                         (byte) (0xE5 + diff / 254),
678                                         (byte) (diff % 254 + 2),
679                                         0,
680                                         0);
681                                 return;
682                         case UnicodeCategory.Surrogate:
683                                 FillSurrogateSortKeyRaw (i);
684                                 return;
685                         }
686
687                         byte level2 = Level2 (i, ext);
688                         if (Uni.HasSpecialWeight ((char) i)) {
689                                 byte level1 = Level1 (i);
690                                 buf.AppendKana (
691                                         Category (i),
692                                         level1,
693                                         level2,
694                                         Uni.Level3 (i),
695                                         Uni.IsJapaneseSmallLetter ((char) i),
696                                         ToDashTypeValue (ext),
697                                         !Uni.IsHiragana ((char) i),
698                                         IsHalfKana ((char) i)
699                                         );
700                                 if (!ignoreNonSpace && ext == ExtenderType.Voiced)
701                                         // Append voice weight
702                                         buf.AppendNormal (1, 1, 1, 0);
703                         }
704                         else
705                                 buf.AppendNormal (
706                                         Category (i),
707                                         Level1 (i),
708                                         level2,
709                                         Uni.Level3 (i));
710                 }
711
712                 void FillSurrogateSortKeyRaw (int i)
713                 {
714                         int diffbase = 0;
715                         int segment = 0;
716                         byte lower = 0;
717
718                         if (i < 0xD840) {
719                                 diffbase = 0xD800;
720                                 segment = 0x41;
721                                 lower = (byte) ((i == 0xD800) ? 0x3E : 0x3F);
722                         } else if (0xD840 <= i && i < 0xD880) {
723                                 diffbase = 0xD840;
724                                 segment = 0xF2;
725                                 lower = 0x3E;
726                         } else if (0xDB80 <= i && i < 0xDC00) {
727                                 diffbase = 0xDB80 - 0x40;
728                                 segment = 0xFE;
729                                 lower = 0x3E;
730                         } else {
731                                 diffbase = 0xDC00 - 0xF8 + 2;
732                                 segment = 0x41;
733                                 lower = 0x3F;
734                         }
735                         int diff = i - diffbase;
736
737                         buf.AppendNormal (
738                                 (byte) (segment + diff / 254),
739                                 (byte) (diff % 254 + 2),
740                                 lower,
741                                 lower);
742                 }
743
744                 #endregion
745
746                 #region Compare()
747
748                 public int Compare (string s1, string s2)
749                 {
750                         return Compare (s1, s2, CompareOptions.None);
751                 }
752
753                 public int Compare (string s1, string s2, CompareOptions options)
754                 {
755                         return Compare (s1, 0, s1.Length, s2, 0, s2.Length, options);
756                 }
757
758                 class Escape
759                 {
760                         public string Source;
761                         public int Index;
762                         public int Start;
763                         public int End;
764                 }
765
766                 // Those instances are reused not to invoke instantiation
767                 // during Compare().
768                 Escape escape1 = new Escape ();
769                 Escape escape2 = new Escape ();
770
771                 private int CompareOrdinal (string s1, int idx1, int len1,
772                         string s2, int idx2, int len2)
773                 {
774                         int min = len1 < len2 ? len1 : len2;
775                         int end1 = idx1 + min;
776                         int end2 = idx2 + min;
777                         for (int i1 = idx1, i2 = idx2;
778                                 i1 < end1 && i2 < end2; i1++, i2++)
779                                 if (s1 [i1] != s2 [i2])
780                                         return s1 [i1] - s2 [i2];
781                         return len1 == len2 ? 0 :
782                                 len1 == min ? - 1 : 1;
783                 }
784
785                 public int Compare (string s1, int idx1, int len1,
786                         string s2, int idx2, int len2, CompareOptions options)
787                 {
788                         // quick equality check
789                         if (idx1 == idx2 && len1 == len2 &&
790                                 Object.ReferenceEquals (s1, s2))
791                                 return 0;
792                         // FIXME: this should be done inside Compare() at
793                         // any time.
794 //                      int ord = CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
795 //                      if (ord == 0)
796 //                              return 0;
797                         if (options == CompareOptions.Ordinal)
798                                 return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
799
800 #if false // stable easy version, depends on GetSortKey().
801                         SortKey sk1 = GetSortKey (s1, idx1, len1, options);
802                         SortKey sk2 = GetSortKey (s2, idx2, len2, options);
803                         byte [] d1 = sk1.KeyData;
804                         byte [] d2 = sk2.KeyData;
805                         int len = d1.Length > d2.Length ? d2.Length : d1.Length;
806                         for (int i = 0; i < len; i++)
807                                 if (d1 [i] != d2 [i])
808                                         return d1 [i] < d2 [i] ? -1 : 1;
809                         return d1.Length == d2.Length ? 0 : d1.Length < d2.Length ? -1 : 1;
810 #else
811                         SetOptions (options);
812                         bool dummy, dummy2;
813                         int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, (options & CompareOptions.StringSort) != 0, out dummy, out dummy2, true);
814                         return ret == 0 ? 0 : ret < 0 ? -1 : 1;
815 #endif
816                 }
817
818                 int CompareInternal (string s1, int idx1, int len1, string s2,
819                         int idx2, int len2, bool stringSort,
820                         out bool targetConsumed, out bool sourceConsumed,
821                         bool skipHeadingExtenders)
822                 {
823                         int start1 = idx1;
824                         int start2 = idx2;
825                         int end1 = idx1 + len1;
826                         int end2 = idx2 + len2;
827                         targetConsumed = false;
828                         sourceConsumed = false;
829
830                         // It holds final result that comes from the comparison
831                         // at level 2 or lower. Even if Compare() found the
832                         // difference at level 2 or lower, it still has to
833                         // continue level 1 comparison. FinalResult is used
834                         // when there was no further differences.
835                         int finalResult = 0;
836                         // It holds the comparison level to do. It starts from
837                         // 5, and becomes 1 when we do primary-only comparison.
838                         int currentLevel = 5;
839
840                         int lv5At1 = -1;
841                         int lv5At2 = -1;
842                         int lv5Value1 = 0;
843                         int lv5Value2 = 0;
844
845                         // Skip heading extenders
846                         if (skipHeadingExtenders) {
847                                 for (; idx1 < end1; idx1++)
848                                         if (GetExtenderType (s1 [idx1]) == ExtenderType.None)
849                                                 break;
850                                 for (; idx2 < end2; idx2++)
851                                         if (GetExtenderType (s2 [idx2]) == ExtenderType.None)
852                                                 break;
853                         }
854
855                         ExtenderType ext1 = ExtenderType.None;
856                         ExtenderType ext2 = ExtenderType.None;
857
858                         while (true) {
859                                 for (; idx1 < end1; idx1++)
860                                         if (!IsIgnorable (s1 [idx1]))
861                                                 break;
862                                 for (; idx2 < end2; idx2++)
863                                         if (!IsIgnorable (s2 [idx2]))
864                                                 break;
865
866                                 if (idx1 >= end1) {
867                                         if (escape1.Source == null)
868                                                 break;
869                                         s1 = escape1.Source;
870                                         start1 = escape1.Start;
871                                         idx1 = escape1.Index;
872                                         end1 = escape1.End;
873                                         escape1.Source = null;
874                                         continue;
875                                 }
876                                 if (idx2 >= end2) {
877                                         if (escape2.Source == null)
878                                                 break;
879                                         s2 = escape2.Source;
880                                         start2 = escape2.Start;
881                                         idx2 = escape2.Index;
882                                         end2 = escape2.End;
883                                         escape2.Source = null;
884                                         continue;
885                                 }
886 #if false
887 // FIXME: optimization could be done here.
888
889                                 if (s1 [idx1] == s2 [idx2]) {
890                                         idx1++;
891                                         idx2++;
892                                         continue;
893                                 }
894 //                              while (idx1 >= start1 && !IsSafe ((int) s [idx1]))
895 //                                      idx1--;
896 //                              while (idx2 >= start2 && !IsSafe ((int) s [idx2]))
897 //                                      idx2--;
898 #endif
899
900                                 int cur1 = idx1;
901                                 int cur2 = idx2;
902                                 byte [] sk1 = null;
903                                 byte [] sk2 = null;
904                                 int i1 = FilterOptions (s1 [idx1]);
905                                 int i2 = FilterOptions (s2 [idx2]);
906                                 bool special1 = false;
907                                 bool special2 = false;
908
909                                 // If current character is an expander, then
910                                 // repeat the previous character.
911                                 ext1 = GetExtenderType (i1);
912                                 if (ext1 != ExtenderType.None) {
913                                         if (previousChar < 0) {
914                                                 if (previousSortKey == null) {
915                                                         // nothing to extend
916                                                         idx1++;
917                                                         continue;
918                                                 }
919                                                 sk1 = previousSortKey;
920                                         }
921                                         else
922                                                 i1 = FilterExtender (previousChar, ext1);
923                                 }
924                                 ext2 = GetExtenderType (i2);
925                                 if (ext2 != ExtenderType.None) {
926                                         if (previousChar2 < 0) {
927                                                 if (previousSortKey2 == null) {
928                                                         // nothing to extend
929                                                         idx2++;
930                                                         continue;
931                                                 }
932                                                 sk2 = previousSortKey2;
933                                         }
934                                         else
935                                                 i2 = FilterExtender (previousChar2, ext2);
936                                 }
937
938                                 byte cat1 = Category (i1);
939                                 byte cat2 = Category (i2);
940
941                                 // Handle special weight characters
942                                 if (!stringSort && currentLevel > 4) {
943                                         if (cat1 == 6) {
944                                                 lv5At1 = escape1.Source != null ?
945                                                         escape1.Index - escape1.Start :
946                                                         cur1 - start1;
947                                                 // here Windows has a bug that it does
948                                                 // not consider thirtiary weight.
949                                                 lv5Value1 = Level1 (i1) << 8 + Uni.Level3 (i1);
950                                                 previousChar = i1;
951                                                 idx1++;
952                                         }
953                                         if (cat2 == 6) {
954                                                 lv5At2 = escape2.Source != null ?
955                                                         escape2.Index - escape2.Start :
956                                                         cur2 - start2;
957                                                 // here Windows has a bug that it does
958                                                 // not consider thirtiary weight.
959                                                 lv5Value2 = Level1 (i2) << 8 + Uni.Level3 (i2);
960                                                 previousChar2 = i2;
961                                                 idx2++;
962                                         }
963                                         if (cat1 == 6 || cat2 == 6) {
964                                                 currentLevel = 4;
965                                                 continue;
966                                         }
967                                 }
968
969                                 Contraction ct1 = null;
970                                 if (ext1 == ExtenderType.None)
971                                         ct1 = GetContraction (s1, idx1, end1);
972
973                                 int offset1 = 1;
974                                 if (sk1 != null)
975                                         offset1 = 1;
976                                 else if (ct1 != null) {
977                                         offset1 = ct1.Source.Length;
978                                         if (ct1.SortKey != null) {
979                                                 sk1 = charSortKey;
980                                                 for (int i = 0; i < ct1.SortKey.Length; i++)
981                                                         sk1 [i] = ct1.SortKey [i];
982                                                 previousChar = -1;
983                                                 previousSortKey = sk1;
984                                         }
985                                         else if (escape1.Source == null) {
986                                                 escape1.Source = s1;
987                                                 escape1.Start = start1;
988                                                 escape1.Index = cur1 + ct1.Source.Length;
989                                                 escape1.End = end1;
990                                                 s1 = ct1.Replacement;
991                                                 idx1 = 0;
992                                                 start1 = 0;
993                                                 end1 = s1.Length;
994                                                 continue;
995                                         }
996                                 }
997                                 else {
998                                         sk1 = charSortKey;
999                                         sk1 [0] = cat1;
1000                                         sk1 [1] = Level1 (i1);
1001                                         if (!ignoreNonSpace && currentLevel > 1)
1002                                                 sk1 [2] = Level2 (i1, ext1);
1003                                         if (currentLevel > 2)
1004                                                 sk1 [3] = Uni.Level3 (i1);
1005                                         if (currentLevel > 3)
1006                                                 special1 = Uni.HasSpecialWeight ((char) i1);
1007                                         if (cat1 > 1)
1008                                                 previousChar = i1;
1009                                 }
1010
1011                                 Contraction ct2 = null;
1012                                 if (ext2 == ExtenderType.None)
1013                                         ct2 = GetContraction (s2, idx2, end2);
1014
1015                                 if (sk2 != null)
1016                                         idx2++;
1017                                 else if (ct2 != null) {
1018                                         idx2 += ct2.Source.Length;
1019                                         if (ct2.SortKey != null) {
1020                                                 sk2 = charSortKey2;
1021                                                 for (int i = 0; i < ct2.SortKey.Length; i++)
1022                                                         sk2 [i] = ct2.SortKey [i];
1023                                                 previousChar2 = -1;
1024                                                 previousSortKey2 = sk2;
1025                                         }
1026                                         else if (escape2.Source == null) {
1027                                                 escape2.Source = s2;
1028                                                 escape2.Start = start2;
1029                                                 escape2.Index = cur2 + ct2.Source.Length;
1030                                                 escape2.End = end2;
1031                                                 s2 = ct2.Replacement;
1032                                                 idx2 = 0;
1033                                                 start2 = 0;
1034                                                 end2 = s2.Length;
1035                                                 continue;
1036                                         }
1037                                 }
1038                                 else {
1039                                         sk2 = charSortKey2;
1040                                         sk2 [0] = cat2;
1041                                         sk2 [1] = Level1 (i2);
1042                                         if (!ignoreNonSpace && currentLevel > 1)
1043                                                 sk2 [2] = Level2 (i2, ext2);
1044                                         if (currentLevel > 2)
1045                                                 sk2 [3] = Uni.Level3 (i2);
1046                                         if (currentLevel > 3)
1047                                                 special2 = Uni.HasSpecialWeight ((char) i2);
1048                                         if (cat2 > 1)
1049                                                 previousChar2 = i2;
1050                                         idx2++;
1051                                 }
1052
1053                                 // Add offset here so that it does not skip
1054                                 // idx1 while s2 has a replacement.
1055                                 idx1 += offset1;
1056
1057                                 // add diacritical marks in s1 here
1058                                 if (!ignoreNonSpace) {
1059                                         while (idx1 < end1) {
1060                                                 if (Category (s1 [idx1]) != 1)
1061                                                         break;
1062                                                 if (sk1 [2] == 0)
1063                                                         sk1 [2] = 2;
1064                                                 sk1 [2] = (byte) (sk1 [2] + 
1065                                                         Level2 (s1 [idx1], ExtenderType.None));
1066                                                 idx1++;
1067                                         }
1068
1069                                         // add diacritical marks in s2 here
1070                                         while (idx2 < end2) {
1071                                                 if (Category (s2 [idx2]) != 1)
1072                                                         break;
1073                                                 if (sk2 [2] == 0)
1074                                                         sk2 [2] = 2;
1075                                                 sk2 [2] = (byte) (sk2 [2] + 
1076                                                         Level2 (s2 [idx2], ExtenderType.None));
1077                                                 idx2++;
1078                                         }
1079                                 }
1080
1081                                 int ret = sk1 [0] - sk2 [0];
1082                                 ret = ret != 0 ? ret : sk1 [1] - sk2 [1];
1083                                 if (ret != 0)
1084                                         return ret;
1085                                 if (currentLevel == 1)
1086                                         continue;
1087                                 if (!ignoreNonSpace) {
1088                                         ret = sk1 [2] - sk2 [2];
1089                                         if (ret != 0) {
1090                                                 finalResult = ret;
1091                                                 currentLevel = frenchSort ? 2 : 1;
1092                                                 continue;
1093                                         }
1094                                 }
1095                                 if (currentLevel == 2)
1096                                         continue;
1097                                 ret = sk1 [3] - sk2 [3];
1098                                 if (ret != 0) {
1099                                         finalResult = ret;
1100                                         currentLevel = 2;
1101                                         continue;
1102                                 }
1103                                 if (currentLevel == 3)
1104                                         continue;
1105                                 if (special1 != special2) {
1106                                         finalResult = special1 ? 1 : -1;
1107                                         currentLevel = 3;
1108                                         continue;
1109                                 }
1110                                 if (special1) {
1111                                         ret = CompareFlagPair (
1112                                                 !Uni.IsJapaneseSmallLetter ((char) i1),
1113                                                 !Uni.IsJapaneseSmallLetter ((char) i2));
1114                                         ret = ret != 0 ? ret :
1115                                                 ToDashTypeValue (ext1) -
1116                                                 ToDashTypeValue (ext2);
1117                                         ret = ret != 0 ? ret : CompareFlagPair (
1118                                                 Uni.IsHiragana ((char) i1),
1119                                                 Uni.IsHiragana ((char) i2));
1120                                         ret = ret != 0 ? ret : CompareFlagPair (
1121                                                 !IsHalfKana ((char) i1),
1122                                                 !IsHalfKana ((char) i2));
1123                                         if (ret != 0) {
1124                                                 finalResult = ret;
1125                                                 currentLevel = 3;
1126                                                 continue;
1127                                         }
1128                                 }
1129                         }
1130
1131                         // If there were only level 3 or lower differences,
1132                         // then we still have to find diacritical differences
1133                         // if any.
1134                         if (!ignoreNonSpace &&
1135                                 finalResult != 0 && currentLevel > 2) {
1136                                 while (idx1 < end1 && idx2 < end2) {
1137                                         if (!Uni.IsIgnorableNonSpacing (s1 [idx1]))
1138                                                 break;
1139                                         if (!Uni.IsIgnorableNonSpacing (s2 [idx2]))
1140                                                 break;
1141                                         finalResult = Level2 (FilterOptions ((s1 [idx1])), ext1) - Level2 (FilterOptions (s2 [idx2]), ext2);
1142                                         if (finalResult != 0)
1143                                                 break;
1144                                         idx1++;
1145                                         idx2++;
1146                                         // they should work only for the first character
1147                                         ext1 = ExtenderType.None;
1148                                         ext2 = ExtenderType.None;
1149                                 }
1150                         }
1151                         // we still have to handle level 5
1152                         if (finalResult == 0) {
1153                                 finalResult = lv5At1 - lv5At2;
1154                                 if (finalResult == 0)
1155                                         finalResult = lv5Value1 - lv5Value2;
1156                         }
1157                         if (finalResult == 0) {
1158                                 if (idx2 == end2)
1159                                         targetConsumed = true;
1160                                 if (idx1 == end1)
1161                                         sourceConsumed = true;
1162                         }
1163                         return idx1 != end1 ? 1 : idx2 == end2 ? finalResult : -1;
1164                 }
1165
1166                 int CompareFlagPair (bool b1, bool b2)
1167                 {
1168                         return b1 == b2 ? 0 : b1 ? 1 : -1;
1169                 }
1170
1171                 #endregion
1172
1173                 #region IsPrefix() and IsSuffix()
1174
1175                 public bool IsPrefix (string src, string target, CompareOptions opt)
1176                 {
1177                         return IsPrefix (src, target, 0, src.Length, opt);
1178                 }
1179
1180                 public bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
1181                 {
1182                         SetOptions (opt);
1183                         return IsPrefix (s, target, start, length, 
1184                                 (opt & CompareOptions.StringSort) != 0, true);
1185                 }
1186
1187                 public bool IsPrefix (string s, string target, int start, int length, bool stringSort, bool skipHeadingExtenders)
1188                 {
1189                         bool consumed, dummy;
1190                         int ret = CompareInternal (s, start, length,
1191                                 target, 0, target.Length, stringSort,
1192                                 out consumed, out dummy, skipHeadingExtenders);
1193                         return consumed;
1194                 }
1195
1196                 // IsSuffix()
1197
1198                 public bool IsSuffix (string src, string target, CompareOptions opt)
1199                 {
1200                         return IsSuffix (src, target, src.Length - 1, src.Length, opt);
1201                 }
1202
1203                 public bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
1204                 {
1205                         // quick check : simple codepoint comparison
1206                         if (s.Length >= target.Length) {
1207                                 int si = start;
1208                                 int se = start - length;
1209                                 for (int i = target.Length - 1; si >= se && i >= 0; i--, si--)
1210                                         if (s [si] != target [i])
1211                                                 break;
1212                                 if (si == start + target.Length)
1213                                         return true;
1214                         }
1215
1216                         SetOptions (opt);
1217                         return IsSuffix (s, target, start, length,
1218                                 (opt & CompareOptions.StringSort) != 0);
1219                 }
1220
1221                 bool IsSuffix (string s, string t, int start, int length,
1222                         bool stringSort)
1223                 {
1224                         int tstart = 0;
1225                         for (;tstart < t.Length; tstart++)
1226                                 if (!IsIgnorable (t [tstart]))
1227                                         break;
1228                         if (tstart == t.Length)
1229                                 return true; // as if target is String.Empty.
1230
1231                         // FIXME: it is not efficient.
1232                         bool sourceConsumed, targetConsumed;
1233                         for (int i = 0; i < length; i++) {
1234                                 escape1.Source = escape2.Source = null;
1235                                 previousSortKey = previousSortKey2 = null;
1236                                 previousChar = previousChar2 = -1;
1237
1238                                 int ret = CompareInternal (s, start - i, i + 1,
1239                                         t, tstart, t.Length - tstart,
1240                                         stringSort, out targetConsumed,
1241                                         out sourceConsumed, true);
1242                                 if (ret == 0)
1243                                         return true;
1244                                 if (!sourceConsumed && targetConsumed)
1245                                         return false;
1246                         }
1247                         return false;
1248                 }
1249
1250                 #endregion
1251
1252                 #region IndexOf() / LastIndexOf()
1253
1254                 public int IndexOf (string s, string target, CompareOptions opt)
1255                 {
1256                         return IndexOf (s, target, 0, s.Length, opt);
1257                 }
1258
1259                 public int IndexOf (string s, string target, int start, int length, CompareOptions opt)
1260                 {
1261                         SetOptions (opt);
1262                         return IndexOf (s, target, start, length,
1263                                 (opt & CompareOptions.StringSort) != 0);
1264                 }
1265
1266                 public int IndexOf (string s, char target, CompareOptions opt)
1267                 {
1268                         return IndexOf (s, target, 0, s.Length, opt);
1269                 }
1270
1271                 public int IndexOf (string s, char target, int start, int length, CompareOptions opt)
1272                 {
1273                         SetOptions (opt);
1274
1275                         // If target is contraction, then use string search.
1276                         Contraction ct = GetContraction (target);
1277                         if (ct != null) {
1278                                 if (ct.Replacement != null)
1279                                         return IndexOf (s, ct.Replacement, start, length,
1280                                                 (opt & CompareOptions.StringSort) != 0);
1281                                 else
1282                                         return IndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
1283                         } else {
1284                                 int ti = FilterOptions ((int) target);
1285                                 charSortKeyIndexTarget [0] = Category (ti);
1286                                 charSortKeyIndexTarget [1] = Level1 (ti);
1287                                 if (!ignoreNonSpace)
1288                                         charSortKeyIndexTarget [2] =
1289                                                 Level2 (ti, ExtenderType.None);
1290                                 charSortKeyIndexTarget [3] = Uni.Level3 (ti);
1291                                 return IndexOfSortKey (s, start, length,
1292                                         charSortKeyIndexTarget, target, ti,
1293                                         !Uni.HasSpecialWeight ((char) ti));
1294                         }
1295                 }
1296
1297                 // Searches target byte[] keydata
1298                 int IndexOfSortKey (string s, int start, int length, byte [] sortkey, char target, int ti, bool noLv4)
1299                 {
1300                         int end = start + length;
1301                         int idx = start;
1302                         while (idx < end) {
1303                                 int cur = idx;
1304                                 if (MatchesForward (s, ref idx, end, ti, sortkey, noLv4))
1305                                         return cur;
1306                         }
1307                         return -1;
1308                 }
1309
1310                 // Searches string. Search head character (or keydata when
1311                 // the head is contraction sortkey) and try IsPrefix().
1312                 int IndexOf (string s, string target, int start, int length, bool stringSort)
1313                 {
1314                         int tidx = 0;
1315                         for (; tidx < target.Length; tidx++)
1316                                 if (!IsIgnorable (target [tidx]))
1317                                         break;
1318                         if (tidx == target.Length)
1319                                 return start;
1320                         Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1321                         string replace = ct != null ? ct.Replacement : null;
1322                         byte [] sk = replace == null ? charSortKeyIndexTarget : null;
1323                         bool noLv4 = true;
1324                         char tc = char.MinValue;
1325                         int ti = -1;
1326                         if (ct != null && sk != null) {
1327                                 for (int i = 0; i < ct.SortKey.Length; i++)
1328                                         sk [i] = ct.SortKey [i];
1329                         } else if (sk != null) {
1330                                 tc = target [tidx];
1331                                 ti = FilterOptions (target [tidx]);
1332                                 sk [0] = Category (ti);
1333                                 sk [1] = Level1 (ti);
1334                                 if (!ignoreNonSpace)
1335                                         sk [2] = Level2 (ti, ExtenderType.None);
1336                                 sk [3] = Uni.Level3 (ti);
1337                                 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1338                         }
1339                         if (sk != null) {
1340                                 for (tidx++; tidx < target.Length; tidx++) {
1341                                         if (Category (target [tidx]) != 1)
1342                                                 break;
1343                                         if (sk [2] == 0)
1344                                                 sk [2] = 2;
1345                                         sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1346                                 }
1347                         }
1348
1349                         do {
1350                                 int idx = 0;
1351                                 if (replace != null)
1352                                         idx = IndexOf (s, replace, start, length, stringSort);
1353                                 else
1354                                         idx = IndexOfSortKey (s, start, length, sk, tc, ti, noLv4);
1355                                 if (idx < 0)
1356                                         return -1;
1357                                 length -= idx - start;
1358                                 start = idx;
1359                                 if (IsPrefix (s, target, start, length, stringSort, false))
1360                                         return idx;
1361                                 Contraction cts = GetContraction (s, start, length);
1362                                 if (cts != null) {
1363                                         start += cts.Source.Length;
1364                                         length -= cts.Source.Length;
1365                                 } else {
1366                                         start++;
1367                                         length--;
1368                                 }
1369                         } while (length > 0);
1370                         return -1;
1371                 }
1372
1373                 //
1374                 // There are the same number of IndexOf() related methods,
1375                 // with the same functionalities for each.
1376                 //
1377
1378                 public int LastIndexOf (string s, string target, CompareOptions opt)
1379                 {
1380                         return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1381                 }
1382
1383                 public int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
1384                 {
1385                         SetOptions (opt);
1386                         return LastIndexOf (s, target, start, length,
1387                                 (opt & CompareOptions.StringSort) != 0);
1388                 }
1389
1390                 public int LastIndexOf (string s, char target, CompareOptions opt)
1391                 {
1392                         return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1393                 }
1394
1395                 public int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
1396                 {
1397                         SetOptions (opt);
1398
1399                         // If target is contraction, then use string search.
1400                         Contraction ct = GetContraction (target);
1401                         if (ct != null) {
1402                                 if (ct.Replacement != null)
1403                                         return LastIndexOf (s, ct.Replacement, start, length, (opt & CompareOptions.StringSort) != 0);
1404                                 else
1405                                         return LastIndexOfSortKey (s, start, start, length, ct.SortKey, char.MinValue, -1, true);
1406                         }
1407                         else {
1408                                 int ti = FilterOptions ((int) target);
1409                                 charSortKeyIndexTarget [0] = Category (ti);
1410                                 charSortKeyIndexTarget [1] = Level1 (ti);
1411                                 if (!ignoreNonSpace)
1412                                         charSortKeyIndexTarget [2] = Level2 (ti, ExtenderType.None);
1413                                 charSortKeyIndexTarget [3] = Uni.Level3 (ti);
1414                                 return LastIndexOfSortKey (s, start, start, length, charSortKeyIndexTarget, target, ti, !Uni.HasSpecialWeight ((char) ti));
1415                         }
1416                 }
1417
1418                 // Searches target byte[] keydata
1419                 int LastIndexOfSortKey (string s, int start, int orgStart, int length, byte [] sortkey, char target, int ti, bool noLv4)
1420                 {
1421                         int end = start - length;
1422                         int idx = start;
1423                         while (idx > end) {
1424                                 int cur = idx;
1425                                 if (MatchesBackward (s, ref idx, end, orgStart, ti, sortkey, noLv4))
1426                                         return cur;
1427                         }
1428                         return -1;
1429                 }
1430
1431                 // Searches string. Search head character (or keydata when
1432                 // the head is contraction sortkey) and try IsPrefix().
1433                 int LastIndexOf (string s, string target, int start, int length, bool stringSort)
1434                 {
1435                         int orgStart = start;
1436                         int tidx = 0;
1437                         for (; tidx < target.Length; tidx++)
1438                                 if (!IsIgnorable (target [tidx]))
1439                                         break;
1440                         if (tidx == target.Length)
1441                                 return start;
1442                         Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1443                         string replace = ct != null ? ct.Replacement : null;
1444                         byte [] sk = replace == null ? charSortKeyIndexTarget : null;
1445
1446                         bool noLv4 = true;
1447                         char tc = char.MinValue;
1448                         int ti = -1;
1449                         if (ct != null && sk != null) {
1450                                 for (int i = 0; i < ct.SortKey.Length; i++)
1451                                         sk [i] = ct.SortKey [i];
1452                         } else if (sk != null) {
1453                                 tc = target [tidx];
1454                                 ti = FilterOptions (target [tidx]);
1455                                 sk [0] = Category (ti);
1456                                 sk [1] = Level1 (ti);
1457                                 if (!ignoreNonSpace)
1458                                         sk [2] = Level2 (ti, ExtenderType.None);
1459                                 sk [3] = Uni.Level3 (ti);
1460                                 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1461                         }
1462                         if (sk != null) {
1463                                 for (tidx++; tidx < target.Length; tidx++) {
1464                                         if (Category (target [tidx]) != 1)
1465                                                 break;
1466                                         if (sk [2] == 0)
1467                                                 sk [2] = 2;
1468                                         sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1469                                 }
1470                         }
1471
1472                         do {
1473                                 int idx = 0;
1474
1475                                 if (replace != null)
1476                                         idx = LastIndexOf (s, replace, start, length, stringSort);
1477                                 else
1478                                         idx = LastIndexOfSortKey (s, start, orgStart, length, sk, tc, ti, noLv4);
1479                                 if (idx < 0)
1480                                         return -1;
1481                                 length -= start - idx;
1482                                 start = idx;
1483
1484                                 if (IsPrefix (s, target, idx, orgStart - idx + 1, stringSort, false)) {
1485                                         for (;idx < orgStart; idx++)
1486                                                 if (!IsIgnorable (s [idx]))
1487                                                         break;
1488                                         return idx;
1489                                 }
1490                                 Contraction cts = GetContraction (s, idx, orgStart - idx + 1);
1491                                 if (cts != null) {
1492                                         start -= cts.Source.Length;
1493                                         length -= cts.Source.Length;
1494                                 } else {
1495                                         start--;
1496                                         length--;
1497                                 }
1498                         } while (length > 0);
1499                         return -1;
1500                 }
1501
1502                 private bool MatchesForward (string s, ref int idx, int end, int ti, byte [] sortkey, bool noLv4)
1503                 {
1504                         int si = -1;
1505                         ExtenderType ext = GetExtenderType (s [idx]);
1506                         Contraction ct = null;
1507                         if (ext == ExtenderType.None)
1508                                 ct = GetContraction (s, idx, end);
1509                         else if (previousChar < 0) {
1510                                 if (previousSortKey == null) {
1511                                         idx++;
1512                                         return false;
1513                                 }
1514                                 charSortKey = previousSortKey;
1515                         }
1516                         else
1517                                 si = FilterExtender (previousChar, ext);
1518                         // if lv4 exists, it never matches contraction
1519                         if (ct != null) {
1520                                 idx += ct.Source.Length;
1521                                 if (!noLv4)
1522                                         return false;
1523                                 if (ct.SortKey != null) {
1524                                         for (int i = 0; i < sortkey.Length; i++)
1525                                                 charSortKey [i] = sortkey [i];
1526                                         previousChar = -1;
1527                                         previousSortKey = charSortKey;
1528                                 } else {
1529                                         // Here is the core of LAMESPEC
1530                                         // described at the top of the source.
1531                                         int dummy = 0;
1532                                         return MatchesForward (ct.Replacement, ref dummy,
1533                                                 ct.Replacement.Length, ti, sortkey, noLv4);
1534                                 }
1535                         } else {
1536                                 if (si < 0)
1537                                         si = FilterOptions (s [idx]);
1538                                 charSortKey [0] = Category (si);
1539                                 charSortKey [1] = Level1 (si);
1540                                 if (!ignoreNonSpace)
1541                                         charSortKey [2] = Level2 (si, ext);
1542                                 charSortKey [3] = Uni.Level3 (si);
1543                                 if (charSortKey [0] != 1)
1544                                         previousChar = si;
1545                                 idx++;
1546                         }
1547                         for (; idx < end; idx++) {
1548                                 if (Category (s [idx]) != 1)
1549                                         break;
1550                                 if (ignoreNonSpace)
1551                                         continue;
1552                                 if (charSortKey [2] == 0)
1553                                                 charSortKey [2] = 2;
1554                                         charSortKey [2] = (byte) (charSortKey [2]
1555                                                 + Level2 (s [idx], ExtenderType.None));
1556                         }
1557
1558                         return MatchesPrimitive (charSortKey, si, ext, sortkey, ti, noLv4);
1559                 }
1560
1561                 private bool MatchesPrimitive (byte [] charSortKey, int si, ExtenderType ext, byte [] sortkey, int ti, bool noLv4)
1562                 {
1563                         if (charSortKey [0] != sortkey [0] ||
1564                                 charSortKey [1] != sortkey [1] ||
1565                                 (!ignoreNonSpace && charSortKey [2] != sortkey [2]) ||
1566                                 charSortKey [3] != sortkey [3])
1567                                 return false;
1568                         if (noLv4 && (si < 0 || !Uni.HasSpecialWeight ((char) si)))
1569                                 return true;
1570                         else if (noLv4)
1571                                 return false;
1572                         // Since target can never be an extender, if the source
1573                         // is an expander and it matters, then they never match.
1574                         if (!ignoreNonSpace && ext == ExtenderType.Conditional)
1575                                 return false;
1576                         if (Uni.IsJapaneseSmallLetter ((char) si) !=
1577                                 Uni.IsJapaneseSmallLetter ((char) ti) ||
1578                                 ToDashTypeValue (ext) !=
1579                                 // FIXME: we will have to specify correct value for target
1580                                 ToDashTypeValue (ExtenderType.None) ||
1581                                 !Uni.IsHiragana ((char) si) !=
1582                                 !Uni.IsHiragana ((char) ti) ||
1583                                 IsHalfKana ((char) si) !=
1584                                 IsHalfKana ((char) ti))
1585                                 return false;
1586                         return true;
1587                 }
1588
1589                 private bool MatchesBackward (string s, ref int idx, int end, int orgStart, int ti, byte [] sortkey, bool noLv4)
1590                 {
1591                         int cur = idx;
1592                         int si = -1;
1593                         ExtenderType ext = GetExtenderType (s [idx]);
1594                         // To handle extenders in source, we need to
1595                         // check next _primary_ character.
1596                         if (ext != ExtenderType.None) {
1597                                 byte diacritical = 0;
1598                                 for (int tmp = 0; ; tmp--) {
1599                                         if (tmp < 0) // heading extender
1600                                                 return false;
1601                                         if (IsIgnorable (s [tmp]))
1602                                                 continue;
1603                                         int tmpi = FilterOptions (s [tmp]);
1604                                         byte category = Category (tmpi);
1605                                         if (category == 1) {
1606                                                 diacritical = Level2 (tmpi, ExtenderType.None);
1607                                                 continue;
1608                                         }
1609                                         si = FilterExtender (tmpi, ext);
1610                                         charSortKey [0] = category;
1611                                         charSortKey [1] = Level1 (si);
1612                                         if (!ignoreNonSpace)
1613                                                 charSortKey [2] = Level2 (si, ext);
1614                                         charSortKey [3] = Uni.Level3 (si);
1615                                         if (ext != ExtenderType.Conditional &&
1616                                                 diacritical != 0)
1617                                                 charSortKey [2] =
1618                                                         (charSortKey [2] == 0) ?
1619                                                         (byte) (diacritical + 2) :
1620                                                         diacritical;
1621                                         break;
1622                                 }
1623                                 idx--;
1624                         }
1625                         Contraction ct = null;
1626                         if (ext == ExtenderType.None)
1627                                 ct = GetContraction (s, idx, end);
1628                         // if lv4 exists, it never matches contraction
1629                         if (ct != null) {
1630                                 idx -= ct.Source.Length;
1631                                 if (!noLv4)
1632                                         return false;
1633                                 if (ct.SortKey != null) {
1634                                         for (int i = 0; i < sortkey.Length; i++)
1635                                                 charSortKey [i] = sortkey [i];
1636                                         previousChar = -1;
1637                                         previousSortKey = charSortKey;
1638                                 } else {
1639                                         // Here is the core of LAMESPEC
1640                                         // described at the top of the source.
1641                                         int dummy = ct.Replacement.Length - 1;
1642                                         return MatchesBackward (ct.Replacement, 
1643                                                 ref dummy, dummy, -1, ti, sortkey, noLv4);
1644                                 }
1645                         } else if (ext == ExtenderType.None) {
1646                                 if (si < 0)
1647                                         si = FilterOptions (s [idx]);
1648                                 charSortKey [0] = Category (si);
1649                                 charSortKey [1] = Level1 (si);
1650                                 if (!ignoreNonSpace)
1651                                         charSortKey [2] = Level2 (si, ext);
1652                                 charSortKey [3] = Uni.Level3 (si);
1653                                 if (charSortKey [0] != 1)
1654                                         previousChar = si;
1655                                 idx--;
1656                         }
1657                         if (ext == ExtenderType.None) {
1658                                 for (int tmp = cur + 1; tmp < orgStart; tmp++) {
1659                                         if (Category (s [tmp]) != 1)
1660                                                 break;
1661                                         if (ignoreNonSpace)
1662                                                 continue;
1663                                         if (charSortKey [2] == 0)
1664                                                         charSortKey [2] = 2;
1665                                         charSortKey [2] = (byte) (charSortKey [2]
1666                                                 + Level2 (s [tmp], ExtenderType.None));
1667                                 }
1668                         }
1669                         return MatchesPrimitive (charSortKey, si, ext, sortkey, ti, noLv4);
1670                 }
1671                 #endregion
1672         }
1673 }