2005-07-14 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / SimpleCollator.cs
1 //
2 // SimpleCollator.cs
3 //
4 // This class will demonstrate CompareInfo functionality that will just work.
5 //
6
7 //
8 // Here's a summary for supporting contractions, expansions and diacritical 
9 // remappings.
10 //
11 // Diacritical mapping is a simple tailoring rule that "remaps" diacritical
12 // weight value from one to another. For now it applies to all range of
13 // characters, but at some stage we might need to limit the remapping ranges.
14 //
15 // A Contraction consists of a string (actually char[]) and a sortkey entry
16 // (i.e. byte[]). It indicates that a sequence of characters is interpreted
17 // as a single character that has the mapped sortkey value. There is no
18 // character which goes across "special rules". When the collator encountered
19 // such a sequence of characters, it just returns the sortkey value without
20 // further replacement.
21 //
22 // Since it is still likely to happen that a contraction sequence matches
23 // other character than the identical sequence (depending on CompareOptions
24 // and of course, defined sortkey value itself), comparison cannot be done
25 // at source char[] level.
26 //
27 // (to be continued.)
28 //
29
30 //
31 // In IndexOf(), the first character in the target string or the target char
32 // itself is turned into sortkey bytes. If the character has a contraction and
33 // that is sortkey map, then it is used instead. If the contraction exists and
34 // that is replacement map, then the first character of the replacement string
35 // is searched instead. IndexOf() always searches only for the top character,
36 // and if it returned negative value, then it returns -1. Otherwise, it then
37 // tries IsPrefix() from that location. If it returns true, then it returns
38 // the index.
39 //
40
41 // LAMESPEC: IndexOf() is lame as a whole API. It never matches in the middle
42 // of expansion and there is no proper way to return such indexes within
43 // a single int return value.
44 //
45 // For example, try below in .NET:
46 //      IndexOf("\u00E6", "a")
47 //      IndexOf("\u00E6", "e")
48 //
49
50
51 using System;
52 using System.Collections;
53 using System.Globalization;
54
55 using Uni = Mono.Globalization.Unicode.MSCompatUnicodeTable;
56 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
57
58 namespace Mono.Globalization.Unicode
59 {
60         internal class SimpleCollator
61         {
62                 static SimpleCollator invariant =
63                         new SimpleCollator (CultureInfo.InvariantCulture);
64
65                 internal static readonly byte [] ignorableFlags =
66                         Uni.ignorableFlags;
67                 internal static readonly byte [] categories =
68                         Uni.categories;
69                 internal static readonly byte [] level1 =
70                         Uni.level1;
71                 internal static readonly byte [] level2 =
72                         Uni.level2;
73                 internal static readonly byte [] level3 =
74                         Uni.level3;
75                 internal static readonly ushort [] widthCompat =
76                         Uni.widthCompat;
77                 internal static readonly CodePointIndexer categoryIndexer =
78                         UUtil.Category;
79                 internal static readonly CodePointIndexer lv1Indexer =
80                         UUtil.Level1;
81                 internal static readonly CodePointIndexer lv2Indexer =
82                         UUtil.Level2;
83                 internal static readonly CodePointIndexer lv3Indexer =
84                         UUtil.Level3;
85                 internal static readonly CodePointIndexer widthIndexer =
86                         UUtil.WidthCompat;
87
88
89                 SortKeyBuffer buf;
90                 // CompareOptions expanded.
91                 bool ignoreNonSpace; // used in IndexOf()
92                 bool ignoreSymbols;
93                 bool ignoreWidth;
94                 bool ignoreCase;
95                 bool ignoreKanaType;
96                 TextInfo textInfo; // for ToLower().
97                 bool frenchSort;
98                 readonly ushort [] cjkTable;
99                 readonly CodePointIndexer cjkIndexer;
100                 readonly byte [] cjkLv2Table;
101                 readonly CodePointIndexer cjkLv2Indexer;
102                 readonly int lcid;
103                 readonly Contraction [] contractions;
104                 readonly Level2Map [] level2Maps;
105
106                 // temporary sortkey buffer for index search/comparison
107                 byte [] charSortKey = new byte [4];
108                 byte [] charSortKey2 = new byte [4];
109                 byte [] charSortKeyIndexTarget = new byte [4];
110
111                 #region Tailoring support classes
112                 // Possible mapping types are:
113                 //
114                 //      - string to string (ReplacementMap)
115                 //      - string to SortKey (SortKeyMap)
116                 //      - diacritical byte to byte (DiacriticalMap)
117                 //
118                 // There could be mapping from string to sortkeys, but
119                 // for now there is none as such.
120                 //
121                 internal class Contraction
122                 {
123                         public readonly char [] Source;
124                         // only either of them is used.
125                         public readonly string Replacement;
126                         public readonly byte [] SortKey;
127
128                         public Contraction (char [] source,
129                                 string replacement, byte [] sortkey)
130                         {
131                                 Source = source;
132                                 Replacement = replacement;
133                                 SortKey = sortkey;
134                         }
135                 }
136
137                 internal class ContractionComparer : IComparer
138                 {
139                         public static readonly ContractionComparer Instance =
140                                 new ContractionComparer ();
141
142                         public int Compare (object o1, object o2)
143                         {
144                                 Contraction c1 = (Contraction) o1;
145                                 Contraction c2 = (Contraction) o2;
146                                 char [] a1 = c1.Source;
147                                 char [] a2 = c2.Source;
148                                 int min = a1.Length > a2.Length ?
149                                         a2.Length : a1.Length;
150                                 for (int i = 0; i < min; i++)
151                                         if (a1 [i] != a2 [i])
152                                                 return a1 [i] - a2 [i];
153                                 return a1.Length - a2.Length;
154                         }
155                 }
156
157                 internal class Level2Map
158                 {
159                         public byte Source;
160                         public byte Replace;
161
162                         public Level2Map (byte source, byte replace)
163                         {
164                                 Source = source;
165                                 Replace = replace;
166                         }
167                 }
168
169                 internal class Level2MapComparer : IComparer
170                 {
171                         public static readonly Level2MapComparer Instance =
172                                 new Level2MapComparer ();
173
174                         public int Compare (object o1, object o2)
175                         {
176                                 Level2Map m1 = (Level2Map) o1;
177                                 Level2Map m2 = (Level2Map) o2;
178                                 return (m1.Source - m2.Source);
179                         }
180                 }
181
182                 #endregion
183
184                 #region .ctor() and split functions
185
186                 public SimpleCollator (CultureInfo culture)
187                 {
188                         lcid = culture.LCID;
189                         textInfo = culture.TextInfo;
190                         buf = new SortKeyBuffer (culture.LCID);
191
192                         SetCJKTable (culture, ref cjkTable, ref cjkIndexer,
193                                 ref cjkLv2Table, ref cjkLv2Indexer);
194
195                         // Get tailoring info
196                         TailoringInfo t = null;
197                         for (CultureInfo ci = culture; ci.LCID != 127; ci = ci.Parent) {
198                                 t = Uni.GetTailoringInfo (ci.LCID);
199                                 if (t != null)
200                                         break;
201                         }
202                         if (t == null) // then use invariant
203                                 t = Uni.GetTailoringInfo (127);
204
205                         frenchSort = t.FrenchSort;
206                         BuildTailoringTables (culture, t, ref contractions,
207                                 ref level2Maps);
208                         // FIXME: Since tailorings are mostly for latin
209                         // (and in some cases Cyrillic) characters, it would
210                         // be much better for performance to store "start 
211                         // indexes" for > 370 (culture-specific letters).
212
213 /*
214 // dump tailoring table
215 Console.WriteLine ("******** building table for {0} : c - {1} d - {2}",
216 culture.LCID, contractions.Length, level2Maps.Length);
217 foreach (Contraction c in contractions) {
218 foreach (char cc in c.Source)
219 Console.Write ("{0:X4} ", (int) cc);
220 Console.WriteLine (" -> '{0}'", c.Replacement);
221 }
222 */
223                 }
224
225                 private void BuildTailoringTables (CultureInfo culture,
226                         TailoringInfo t,
227                         ref Contraction [] contractions,
228                         ref Level2Map [] diacriticals)
229                 {
230                         // collect tailoring entries.
231                         ArrayList cmaps = new ArrayList ();
232                         ArrayList dmaps = new ArrayList ();
233                         char [] tarr = Uni.TailoringValues;
234                         int idx = t.TailoringIndex;
235                         int end = idx + t.TailoringCount;
236                         while (idx < end) {
237                                 int ss = idx + 1;
238                                 char [] src = null;
239                                 switch (tarr [idx]) {
240                                 case '\x1': // SortKeyMap
241                                         idx++;
242                                         while (tarr [ss] != 0)
243                                                 ss++;
244                                         src = new char [ss - idx];
245                                         Array.Copy (tarr, idx, src, 0, ss - idx);
246                                         byte [] sortkey = new byte [4];
247                                         for (int i = 0; i < 4; i++)
248                                                 sortkey [i] = (byte) tarr [ss + 1 + i];
249                                         cmaps.Add (new Contraction (
250                                                 src, null, sortkey));
251                                         // it ends with 0
252                                         idx = ss + 6;
253                                         break;
254                                 case '\x2': // DiacriticalMap
255                                         dmaps.Add (new Level2Map (
256                                                 (byte) tarr [idx + 1],
257                                                 (byte) tarr [idx + 2]));
258                                         idx += 3;
259                                         break;
260                                 case '\x3': // ReplacementMap
261                                         idx++;
262                                         while (tarr [ss] != 0)
263                                                 ss++;
264                                         src = new char [ss - idx];
265                                         Array.Copy (tarr, idx, src, 0, ss - idx);
266                                         ss++;
267                                         int l = ss;
268                                         while (tarr [l] != 0)
269                                                 l++;
270                                         string r = new string (tarr, ss, l - ss);
271                                         cmaps.Add (new Contraction (
272                                                 src, r, null));
273                                         idx = l + 1;
274                                         break;
275                                 default:
276                                         throw new NotImplementedException (String.Format ("Mono INTERNAL ERROR (Should not happen): Collation tailoring table is broken for culture {0} ({1}) at 0x{2:X}", culture.LCID, culture.Name, idx));
277                                 }
278                         }
279                         cmaps.Sort (ContractionComparer.Instance);
280                         dmaps.Sort (Level2MapComparer.Instance);
281                         contractions = cmaps.ToArray (typeof (Contraction))
282                                 as Contraction [];
283                         diacriticals = dmaps.ToArray (typeof (Level2Map))
284                                 as Level2Map [];
285                 }
286
287                 private void SetCJKTable (CultureInfo culture,
288                         ref ushort [] cjkTable, ref CodePointIndexer cjkIndexer,
289                         ref byte [] cjkLv2Table, ref CodePointIndexer cjkLv2Indexer)
290                 {
291                         string name = GetNeutralCulture (culture).Name;
292
293                         Uni.FillCJK (name);
294
295                         // custom CJK table support.
296                         switch (name) {
297                         case "zh-CHS":
298                                 cjkTable = Uni.CjkCHS;
299                                 cjkIndexer = UUtil.CjkCHS;
300                                 break;
301                         case "zh-CHT":
302                                 cjkTable = Uni.CjkCHT;
303                                 cjkIndexer = UUtil.Cjk;
304                                 break;
305                         case "ja":
306                                 cjkTable = Uni.CjkJA;
307                                 cjkIndexer = UUtil.Cjk;
308                                 break;
309                         case "ko":
310                                 cjkTable = Uni.CjkKO;
311                                 cjkLv2Table = Uni.CjkKOLv2;
312                                 cjkIndexer = UUtil.Cjk;
313                                 cjkLv2Indexer = UUtil.Cjk;
314                                 break;
315                         }
316                 }
317
318                 static CultureInfo GetNeutralCulture (CultureInfo info)
319                 {
320                         CultureInfo ret = info;
321                         while (ret.Parent != null && ret.Parent.LCID != 127)
322                                 ret = ret.Parent;
323                         return ret;
324                 }
325
326                 #endregion
327
328                 byte Category (int cp)
329                 {
330                         if (cp < 0x3000 || cjkTable == null)
331                                 return categories [categoryIndexer.ToIndex (cp)];
332                         int idx = cjkIndexer.ToIndex (cp);
333                         ushort cjk = idx < 0 ? (ushort) 0 : cjkTable [idx];
334                         return cjk != 0 ? (byte) ((cjk & 0xFF00) >> 8) :
335                                 categories [categoryIndexer.ToIndex (cp)];
336                 }
337
338                 byte Level1 (int cp)
339                 {
340                         if (cp < 0x3000 || cjkTable == null)
341                                 return level1 [lv1Indexer.ToIndex (cp)];
342                         int idx = cjkIndexer.ToIndex (cp);
343                         ushort cjk = idx < 0 ? (ushort) 0 : cjkTable [idx];
344                         return cjk != 0 ? (byte) (cjk & 0xFF) :
345                                 level1 [lv1Indexer.ToIndex (cp)];
346                 }
347
348                 byte Level2 (int cp, ExtenderType ext)
349                 {
350                         if (ext == ExtenderType.Buggy)
351                                 return 5;
352                         else if (ext == ExtenderType.Conditional)
353                                 return 0;
354
355                         if (cp < 0x3000 || cjkLv2Table == null)
356                                 return level2 [lv2Indexer.ToIndex (cp)];
357                         int idx = cjkLv2Indexer.ToIndex (cp);
358                         byte ret = idx < 0 ? (byte) 0 : cjkLv2Table [idx];
359                         if (ret != 0)
360                                 return ret;
361                         ret = level2 [lv2Indexer.ToIndex (cp)];
362                         if (level2Maps.Length == 0)
363                                 return ret;
364                         for (int i = 0; i < level2Maps.Length; i++) {
365                                 if (level2Maps [i].Source == ret)
366                                         return level2Maps [i].Replace;
367                                 else if (level2Maps [i].Source > ret)
368                                         break;
369                         }
370                         return ret;
371                 }
372
373                 bool IsHalfKana (int cp)
374                 {
375                         return ignoreWidth || Uni.IsHalfWidthKana ((char) cp);
376                 }
377
378                 void SetOptions (CompareOptions options)
379                 {
380                         this.ignoreNonSpace = (options & CompareOptions.IgnoreNonSpace) != 0;
381                         this.ignoreSymbols = (options & CompareOptions.IgnoreSymbols) != 0;
382                         this.ignoreWidth = (options & CompareOptions.IgnoreWidth) != 0;
383                         this.ignoreCase = (options & CompareOptions.IgnoreCase) != 0;
384                         this.ignoreKanaType = (options & CompareOptions.IgnoreKanaType) != 0;
385                         previousChar = previousChar2 = -1;
386                         previousSortKey = previousSortKey2 = null;
387                         escape1.Source = escape2.Source = null;
388                 }
389
390                 Contraction GetContraction (string s, int start, int end)
391                 {
392                         Contraction c = GetContraction (s, start, end, contractions);
393                         if (c != null || lcid == 127)
394                                 return c;
395                         return GetContraction (s, start, end, invariant.contractions);
396                 }
397
398                 Contraction GetContraction (string s, int start, int end, Contraction [] clist)
399                 {
400                         for (int i = 0; i < clist.Length; i++) {
401                                 Contraction ct = clist [i];
402                                 int diff = ct.Source [0] - s [start];
403                                 if (diff > 0)
404                                         return null; // it's already sorted
405                                 else if (diff < 0)
406                                         continue;
407                                 char [] chars = ct.Source;
408                                 if (end - start < chars.Length)
409                                         continue;
410                                 bool match = true;
411                                 for (int n = 0; n < chars.Length; n++)
412                                         if (s [start + n] != chars [n]) {
413                                                 match = false;
414                                                 break;
415                                         }
416                                 if (match)
417                                         return ct;
418                         }
419                         return null;
420                 }
421
422                 Contraction GetTailContraction (string s, int start, int end)
423                 {
424                         Contraction c = GetTailContraction (s, start, end, contractions);
425                         if (c != null || lcid == 127)
426                                 return c;
427                         return GetTailContraction (s, start, end, invariant.contractions);
428                 }
429
430                 Contraction GetTailContraction (string s, int start, int end, Contraction [] clist)
431                 {
432                         for (int i = 0; i < clist.Length; i++) {
433                                 Contraction ct = clist [i];
434                                 int diff = ct.Source [0] - s [end];
435                                 if (diff > 0)
436                                         return null; // it's already sorted
437                                 else if (diff < 0)
438                                         continue;
439                                 char [] chars = ct.Source;
440                                 if (start - end + 1 < chars.Length)
441                                         continue;
442                                 bool match = true;
443                                 int offset = start - chars.Length + 1;
444                                 for (int n = 0; n < chars.Length; n++)
445                                         if (s [offset + n] != chars [n]) {
446                                                 match = false;
447                                                 break;
448                                         }
449                                 if (match)
450                                         return ct;
451                         }
452                         return null;
453                 }
454
455                 Contraction GetContraction (char c)
456                 {
457                         Contraction ct = GetContraction (c, contractions);
458                         if (ct != null || lcid == 127)
459                                 return ct;
460                         return GetContraction (c, invariant.contractions);
461                 }
462
463                 Contraction GetContraction (char c, Contraction [] clist)
464                 {
465                         for (int i = 0; i < clist.Length; i++) {
466                                 Contraction ct = clist [i];
467                                 if (ct.Source [0] > c)
468                                         return null; // it's already sorted
469                                 if (ct.Source [0] == c && ct.Source.Length == 1)
470                                         return ct;
471                         }
472                         return null;
473                 }
474
475                 int FilterOptions (int i)
476                 {
477                         if (ignoreWidth) {
478                                 int x = widthCompat [widthIndexer.ToIndex (i)];
479                                 if (x != 0)
480                                         i = x;
481                         }
482                         if (ignoreCase)
483                                 i = textInfo.ToLower ((char) i);
484                         if (ignoreKanaType)
485                                 i = Uni.ToKanaTypeInsensitive (i);
486                         return i;
487                 }
488
489                 int previousChar = -1;
490                 byte [] previousSortKey = null;
491                 int previousChar2 = -1;
492                 byte [] previousSortKey2 = null;
493
494                 enum ExtenderType {
495                         None,
496                         Simple,
497                         Voiced,
498                         Conditional,
499                         Buggy,
500                 }
501
502                 ExtenderType GetExtenderType (int i)
503                 {
504                         // LAMESPEC: Windows expects true for U+3005, but 
505                         // sometimes it does not represent to repeat just
506                         // one character.
507                         // Windows also expects true for U+3031 and U+3032,
508                         // but they should *never* repeat one character.
509
510                         if (i < 0x3005 || i > 0xFF70)
511                                 return ExtenderType.None;
512                         if (i >= 0xFE7C) {
513                                 switch (i) {
514                                 case 0xFE7C:
515                                 case 0xFE7D:
516                                         return ExtenderType.Simple;
517                                 case 0xFF70:
518                                         return ExtenderType.Conditional;
519                                 case 0xFF9E:
520                                 case 0xFF9F:
521                                         return ExtenderType.Voiced;
522                                 }
523                         }
524                         if (i > 0x30FE)
525                                 return ExtenderType.None;
526                         switch (i) {
527                         case 0x3005: // LAMESPEC: see above.
528                                 return ExtenderType.Buggy;
529                         case 0x3031: // LAMESPEC: see above.
530                         case 0x3032: // LAMESPEC: see above.
531                         case 0x309D:
532                         case 0x30FD:
533                                 return ExtenderType.Simple;
534                         case 0x309E:
535                         case 0x30FE:
536                                 return ExtenderType.Voiced;
537                         case 0x30FC:
538                                 return ExtenderType.Conditional;
539                         default:
540                                 return ExtenderType.None;
541                         }
542                 }
543
544                 byte ToDashTypeValue (ExtenderType ext)
545                 {
546                         if (ignoreNonSpace) // LAMESPEC: huh, why?
547                                 return 3;
548                         switch (ext) {
549                         case ExtenderType.None:
550                                 return 3;
551                         case ExtenderType.Conditional:
552                                 return 5;
553                         default:
554                                 return 4;
555                         }
556                 }
557
558                 int FilterExtender (int i, ExtenderType ext)
559                 {
560                         if (ext == ExtenderType.Conditional &&
561                                 Uni.HasSpecialWeight ((char) i)) {
562                                 bool half = IsHalfKana ((char) i);
563                                 bool katakana = !Uni.IsHiragana ((char) i);
564                                 switch (Level1 (i) & 7) {
565                                 case 2:
566                                         return half ? 0xFF71 : katakana ? 0x30A2 : 0x3042;
567                                 case 3:
568                                         return half ? 0xFF72 : katakana ? 0x30A4 : 0x3044;
569                                 case 4:
570                                         return half ? 0xFF73 : katakana ? 0x30A6 : 0x3046;
571                                 case 5:
572                                         return half ? 0xFF74 : katakana ? 0x30A8 : 0x3048;
573                                 case 6:
574                                         return half ? 0xFF75 : katakana ? 0x30AA : 0x304A;
575                                 }
576                         }
577                         return i;
578                 }
579
580                 bool IsIgnorable (int i)
581                 {
582                         return Uni.IsIgnorable (i) ||
583                                 ignoreSymbols && Uni.IsIgnorableSymbol (i) ||
584                                 ignoreNonSpace && Uni.IsIgnorableNonSpacing (i);
585                 }
586
587
588                 #region GetSortKey()
589
590                 public SortKey GetSortKey (string s)
591                 {
592                         return GetSortKey (s, CompareOptions.None);
593                 }
594
595                 public SortKey GetSortKey (string s, CompareOptions options)
596                 {
597                         return GetSortKey (s, 0, s.Length, options);
598                 }
599
600                 public SortKey GetSortKey (string s, int start, int length, CompareOptions options)
601                 {
602                         SetOptions (options);
603
604                         buf.Initialize (options, lcid, s, frenchSort);
605                         int end = start + length;
606                         GetSortKey (s, start, end);
607                         return buf.GetResultAndReset ();
608                 }
609
610                 void GetSortKey (string s, int start, int end)
611                 {
612                         for (int n = start; n < end; n++) {
613                                 int i = s [n];
614
615                                 ExtenderType ext = GetExtenderType (i);
616                                 if (ext != ExtenderType.None) {
617                                         i = FilterExtender (previousChar, ext);
618                                         if (i >= 0)
619                                                 FillSortKeyRaw (i, ext);
620                                         else if (previousSortKey != null) {
621                                                 byte [] b = previousSortKey;
622                                                 buf.AppendNormal (
623                                                         b [0],
624                                                         b [1],
625                                                         b [2] != 1 ? b [2] : Level2 (i, ext),
626                                                         b [3] != 1 ? b [3] : level3 [lv3Indexer.ToIndex (i)]);
627                                         }
628                                         // otherwise do nothing.
629                                         // (if the extender is the first char
630                                         // in the string, then just ignore.)
631                                         continue;
632                                 }
633
634                                 if (IsIgnorable (i))
635                                         continue;
636                                 i = FilterOptions (i);
637
638                                 Contraction ct = GetContraction (s, n, end);
639                                 if (ct != null) {
640                                         if (ct.Replacement != null) {
641                                                 GetSortKey (ct.Replacement, 0, ct.Replacement.Length);
642                                         } else {
643                                                 byte [] b = ct.SortKey;
644                                                 buf.AppendNormal (
645                                                         b [0],
646                                                         b [1],
647                                                         b [2] != 1 ? b [2] : Level2 (i, ext),
648                                                         b [3] != 1 ? b [3] : level3 [lv3Indexer.ToIndex (i)]);
649                                                 previousSortKey = b;
650                                                 previousChar = -1;
651                                         }
652                                         n += ct.Source.Length - 1;
653                                 }
654                                 else {
655                                         if (!Uni.IsIgnorableNonSpacing (i))
656                                                 previousChar = i;
657                                         FillSortKeyRaw (i, ExtenderType.None);
658                                 }
659                         }
660                 }
661
662                 void FillSortKeyRaw (int i, ExtenderType ext)
663                 {
664                         if (0x3400 <= i && i <= 0x4DB5) {
665                                 int diff = i - 0x3400;
666                                 buf.AppendCJKExtension (
667                                         (byte) (0x10 + diff / 254),
668                                         (byte) (diff % 254 + 2));
669                                 return;
670                         }
671
672                         UnicodeCategory uc = char.GetUnicodeCategory ((char) i);
673                         switch (uc) {
674                         case UnicodeCategory.PrivateUse:
675                                 int diff = i - 0xE000;
676                                 buf.AppendNormal (
677                                         (byte) (0xE5 + diff / 254),
678                                         (byte) (diff % 254 + 2),
679                                         0,
680                                         0);
681                                 return;
682                         case UnicodeCategory.Surrogate:
683                                 FillSurrogateSortKeyRaw (i);
684                                 return;
685                         }
686
687                         byte level2 = Level2 (i, ext);
688                         if (Uni.HasSpecialWeight ((char) i)) {
689                                 byte level1 = Level1 (i);
690                                 buf.AppendKana (
691                                         Category (i),
692                                         level1,
693                                         level2,
694                                         Uni.Level3 (i),
695                                         Uni.IsJapaneseSmallLetter ((char) i),
696                                         ToDashTypeValue (ext),
697                                         !Uni.IsHiragana ((char) i),
698                                         IsHalfKana ((char) i)
699                                         );
700                                 if (!ignoreNonSpace && ext == ExtenderType.Voiced)
701                                         // Append voice weight
702                                         buf.AppendNormal (1, 1, 1, 0);
703                         }
704                         else
705                                 buf.AppendNormal (
706                                         Category (i),
707                                         Level1 (i),
708                                         level2,
709                                         Uni.Level3 (i));
710                 }
711
712                 void FillSurrogateSortKeyRaw (int i)
713                 {
714                         int diffbase = 0;
715                         int segment = 0;
716                         byte lower = 0;
717
718                         if (i < 0xD840) {
719                                 diffbase = 0xD800;
720                                 segment = 0x41;
721                                 lower = (byte) ((i == 0xD800) ? 0x3E : 0x3F);
722                         } else if (0xD840 <= i && i < 0xD880) {
723                                 diffbase = 0xD840;
724                                 segment = 0xF2;
725                                 lower = 0x3E;
726                         } else if (0xDB80 <= i && i < 0xDC00) {
727                                 diffbase = 0xDB80 - 0x40;
728                                 segment = 0xFE;
729                                 lower = 0x3E;
730                         } else {
731                                 diffbase = 0xDC00 - 0xF8 + 2;
732                                 segment = 0x41;
733                                 lower = 0x3F;
734                         }
735                         int diff = i - diffbase;
736
737                         buf.AppendNormal (
738                                 (byte) (segment + diff / 254),
739                                 (byte) (diff % 254 + 2),
740                                 lower,
741                                 lower);
742                 }
743
744                 #endregion
745
746                 #region Compare()
747
748                 public int Compare (string s1, string s2)
749                 {
750                         return Compare (s1, s2, CompareOptions.None);
751                 }
752
753                 public int Compare (string s1, string s2, CompareOptions options)
754                 {
755                         return Compare (s1, 0, s1.Length, s2, 0, s2.Length, options);
756                 }
757
758                 class Escape
759                 {
760                         public string Source;
761                         public int Index;
762                         public int Start;
763                         public int End;
764                 }
765
766                 // Those instances are reused not to invoke instantiation
767                 // during Compare().
768                 Escape escape1 = new Escape ();
769                 Escape escape2 = new Escape ();
770
771                 private int CompareOrdinal (string s1, int idx1, int len1,
772                         string s2, int idx2, int len2)
773                 {
774                         int min = len1 < len2 ? len1 : len2;
775                         int end1 = idx1 + min;
776                         int end2 = idx2 + min;
777                         for (int i1 = idx1, i2 = idx2;
778                                 i1 < end1 && i2 < end2; i1++, i2++)
779                                 if (s1 [i1] != s2 [i2])
780                                         return s1 [i1] - s2 [i2];
781                         return len1 == len2 ? 0 :
782                                 len1 == min ? - 1 : 1;
783                 }
784
785                 public int Compare (string s1, int idx1, int len1,
786                         string s2, int idx2, int len2, CompareOptions options)
787                 {
788                         // quick equality check
789                         if (idx1 == idx2 && len1 == len2 &&
790                                 Object.ReferenceEquals (s1, s2))
791                                 return 0;
792                         // FIXME: this should be done inside Compare() at
793                         // any time.
794 //                      int ord = CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
795 //                      if (ord == 0)
796 //                              return 0;
797                         if (options == CompareOptions.Ordinal)
798                                 return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
799
800 #if false // stable easy version, depends on GetSortKey().
801                         SortKey sk1 = GetSortKey (s1, idx1, len1, options);
802                         SortKey sk2 = GetSortKey (s2, idx2, len2, options);
803                         byte [] d1 = sk1.KeyData;
804                         byte [] d2 = sk2.KeyData;
805                         int len = d1.Length > d2.Length ? d2.Length : d1.Length;
806                         for (int i = 0; i < len; i++)
807                                 if (d1 [i] != d2 [i])
808                                         return d1 [i] < d2 [i] ? -1 : 1;
809                         return d1.Length == d2.Length ? 0 : d1.Length < d2.Length ? -1 : 1;
810 #else
811                         SetOptions (options);
812                         bool dummy, dummy2;
813                         int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, (options & CompareOptions.StringSort) != 0, out dummy, out dummy2, true);
814                         return ret == 0 ? 0 : ret < 0 ? -1 : 1;
815 #endif
816                 }
817
818                 int CompareInternal (string s1, int idx1, int len1, string s2,
819                         int idx2, int len2, bool stringSort,
820                         out bool targetConsumed, out bool sourceConsumed,
821                         bool skipHeadingExtenders)
822                 {
823                         int start1 = idx1;
824                         int start2 = idx2;
825                         int end1 = idx1 + len1;
826                         int end2 = idx2 + len2;
827                         targetConsumed = false;
828                         sourceConsumed = false;
829
830                         // It holds final result that comes from the comparison
831                         // at level 2 or lower. Even if Compare() found the
832                         // difference at level 2 or lower, it still has to
833                         // continue level 1 comparison. FinalResult is used
834                         // when there was no further differences.
835                         int finalResult = 0;
836                         // It holds the comparison level to do. It starts from
837                         // 5, and becomes 1 when we do primary-only comparison.
838                         int currentLevel = 5;
839
840                         int lv5At1 = -1;
841                         int lv5At2 = -1;
842                         int lv5Value1 = 0;
843                         int lv5Value2 = 0;
844
845                         // Skip heading extenders
846                         if (skipHeadingExtenders) {
847                                 for (; idx1 < end1; idx1++)
848                                         if (GetExtenderType (s1 [idx1]) == ExtenderType.None)
849                                                 break;
850                                 for (; idx2 < end2; idx2++)
851                                         if (GetExtenderType (s2 [idx2]) == ExtenderType.None)
852                                                 break;
853                         }
854
855                         ExtenderType ext1 = ExtenderType.None;
856                         ExtenderType ext2 = ExtenderType.None;
857
858                         while (true) {
859                                 for (; idx1 < end1; idx1++)
860                                         if (!IsIgnorable (s1 [idx1]))
861                                                 break;
862                                 for (; idx2 < end2; idx2++)
863                                         if (!IsIgnorable (s2 [idx2]))
864                                                 break;
865
866                                 if (idx1 >= end1) {
867                                         if (escape1.Source == null)
868                                                 break;
869                                         s1 = escape1.Source;
870                                         start1 = escape1.Start;
871                                         idx1 = escape1.Index;
872                                         end1 = escape1.End;
873                                         escape1.Source = null;
874                                         continue;
875                                 }
876                                 if (idx2 >= end2) {
877                                         if (escape2.Source == null)
878                                                 break;
879                                         s2 = escape2.Source;
880                                         start2 = escape2.Start;
881                                         idx2 = escape2.Index;
882                                         end2 = escape2.End;
883                                         escape2.Source = null;
884                                         continue;
885                                 }
886 #if false
887 // FIXME: optimization could be done here.
888
889                                 if (s1 [idx1] == s2 [idx2]) {
890                                         idx1++;
891                                         idx2++;
892                                         continue;
893                                 }
894 //                              while (idx1 >= start1 && !IsSafe ((int) s [idx1]))
895 //                                      idx1--;
896 //                              while (idx2 >= start2 && !IsSafe ((int) s [idx2]))
897 //                                      idx2--;
898 #endif
899
900                                 int cur1 = idx1;
901                                 int cur2 = idx2;
902                                 byte [] sk1 = null;
903                                 byte [] sk2 = null;
904                                 int i1 = FilterOptions (s1 [idx1]);
905                                 int i2 = FilterOptions (s2 [idx2]);
906                                 bool special1 = false;
907                                 bool special2 = false;
908
909                                 // If current character is an expander, then
910                                 // repeat the previous character.
911                                 ext1 = GetExtenderType (i1);
912                                 if (ext1 != ExtenderType.None) {
913                                         if (previousChar < 0) {
914                                                 if (previousSortKey == null) {
915                                                         // nothing to extend
916                                                         idx1++;
917                                                         continue;
918                                                 }
919                                                 sk1 = previousSortKey;
920                                         }
921                                         else
922                                                 i1 = FilterExtender (previousChar, ext1);
923                                 }
924                                 ext2 = GetExtenderType (i2);
925                                 if (ext2 != ExtenderType.None) {
926                                         if (previousChar2 < 0) {
927                                                 if (previousSortKey2 == null) {
928                                                         // nothing to extend
929                                                         idx2++;
930                                                         continue;
931                                                 }
932                                                 sk2 = previousSortKey2;
933                                         }
934                                         else
935                                                 i2 = FilterExtender (previousChar2, ext2);
936                                 }
937
938                                 byte cat1 = Category (i1);
939                                 byte cat2 = Category (i2);
940
941                                 // Handle special weight characters
942                                 if (!stringSort && currentLevel > 4) {
943                                         if (cat1 == 6) {
944                                                 lv5At1 = escape1.Source != null ?
945                                                         escape1.Index - escape1.Start :
946                                                         cur1 - start1;
947                                                 // here Windows has a bug that it does
948                                                 // not consider thirtiary weight.
949                                                 lv5Value1 = Level1 (i1) << 8 + Uni.Level3 (i1);
950                                                 previousChar = i1;
951                                                 idx1++;
952                                         }
953                                         if (cat2 == 6) {
954                                                 lv5At2 = escape2.Source != null ?
955                                                         escape2.Index - escape2.Start :
956                                                         cur2 - start2;
957                                                 // here Windows has a bug that it does
958                                                 // not consider thirtiary weight.
959                                                 lv5Value2 = Level1 (i2) << 8 + Uni.Level3 (i2);
960                                                 previousChar2 = i2;
961                                                 idx2++;
962                                         }
963                                         if (cat1 == 6 || cat2 == 6) {
964                                                 currentLevel = 4;
965                                                 continue;
966                                         }
967                                 }
968
969                                 Contraction ct1 = null;
970                                 if (ext1 == ExtenderType.None)
971                                         ct1 = GetContraction (s1, idx1, end1);
972
973                                 int offset1 = 1;
974                                 if (sk1 != null)
975                                         offset1 = 1;
976                                 else if (ct1 != null) {
977                                         offset1 = ct1.Source.Length;
978                                         if (ct1.SortKey != null) {
979                                                 sk1 = charSortKey;
980                                                 for (int i = 0; i < ct1.SortKey.Length; i++)
981                                                         sk1 [i] = ct1.SortKey [i];
982                                                 previousChar = -1;
983                                                 previousSortKey = sk1;
984                                         }
985                                         else if (escape1.Source == null) {
986                                                 escape1.Source = s1;
987                                                 escape1.Start = start1;
988                                                 escape1.Index = cur1 + ct1.Source.Length;
989                                                 escape1.End = end1;
990                                                 s1 = ct1.Replacement;
991                                                 idx1 = 0;
992                                                 start1 = 0;
993                                                 end1 = s1.Length;
994                                                 continue;
995                                         }
996                                 }
997                                 else {
998                                         sk1 = charSortKey;
999                                         sk1 [0] = cat1;
1000                                         sk1 [1] = Level1 (i1);
1001                                         if (!ignoreNonSpace && currentLevel > 1)
1002                                                 sk1 [2] = Level2 (i1, ext1);
1003                                         if (currentLevel > 2)
1004                                                 sk1 [3] = Uni.Level3 (i1);
1005                                         if (currentLevel > 3)
1006                                                 special1 = Uni.HasSpecialWeight ((char) i1);
1007                                         if (cat1 > 1)
1008                                                 previousChar = i1;
1009                                 }
1010
1011                                 Contraction ct2 = null;
1012                                 if (ext2 == ExtenderType.None)
1013                                         ct2 = GetContraction (s2, idx2, end2);
1014
1015                                 if (sk2 != null)
1016                                         idx2++;
1017                                 else if (ct2 != null) {
1018                                         idx2 += ct2.Source.Length;
1019                                         if (ct2.SortKey != null) {
1020                                                 sk2 = charSortKey2;
1021                                                 for (int i = 0; i < ct2.SortKey.Length; i++)
1022                                                         sk2 [i] = ct2.SortKey [i];
1023                                                 previousChar2 = -1;
1024                                                 previousSortKey2 = sk2;
1025                                         }
1026                                         else if (escape2.Source == null) {
1027                                                 escape2.Source = s2;
1028                                                 escape2.Start = start2;
1029                                                 escape2.Index = cur2 + ct2.Source.Length;
1030                                                 escape2.End = end2;
1031                                                 s2 = ct2.Replacement;
1032                                                 idx2 = 0;
1033                                                 start2 = 0;
1034                                                 end2 = s2.Length;
1035                                                 continue;
1036                                         }
1037                                 }
1038                                 else {
1039                                         sk2 = charSortKey2;
1040                                         sk2 [0] = cat2;
1041                                         sk2 [1] = Level1 (i2);
1042                                         if (!ignoreNonSpace && currentLevel > 1)
1043                                                 sk2 [2] = Level2 (i2, ext2);
1044                                         if (currentLevel > 2)
1045                                                 sk2 [3] = Uni.Level3 (i2);
1046                                         if (currentLevel > 3)
1047                                                 special2 = Uni.HasSpecialWeight ((char) i2);
1048                                         if (cat2 > 1)
1049                                                 previousChar2 = i2;
1050                                         idx2++;
1051                                 }
1052
1053                                 // Add offset here so that it does not skip
1054                                 // idx1 while s2 has a replacement.
1055                                 idx1 += offset1;
1056
1057                                 // add diacritical marks in s1 here
1058                                 if (!ignoreNonSpace) {
1059                                         while (idx1 < end1) {
1060                                                 if (Category (s1 [idx1]) != 1)
1061                                                         break;
1062                                                 if (sk1 [2] == 0)
1063                                                         sk1 [2] = 2;
1064                                                 sk1 [2] = (byte) (sk1 [2] + 
1065                                                         Level2 (s1 [idx1], ExtenderType.None));
1066                                                 idx1++;
1067                                         }
1068
1069                                         // add diacritical marks in s2 here
1070                                         while (idx2 < end2) {
1071                                                 if (Category (s2 [idx2]) != 1)
1072                                                         break;
1073                                                 if (sk2 [2] == 0)
1074                                                         sk2 [2] = 2;
1075                                                 sk2 [2] = (byte) (sk2 [2] + 
1076                                                         Level2 (s2 [idx2], ExtenderType.None));
1077                                                 idx2++;
1078                                         }
1079                                 }
1080
1081                                 int ret = sk1 [0] - sk2 [0];
1082                                 ret = ret != 0 ? ret : sk1 [1] - sk2 [1];
1083                                 if (ret != 0)
1084                                         return ret;
1085                                 if (currentLevel == 1)
1086                                         continue;
1087                                 if (!ignoreNonSpace) {
1088                                         ret = sk1 [2] - sk2 [2];
1089                                         if (ret != 0) {
1090                                                 finalResult = ret;
1091                                                 currentLevel = frenchSort ? 2 : 1;
1092                                                 continue;
1093                                         }
1094                                 }
1095                                 if (currentLevel == 2)
1096                                         continue;
1097                                 ret = sk1 [3] - sk2 [3];
1098                                 if (ret != 0) {
1099                                         finalResult = ret;
1100                                         currentLevel = 2;
1101                                         continue;
1102                                 }
1103                                 if (currentLevel == 3)
1104                                         continue;
1105                                 if (special1 != special2) {
1106                                         finalResult = special1 ? 1 : -1;
1107                                         currentLevel = 3;
1108                                         continue;
1109                                 }
1110                                 if (special1) {
1111                                         ret = CompareFlagPair (
1112                                                 !Uni.IsJapaneseSmallLetter ((char) i1),
1113                                                 !Uni.IsJapaneseSmallLetter ((char) i2));
1114                                         ret = ret != 0 ? ret :
1115                                                 ToDashTypeValue (ext1) -
1116                                                 ToDashTypeValue (ext2);
1117                                         ret = ret != 0 ? ret : CompareFlagPair (
1118                                                 Uni.IsHiragana ((char) i1),
1119                                                 Uni.IsHiragana ((char) i2));
1120                                         ret = ret != 0 ? ret : CompareFlagPair (
1121                                                 !IsHalfKana ((char) i1),
1122                                                 !IsHalfKana ((char) i2));
1123                                         if (ret != 0) {
1124                                                 finalResult = ret;
1125                                                 currentLevel = 3;
1126                                                 continue;
1127                                         }
1128                                 }
1129                         }
1130
1131                         // If there were only level 3 or lower differences,
1132                         // then we still have to find diacritical differences
1133                         // if any.
1134                         if (!ignoreNonSpace &&
1135                                 finalResult != 0 && currentLevel > 2) {
1136                                 while (idx1 < end1 && idx2 < end2) {
1137                                         if (!Uni.IsIgnorableNonSpacing (s1 [idx1]))
1138                                                 break;
1139                                         if (!Uni.IsIgnorableNonSpacing (s2 [idx2]))
1140                                                 break;
1141                                         finalResult = Level2 (FilterOptions ((s1 [idx1])), ext1) - Level2 (FilterOptions (s2 [idx2]), ext2);
1142                                         if (finalResult != 0)
1143                                                 break;
1144                                         idx1++;
1145                                         idx2++;
1146                                         // they should work only for the first character
1147                                         ext1 = ExtenderType.None;
1148                                         ext2 = ExtenderType.None;
1149                                 }
1150                         }
1151                         // we still have to handle level 5
1152                         if (finalResult == 0) {
1153                                 finalResult = lv5At1 - lv5At2;
1154                                 if (finalResult == 0)
1155                                         finalResult = lv5Value1 - lv5Value2;
1156                         }
1157                         if (finalResult == 0) {
1158                                 if (idx2 == end2)
1159                                         targetConsumed = true;
1160                                 if (idx1 == end1)
1161                                         sourceConsumed = true;
1162                         }
1163                         return idx1 != end1 ? 1 : idx2 == end2 ? finalResult : -1;
1164                 }
1165
1166                 #endregion
1167
1168                 #region IsPrefix() and IsSuffix()
1169
1170                 public bool IsPrefix (string src, string target, CompareOptions opt)
1171                 {
1172                         return IsPrefix (src, target, 0, src.Length, opt);
1173                 }
1174
1175                 public bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
1176                 {
1177                         SetOptions (opt);
1178                         return IsPrefix (s, target, start, length, 
1179                                 (opt & CompareOptions.StringSort) != 0, true);
1180                 }
1181
1182                 public bool IsPrefix (string s, string target, int start, int length, bool stringSort, bool skipHeadingExtenders)
1183                 {
1184                         bool consumed, dummy;
1185                         int ret = CompareInternal (s, start, length,
1186                                 target, 0, target.Length, stringSort,
1187                                 out consumed, out dummy, skipHeadingExtenders);
1188                         return consumed;
1189                 }
1190
1191                 // IsSuffix()
1192
1193                 public bool IsSuffix (string src, string target, CompareOptions opt)
1194                 {
1195                         return IsSuffix (src, target, src.Length - 1, src.Length, opt);
1196                 }
1197
1198                 public bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
1199                 {
1200                         // quick check : simple codepoint comparison
1201                         if (s.Length >= target.Length) {
1202                                 int si = start;
1203                                 int se = start - length;
1204                                 for (int i = target.Length - 1; si >= se && i >= 0; i--, si--)
1205                                         if (s [si] != target [i])
1206                                                 break;
1207                                 if (si == start + target.Length)
1208                                         return true;
1209                         }
1210
1211                         SetOptions (opt);
1212                         return IsSuffix (s, target, start, length,
1213                                 (opt & CompareOptions.StringSort) != 0);
1214                 }
1215
1216                 bool New_IsSuffix (string s, string t, int start, int length,
1217                         bool stringSort)
1218                 {
1219                         int tstart = 0;
1220                         for (;tstart < t.Length; tstart++)
1221                                 if (!IsIgnorable (t [tstart]))
1222                                         break;
1223                         if (tstart == t.Length)
1224                                 return true; // as if target is String.Empty.
1225
1226                         // FIXME: could be more efficient.
1227                         bool consumed, dummy;
1228                         for (int i = 0; i < length; i++) {
1229                                 int ret = CompareInternal (s, start - i, i + 1,
1230                                         t, tstart, t.Length, stringSort,
1231                                         out dummy, out consumed, true);
1232                                 if (ret == 0)
1233                                         return true;
1234                         }
1235                         return false;
1236                 }
1237
1238                 // FIXME: This code has a fatal problem that it cannot handle
1239                 // extenders :(
1240                 bool IsSuffix (string s, string t, int start, int length,
1241                         bool stringSort)
1242                 {
1243                         int si = start;
1244                         int ti = t.Length - 1;
1245                         int end = start - length + 1;
1246                         int tend = 0;
1247
1248                         while (true) {
1249                                 if (ti < tend) {
1250                                         if (escape2.Source == null)
1251                                                 break;
1252                                         t = escape2.Source;
1253                                         ti = escape2.Index;
1254                                         tend = escape2.End;
1255                                         escape2.Source = null;
1256                                         continue;
1257                                 }
1258                                 if (IsIgnorable (t [ti])) {
1259                                         ti--;
1260                                         continue;
1261                                 }
1262                                 if (si < end) {
1263                                         if (escape1.Source == null)
1264                                                 break;
1265                                         s = escape1.Source;
1266                                         si = escape1.Index;
1267                                         end = escape1.End;
1268                                         escape1.Source = null;
1269                                         continue;
1270                                 }
1271                                 if (IsIgnorable (s [si])) {
1272                                         si--;
1273                                         continue;
1274                                 }
1275
1276                                 ExtenderType ext1 = GetExtenderType (s [si]);
1277                                 ExtenderType ext2 = GetExtenderType (t [ti]);
1278                                 int vt = -1;
1279                                 byte [] sk = null;
1280
1281                                 // Check contraction for target.
1282                                 Contraction ctt = null;
1283                                 if (ext2 != ExtenderType.None) {
1284                                         if (previousChar2 < 0)
1285                                                 sk = previousSortKey2;
1286                                         else
1287                                                 vt = FilterExtender (previousChar2, ext2);
1288                                 }
1289                                 else if (escape2.Source == null)
1290                                         ctt = GetTailContraction (t, ti, 0);
1291                                 if (ctt != null) {
1292                                         if (ctt.SortKey != null) {
1293                                                 ti -= ctt.Source.Length;
1294                                                 for (int i = 0; i < ctt.SortKey.Length; i++)
1295                                                         charSortKey2 [i] = ctt.SortKey [i];
1296                                                 previousChar2 = -1;
1297                                                 previousSortKey2 = charSortKey2;
1298                                         } else {
1299                                                 escape2.Source = t;
1300                                                 escape2.Index = ti - ctt.Source.Length;
1301                                                 escape2.End = tend;
1302                                                 t = ctt.Replacement;
1303                                                 ti = ctt.Replacement.Length - 1;
1304                                                 tend = 0;
1305                                                 continue;
1306                                         }
1307                                 } else if (sk == null) {
1308                                         if (vt < 0)
1309                                                 vt = FilterOptions (t [ti]);
1310                                         sk = charSortKey2;
1311                                         sk [0] = Category (vt);
1312                                         sk [1] = Level1 (vt);
1313                                         if (!ignoreNonSpace)
1314                                                 // FIXME: pass extender type
1315                                                 sk [2] = Level2 (vt, ext2);
1316                                         sk [3] = Uni.Level3 (vt);
1317                                         if (sk [1] != 1)
1318                                                 previousChar2 = vt;
1319                                         ti--;
1320                                 }
1321                                 if (!MatchesBackward (ref s, ref si, ref end, vt, sk, !Uni.HasSpecialWeight ((char) vt)))
1322                                         return false;
1323                         }
1324                         if (si < end) {
1325                                 // All codepoints in the compared range
1326                                 // matches. In that case, what matters 
1327                                 // is whether the remaining part of 
1328                                 // "target" is ignorable or not.
1329                                 while (ti >= 0)
1330                                         if (!IsIgnorable (t [ti--]))
1331                                                 return false;
1332                                 return true;
1333                         }
1334                         return true;
1335                 }
1336
1337                 // WARNING: Don't invoke it outside IsSuffix().
1338                 // returns consumed source length (mostly 1, source length in case of contraction)
1339                 bool MatchesBackward (ref string s, ref int idx, ref int end, int it, byte [] sortkey, bool noLv4)
1340                 {
1341                         Contraction ct = null;
1342                         // If there is already expansion, then it should not
1343                         // process further expansions.
1344                         if (escape1.Source == null)
1345                                 ct = GetTailContraction (s, idx, end);
1346                         if (ct != null) {
1347                                 if (ct.SortKey != null) {
1348                                         if (!noLv4)
1349                                                 return false;
1350                                         for (int i = 0; i < ct.SortKey.Length; i++)
1351                                                 if (sortkey [i] != ct.SortKey [i])
1352                                                         return false;
1353                                         idx -= ct.Source.Length;
1354                                         return true;
1355                                 } else {
1356                                         escape1.Source = s;
1357                                         escape1.Index = idx - ct.Source.Length;
1358                                         escape1.End = end;
1359                                         s = ct.Replacement;
1360                                         idx = s.Length - 1;
1361                                         end = 0;
1362                                         return MatchesBackward (ref s, ref idx, ref end, it, sortkey, noLv4);
1363                                 }
1364                         } else {
1365                                 // primitive comparison
1366                                 if (!MatchesPrimitive (s [idx], it, sortkey))
1367                                         return false;
1368                                 idx--;
1369                                 return true;
1370                         }
1371                 }
1372
1373                 // Common use methods 
1374
1375                 // returns comparison result.
1376                 private bool MatchesPrimitive (char src, int ct, byte [] sortkey)
1377                 {
1378                         // char-by-char comparison.
1379                         int cs = FilterOptions (src);
1380                         if (cs == ct)
1381                                 return true;
1382                         // lv.1 to 3
1383                         if (Category (cs) != sortkey [0] ||
1384                                 Level1 (cs) != sortkey [1] ||
1385                                 !ignoreNonSpace && Level2 (cs, ExtenderType.None) != sortkey [2] ||
1386                                 Uni.Level3 (cs) != sortkey [3])
1387                                 return false;
1388                         // lv.4 (only when required). No need to check cj coz
1389                         // there is no pair of characters that has the same
1390                         // primary level and differs here.
1391                         if (!Uni.HasSpecialWeight (src))
1392                                 return true;
1393                         char target = (char) ct;
1394                         return Uni.IsJapaneseSmallLetter (src) !=
1395                                 Uni.IsJapaneseSmallLetter (target) ||
1396                                 Uni.GetJapaneseDashType (src) !=
1397                                 Uni.GetJapaneseDashType (target) ||
1398                                 Uni.IsHiragana (src) !=
1399                                 Uni.IsHiragana (target) ||
1400                                 IsHalfKana (src) != IsHalfKana (target);
1401                 }
1402
1403                 int CompareFlagPair (bool b1, bool b2)
1404                 {
1405                         return b1 == b2 ? 0 : b1 ? 1 : -1;
1406                 }
1407
1408                 #endregion
1409
1410                 #region IndexOf() / LastIndexOf()
1411
1412                 // IndexOf (string, string, CompareOptions)
1413                 // IndexOf (string, string, int, int, CompareOptions)
1414                 // IndexOf (string, char, int, int, CompareOptions)
1415                 // IndexOfPrimitiveChar (string, int, int, char)
1416                 // IndexOfSortKey (string, int, int, byte[], char, int, bool)
1417                 // IndexOf (string, string, int, int)
1418
1419                 public int IndexOf (string s, string target, CompareOptions opt)
1420                 {
1421                         return IndexOf (s, target, 0, s.Length, opt);
1422                 }
1423
1424                 public int IndexOf (string s, string target, int start, int length, CompareOptions opt)
1425                 {
1426                         SetOptions (opt);
1427                         return IndexOf (s, target, start, length,
1428                                 (opt & CompareOptions.StringSort) != 0);
1429                 }
1430
1431                 public int IndexOf (string s, char target, CompareOptions opt)
1432                 {
1433                         return IndexOf (s, target, 0, s.Length, opt);
1434                 }
1435
1436                 public int IndexOf (string s, char target, int start, int length, CompareOptions opt)
1437                 {
1438                         SetOptions (opt);
1439
1440                         // If target is contraction, then use string search.
1441                         Contraction ct = GetContraction (target);
1442                         if (ct != null) {
1443                                 if (ct.Replacement != null)
1444                                         return IndexOf (s, ct.Replacement, start, length,
1445                                                 (opt & CompareOptions.StringSort) != 0);
1446                                 else
1447                                         return IndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
1448                         }
1449                         else
1450                                 return IndexOfPrimitiveChar (s, start, length, target);
1451                 }
1452
1453                 // Searches target char w/o checking contractions
1454                 int IndexOfPrimitiveChar (string s, int start, int length, char target)
1455                 {
1456                         int ti = FilterOptions ((int) target);
1457                         charSortKeyIndexTarget [0] = Category (ti);
1458                         charSortKeyIndexTarget [1] = Level1 (ti);
1459                         if (!ignoreNonSpace)
1460                                 charSortKeyIndexTarget [2] = Level2 (ti, ExtenderType.None);
1461                         charSortKeyIndexTarget [3] = Uni.Level3 (ti);
1462                         return IndexOfSortKey (s, start, length, charSortKeyIndexTarget, target, ti, !Uni.HasSpecialWeight ((char) ti));
1463                 }
1464
1465                 // Searches target byte[] keydata
1466                 int IndexOfSortKey (string s, int start, int length, byte [] sortkey, char target, int ti, bool noLv4)
1467                 {
1468                         int end = start + length;
1469                         int idx = start;
1470                         while (idx < end) {
1471                                 int cur = idx;
1472                                 if (MatchesForward (s, ref idx, end, ti, sortkey, noLv4))
1473                                         return cur;
1474                         }
1475                         return -1;
1476                 }
1477
1478                 // Searches string. Search head character (or keydata when
1479                 // the head is contraction sortkey) and try IsPrefix().
1480                 int IndexOf (string s, string target, int start, int length, bool stringSort)
1481                 {
1482                         int tidx = 0;
1483                         for (; tidx < target.Length; tidx++)
1484                                 if (!IsIgnorable (target [tidx]))
1485                                         break;
1486                         if (tidx == target.Length)
1487                                 return start;
1488                         Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1489                         string replace = ct != null ? ct.Replacement : null;
1490                         byte [] sk = replace == null ? charSortKeyIndexTarget : null;
1491                         bool noLv4 = true;
1492                         char tc = char.MinValue;
1493                         int ti = -1;
1494                         if (ct != null && sk != null) {
1495                                 for (int i = 0; i < ct.SortKey.Length; i++)
1496                                         sk [i] = ct.SortKey [i];
1497                         } else if (sk != null) {
1498                                 tc = target [tidx];
1499                                 ti = FilterOptions (target [tidx]);
1500                                 sk [0] = Category (ti);
1501                                 sk [1] = Level1 (ti);
1502                                 if (!ignoreNonSpace)
1503                                         sk [2] = Level2 (ti, ExtenderType.None);
1504                                 sk [3] = Uni.Level3 (ti);
1505                                 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1506                         }
1507                         if (sk != null) {
1508                                 for (tidx++; tidx < target.Length; tidx++) {
1509                                         if (Category (target [tidx]) != 1)
1510                                                 break;
1511                                         if (sk [2] == 0)
1512                                                 sk [2] = 2;
1513                                         sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1514                                 }
1515                         }
1516
1517                         do {
1518                                 int idx = 0;
1519                                 if (replace != null)
1520                                         idx = IndexOf (s, replace, start, length, stringSort);
1521                                 else
1522                                         idx = IndexOfSortKey (s, start, length, sk, tc, ti, noLv4);
1523                                 if (idx < 0)
1524                                         return -1;
1525                                 length -= idx - start;
1526                                 start = idx;
1527                                 if (IsPrefix (s, target, start, length, stringSort, false))
1528                                         return idx;
1529                                 Contraction cts = GetContraction (s, start, length);
1530                                 if (cts != null) {
1531                                         start += cts.Source.Length;
1532                                         length -= cts.Source.Length;
1533                                 } else {
1534                                         start++;
1535                                         length--;
1536                                 }
1537                         } while (length > 0);
1538                         return -1;
1539                 }
1540
1541                 //
1542                 // There are the same number of IndexOf() related methods,
1543                 // with the same functionalities for each.
1544                 //
1545
1546                 public int LastIndexOf (string s, string target, CompareOptions opt)
1547                 {
1548                         return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1549                 }
1550
1551                 public int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
1552                 {
1553                         SetOptions (opt);
1554                         return LastIndexOf (s, target, start, length,
1555                                 (opt & CompareOptions.StringSort) != 0);
1556                 }
1557
1558                 public int LastIndexOf (string s, char target, CompareOptions opt)
1559                 {
1560                         return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1561                 }
1562
1563                 public int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
1564                 {
1565                         SetOptions (opt);
1566
1567                         // If target is contraction, then use string search.
1568                         Contraction ct = GetContraction (target);
1569                         if (ct != null) {
1570                                 if (ct.Replacement != null)
1571                                         return LastIndexOf (s, ct.Replacement, start, length, (opt & CompareOptions.StringSort) != 0);
1572                                 else
1573                                         return LastIndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
1574                         }
1575                         else
1576                                 return LastIndexOfPrimitiveChar (s, start, length, target);
1577                 }
1578
1579                 // Searches target char w/o checking contractions
1580                 int LastIndexOfPrimitiveChar (string s, int start, int length, char target)
1581                 {
1582                         int ti = FilterOptions ((int) target);
1583                         charSortKey [0] = Category (ti);
1584                         charSortKey [1] = Level1 (ti);
1585                         if (!ignoreNonSpace)
1586                                 // FIXME: pass ExtenderType
1587                                 charSortKey [2] = Level2 (ti, ExtenderType.None);
1588                         charSortKey [3] = Uni.Level3 (ti);
1589                         return LastIndexOfSortKey (s, start, length, charSortKey, target, ti, !Uni.HasSpecialWeight ((char) ti));
1590                 }
1591
1592                 // Searches target byte[] keydata
1593                 int LastIndexOfSortKey (string s, int start, int length, byte [] sortkey, char target, int ti, bool noLv4)
1594                 {
1595                         int end = start - length;
1596
1597                         for (int idx = start; idx > end; idx--) {
1598                                 int cur = idx;
1599                                 if (Matches (s, ref idx, end, ti, target, sortkey, noLv4, true))
1600                                         return cur;
1601                         }
1602                         return -1;
1603                 }
1604
1605                 // Searches string. Search head character (or keydata when
1606                 // the head is contraction sortkey) and try IsPrefix().
1607                 int LastIndexOf (string s, string target, int start, int length, bool stringSort)
1608                 {
1609                         int orgStart = start;
1610                         int tidx = 0;
1611                         for (; tidx < target.Length; tidx++)
1612                                 if (!IsIgnorable (target [tidx]))
1613                                         break;
1614                         if (tidx == target.Length)
1615                                 return start;
1616                         Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1617                         byte [] sortkey = ct != null ? ct.SortKey : null;
1618                         string replace = ct != null ? ct.Replacement : null;
1619
1620                         do {
1621                                 int idx = 0;
1622                                 if (sortkey != null)
1623                                         idx = LastIndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
1624                                 else if (replace != null)
1625                                         idx = LastIndexOf (s, replace, start, length, stringSort);
1626                                 else
1627                                         idx = LastIndexOfPrimitiveChar (s, start, length, target [tidx]);
1628
1629                                 if (idx < 0)
1630                                         return -1;
1631                                 if (IsPrefix (s, target, idx, orgStart - idx + 1, stringSort, false)) {
1632                                         for (;idx < orgStart; idx++)
1633                                                 if (!IsIgnorable (s [idx]))
1634                                                         break;
1635                                         return idx;
1636                                 }
1637                                 Contraction cts = GetContraction (s, idx, orgStart - idx + 1);
1638                                 if (cts != null) {
1639                                         start -= cts.Source.Length;
1640                                         length -= cts.Source.Length;
1641                                 } else {
1642                                         start--;
1643                                         length--;
1644                                 }
1645                         } while (length > 0);
1646                         return -1;
1647                 }
1648
1649                 private bool Matches (string s, ref int idx, int end, int ti, char target, byte [] sortkey, bool noLv4, bool lastIndexOf)
1650                 {
1651                         switch (char.GetUnicodeCategory (s [idx])) {
1652                         case UnicodeCategory.PrivateUse:
1653                         case UnicodeCategory.Surrogate:
1654                                 if (s [idx] != target)
1655                                         return false;
1656                                 return true;
1657                         }
1658
1659                         char sc = char.MinValue;
1660                         Contraction ct = GetContraction (s, idx, end);
1661                         // if lv4 exists, it never matches contraction
1662                         if (ct != null && noLv4) {
1663                                 if (lastIndexOf)
1664                                         idx -= ct.Source.Length - 1;
1665                                 else
1666                                         idx += ct.Source.Length - 1;
1667                                 if (ct.SortKey != null) {
1668                                         for (int i = 0; i < sortkey.Length; i++)
1669                                                 if (ct.SortKey [i] != sortkey [i])
1670                                                         return false;
1671                                         return true;
1672                                 }
1673                                 // Here is the core of LAMESPEC
1674                                 // described at the top of the source.
1675                                 sc = ct.Replacement [0];
1676                         }
1677                         else
1678                                 sc = s [idx];
1679
1680                         if (sc == target)
1681                                 return true;
1682                         int si = FilterOptions ((int) sc);
1683                         if (Category (si) != sortkey [0] ||
1684                                 Level1 (si) != sortkey [1] ||
1685                                 // FIXME: pass ExtenderType
1686                                 !ignoreNonSpace && Level2 (si, ExtenderType.None) != sortkey [2] ||
1687                                 Uni.Level3 (si) != sortkey [3])
1688                                 return false;
1689                         if (noLv4 && !Uni.HasSpecialWeight ((char) si))
1690                                 return true;
1691                         else if (noLv4)
1692                                 return false;
1693                         if (Uni.IsJapaneseSmallLetter ((char) si) !=
1694                                 Uni.IsJapaneseSmallLetter ((char) ti) ||
1695                                 Uni.GetJapaneseDashType ((char) si) !=
1696                                 Uni.GetJapaneseDashType ((char) ti) ||
1697                                 !Uni.IsHiragana ((char) si) !=
1698                                 !Uni.IsHiragana ((char) ti) ||
1699                                 IsHalfKana ((char) si) !=
1700                                 IsHalfKana ((char) ti))
1701                                 return false;
1702                         return true;
1703                 }
1704
1705                 private bool MatchesForward (string s, ref int idx, int end, int ti, byte [] sortkey, bool noLv4)
1706                 {
1707                         int si = -1;
1708                         ExtenderType ext = GetExtenderType (s [idx]);
1709                         Contraction ct = null;
1710                         if (ext == ExtenderType.None)
1711                                 ct = GetContraction (s, idx, end);
1712                         else if (previousChar < 0)
1713                                 charSortKey = previousSortKey;
1714                         else
1715                                 si = FilterExtender (previousChar, ext);
1716                         // if lv4 exists, it never matches contraction
1717                         if (ct != null) {
1718                                 idx += ct.Source.Length;
1719                                 if (!noLv4)
1720                                         return false;
1721                                 if (ct.SortKey != null) {
1722                                         for (int i = 0; i < sortkey.Length; i++)
1723                                                 charSortKey [i] = sortkey [i];
1724                                         previousChar = -1;
1725                                         previousSortKey = charSortKey;
1726                                 } else {
1727                                         // Here is the core of LAMESPEC
1728                                         // described at the top of the source.
1729                                         int dummy = 0;
1730                                         return MatchesForward (ct.Replacement, ref dummy,
1731                                                 ct.Replacement.Length, ti, sortkey, noLv4);
1732                                 }
1733                         } else {
1734                                 if (si < 0)
1735                                         si = FilterOptions (s [idx]);
1736                                 charSortKey [0] = Category (si);
1737                                 charSortKey [1] = Level1 (si);
1738                                 if (!ignoreNonSpace)
1739                                         charSortKey [2] = Level2 (si, ext);
1740                                 charSortKey [3] = Uni.Level3 (si);
1741                                 if (charSortKey [0] != 1)
1742                                         previousChar = si;
1743                                 idx++;
1744                         }
1745                         for (; idx < end; idx++) {
1746                                 if (Category (s [idx]) != 1)
1747                                         break;
1748                                 if (ignoreNonSpace)
1749                                         continue;
1750                                 if (charSortKey [2] == 0)
1751                                                 charSortKey [2] = 2;
1752                                         charSortKey [2] = (byte) (charSortKey [2]
1753                                                 + Level2 (s [idx], ExtenderType.None));
1754                         }
1755
1756                         if (charSortKey [0] != sortkey [0] ||
1757                                 charSortKey [1] != sortkey [1] ||
1758                                 (!ignoreNonSpace && charSortKey [2] != sortkey [2]) ||
1759                                 charSortKey [3] != sortkey [3])
1760                                 return false;
1761                         if (noLv4 && (si < 0 || !Uni.HasSpecialWeight ((char) si)))
1762                                 return true;
1763                         else if (noLv4)
1764                                 return false;
1765                         // Since target can never be an extender, if the source
1766                         // is an expander and it matters, then they never match.
1767                         if (!ignoreNonSpace && ext == ExtenderType.Conditional)
1768                                 return false;
1769                         if (Uni.IsJapaneseSmallLetter ((char) si) !=
1770                                 Uni.IsJapaneseSmallLetter ((char) ti) ||
1771                                 Uni.GetJapaneseDashType ((char) si) !=
1772                                 Uni.GetJapaneseDashType ((char) ti) ||
1773                                 !Uni.IsHiragana ((char) si) !=
1774                                 !Uni.IsHiragana ((char) ti) ||
1775                                 IsHalfKana ((char) si) !=
1776                                 IsHalfKana ((char) ti))
1777                                 return false;
1778                         return true;
1779                 }
1780                 #endregion
1781         }
1782 }