85b165880aac793153070e3b5bd434d83de1ed55
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / SimpleCollator.cs
1 //
2 // SimpleCollator.cs
3 //
4 // This class will demonstrate CompareInfo functionality that will just work.
5 //
6
7 //
8 // Here's a summary for supporting contractions, expansions and diacritical 
9 // remappings.
10 //
11 // Diacritical mapping is a simple tailoring rule that "remaps" diacritical
12 // weight value from one to another. For now it applies to all range of
13 // characters, but at some stage we might need to limit the remapping ranges.
14 //
15 // A Contraction consists of a string (actually char[]) and a sortkey entry
16 // (i.e. byte[]). It indicates that a sequence of characters is interpreted
17 // as a single character that has the mapped sortkey value. There is no
18 // character which goes across "special rules". When the collator encountered
19 // such a sequence of characters, it just returns the sortkey value without
20 // further replacement.
21 //
22 // Since it is still likely to happen that a contraction sequence matches
23 // other character than the identical sequence (depending on CompareOptions
24 // and of course, defined sortkey value itself), comparison cannot be done
25 // at source char[] level.
26 //
27 // (to be continued.)
28 //
29
30 //
31 // In IndexOf(), the first character in the target string or the target char
32 // itself is turned into sortkey bytes. If the character has a contraction and
33 // that is sortkey map, then it is used instead. If the contraction exists and
34 // that is replacement map, then the first character of the replacement string
35 // is searched instead. IndexOf() always searches only for the top character,
36 // and if it returned negative value, then it returns -1. Otherwise, it then
37 // tries IsPrefix() from that location. If it returns true, then it returns
38 // the index.
39 //
40
41 // LAMESPEC: IndexOf() is lame as a whole API. It never matches in the middle
42 // of expansion and there is no proper way to return such indexes within
43 // a single int return value.
44 //
45 // For example, try below in .NET:
46 //      IndexOf("\u00E6", "a")
47 //      IndexOf("\u00E6", "e")
48 //
49
50
51 using System;
52 using System.Collections;
53 using System.Globalization;
54
55 using Uni = Mono.Globalization.Unicode.MSCompatUnicodeTable;
56 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
57
58 namespace Mono.Globalization.Unicode
59 {
60         internal class SimpleCollator
61         {
62                 static SimpleCollator invariant =
63                         new SimpleCollator (CultureInfo.InvariantCulture);
64
65                 internal static readonly byte [] ignorableFlags =
66                         Uni.ignorableFlags;
67                 internal static readonly byte [] categories =
68                         Uni.categories;
69                 internal static readonly byte [] level1 =
70                         Uni.level1;
71                 internal static readonly byte [] level2 =
72                         Uni.level2;
73                 internal static readonly byte [] level3 =
74                         Uni.level3;
75                 internal static readonly ushort [] widthCompat =
76                         Uni.widthCompat;
77                 internal static readonly CodePointIndexer categoryIndexer =
78                         UUtil.Category;
79                 internal static readonly CodePointIndexer lv1Indexer =
80                         UUtil.Level1;
81                 internal static readonly CodePointIndexer lv2Indexer =
82                         UUtil.Level2;
83                 internal static readonly CodePointIndexer lv3Indexer =
84                         UUtil.Level3;
85                 internal static readonly CodePointIndexer widthIndexer =
86                         UUtil.WidthCompat;
87
88
89                 SortKeyBuffer buf;
90                 // CompareOptions expanded.
91                 bool ignoreNonSpace; // used in IndexOf()
92                 bool ignoreSymbols;
93                 bool ignoreWidth;
94                 bool ignoreCase;
95                 bool ignoreKanaType;
96                 TextInfo textInfo; // for ToLower().
97                 bool frenchSort;
98                 readonly ushort [] cjkTable;
99                 readonly CodePointIndexer cjkIndexer;
100                 readonly byte [] cjkLv2Table;
101                 readonly CodePointIndexer cjkLv2Indexer;
102                 readonly int lcid;
103                 readonly Contraction [] contractions;
104                 readonly Level2Map [] level2Maps;
105
106                 // temporary sortkey buffer for index search/comparison
107                 byte [] charSortKey = new byte [4];
108                 byte [] charSortKey2 = new byte [4];
109                 // temporary expansion store for IsPrefix/Suffix
110                 int escapedSourceIndex;
111
112                 #region Tailoring support classes
113                 // Possible mapping types are:
114                 //
115                 //      - string to string (ReplacementMap)
116                 //      - string to SortKey (SortKeyMap)
117                 //      - diacritical byte to byte (DiacriticalMap)
118                 //
119                 // There could be mapping from string to sortkeys, but
120                 // for now there is none as such.
121                 //
122                 internal class Contraction
123                 {
124                         public readonly char [] Source;
125                         // only either of them is used.
126                         public readonly string Replacement;
127                         public readonly byte [] SortKey;
128
129                         public Contraction (char [] source,
130                                 string replacement, byte [] sortkey)
131                         {
132                                 Source = source;
133                                 Replacement = replacement;
134                                 SortKey = sortkey;
135                         }
136                 }
137
138                 internal class ContractionComparer : IComparer
139                 {
140                         public static readonly ContractionComparer Instance =
141                                 new ContractionComparer ();
142
143                         public int Compare (object o1, object o2)
144                         {
145                                 Contraction c1 = (Contraction) o1;
146                                 Contraction c2 = (Contraction) o2;
147                                 char [] a1 = c1.Source;
148                                 char [] a2 = c2.Source;
149                                 int min = a1.Length > a2.Length ?
150                                         a2.Length : a1.Length;
151                                 for (int i = 0; i < min; i++)
152                                         if (a1 [i] != a2 [i])
153                                                 return a1 [i] - a2 [i];
154                                 return a1.Length - a2.Length;
155                         }
156                 }
157
158                 internal class Level2Map
159                 {
160                         public byte Source;
161                         public byte Replace;
162
163                         public Level2Map (byte source, byte replace)
164                         {
165                                 Source = source;
166                                 Replace = replace;
167                         }
168                 }
169
170                 internal class Level2MapComparer : IComparer
171                 {
172                         public static readonly Level2MapComparer Instance =
173                                 new Level2MapComparer ();
174
175                         public int Compare (object o1, object o2)
176                         {
177                                 Level2Map m1 = (Level2Map) o1;
178                                 Level2Map m2 = (Level2Map) o2;
179                                 return (m1.Source - m2.Source);
180                         }
181                 }
182
183                 #endregion
184
185                 #region .ctor() and split functions
186
187                 public SimpleCollator (CultureInfo culture)
188                 {
189                         lcid = culture.LCID;
190                         textInfo = culture.TextInfo;
191                         buf = new SortKeyBuffer (culture.LCID);
192
193                         SetCJKTable (culture, ref cjkTable, ref cjkIndexer,
194                                 ref cjkLv2Table, ref cjkLv2Indexer);
195
196                         // Get tailoring info
197                         TailoringInfo t = null;
198                         for (CultureInfo ci = culture; ci.LCID != 127; ci = ci.Parent) {
199                                 t = Uni.GetTailoringInfo (ci.LCID);
200                                 if (t != null)
201                                         break;
202                         }
203                         if (t == null) // then use invariant
204                                 t = Uni.GetTailoringInfo (127);
205
206                         frenchSort = t.FrenchSort;
207                         BuildTailoringTables (culture, t, ref contractions,
208                                 ref level2Maps);
209                         // FIXME: Since tailorings are mostly for latin
210                         // (and in some cases Cyrillic) characters, it would
211                         // be much better for performance to store "start 
212                         // indexes" for > 370 (culture-specific letters).
213
214 /*
215 // dump tailoring table
216 Console.WriteLine ("******** building table for {0} : c - {1} d - {2}",
217 culture.LCID, contractions.Length, level2Maps.Length);
218 foreach (Contraction c in contractions) {
219 foreach (char cc in c.Source)
220 Console.Write ("{0:X4} ", (int) cc);
221 Console.WriteLine (" -> '{0}'", c.Replacement);
222 }
223 */
224                 }
225
226                 private void BuildTailoringTables (CultureInfo culture,
227                         TailoringInfo t,
228                         ref Contraction [] contractions,
229                         ref Level2Map [] diacriticals)
230                 {
231                         // collect tailoring entries.
232                         ArrayList cmaps = new ArrayList ();
233                         ArrayList dmaps = new ArrayList ();
234                         char [] tarr = Uni.TailoringValues;
235                         int idx = t.TailoringIndex;
236                         int end = idx + t.TailoringCount;
237                         while (idx < end) {
238                                 int ss = idx + 1;
239                                 char [] src = null;
240                                 switch (tarr [idx]) {
241                                 case '\x1': // SortKeyMap
242                                         idx++;
243                                         while (tarr [ss] != 0)
244                                                 ss++;
245                                         src = new char [ss - idx];
246                                         Array.Copy (tarr, idx, src, 0, ss - idx);
247                                         byte [] sortkey = new byte [4];
248                                         for (int i = 0; i < 4; i++)
249                                                 sortkey [i] = (byte) tarr [ss + 1 + i];
250                                         cmaps.Add (new Contraction (
251                                                 src, null, sortkey));
252                                         // it ends with 0
253                                         idx = ss + 6;
254                                         break;
255                                 case '\x2': // DiacriticalMap
256                                         dmaps.Add (new Level2Map (
257                                                 (byte) tarr [idx + 1],
258                                                 (byte) tarr [idx + 2]));
259                                         idx += 3;
260                                         break;
261                                 case '\x3': // ReplacementMap
262                                         idx++;
263                                         while (tarr [ss] != 0)
264                                                 ss++;
265                                         src = new char [ss - idx];
266                                         Array.Copy (tarr, idx, src, 0, ss - idx);
267                                         ss++;
268                                         int l = ss;
269                                         while (tarr [l] != 0)
270                                                 l++;
271                                         string r = new string (tarr, ss, l - ss);
272                                         cmaps.Add (new Contraction (
273                                                 src, r, null));
274                                         idx = l + 1;
275                                         break;
276                                 default:
277                                         throw new NotImplementedException (String.Format ("Mono INTERNAL ERROR (Should not happen): Collation tailoring table is broken for culture {0} ({1}) at 0x{2:X}", culture.LCID, culture.Name, idx));
278                                 }
279                         }
280                         cmaps.Sort (ContractionComparer.Instance);
281                         dmaps.Sort (Level2MapComparer.Instance);
282                         contractions = cmaps.ToArray (typeof (Contraction))
283                                 as Contraction [];
284                         diacriticals = dmaps.ToArray (typeof (Level2Map))
285                                 as Level2Map [];
286                 }
287
288                 private void SetCJKTable (CultureInfo culture,
289                         ref ushort [] cjkTable, ref CodePointIndexer cjkIndexer,
290                         ref byte [] cjkLv2Table, ref CodePointIndexer cjkLv2Indexer)
291                 {
292                         string name = GetNeutralCulture (culture).Name;
293
294                         Uni.FillCJK (name);
295
296                         // custom CJK table support.
297                         switch (name) {
298                         case "zh-CHS":
299                                 cjkTable = Uni.CjkCHS;
300                                 cjkIndexer = UUtil.CjkCHS;
301                                 break;
302                         case "zh-CHT":
303                                 cjkTable = Uni.CjkCHT;
304                                 cjkIndexer = UUtil.Cjk;
305                                 break;
306                         case "ja":
307                                 cjkTable = Uni.CjkJA;
308                                 cjkIndexer = UUtil.Cjk;
309                                 break;
310                         case "ko":
311                                 cjkTable = Uni.CjkKO;
312                                 cjkLv2Table = Uni.CjkKOLv2;
313                                 cjkIndexer = UUtil.Cjk;
314                                 cjkLv2Indexer = UUtil.Cjk;
315                                 break;
316                         }
317                 }
318
319                 static CultureInfo GetNeutralCulture (CultureInfo info)
320                 {
321                         CultureInfo ret = info;
322                         while (ret.Parent != null && ret.Parent.LCID != 127)
323                                 ret = ret.Parent;
324                         return ret;
325                 }
326
327                 #endregion
328
329                 byte Category (int cp)
330                 {
331                         if (cp < 0x3000 || cjkTable == null)
332                                 return categories [categoryIndexer.ToIndex (cp)];
333                         int idx = cjkIndexer.ToIndex (cp);
334                         ushort cjk = idx < 0 ? (ushort) 0 : cjkTable [idx];
335                         return cjk != 0 ? (byte) ((cjk & 0xFF00) >> 8) :
336                                 categories [categoryIndexer.ToIndex (cp)];
337                 }
338
339                 byte Level1 (int cp)
340                 {
341                         if (cp < 0x3000 || cjkTable == null)
342                                 return level1 [lv1Indexer.ToIndex (cp)];
343                         int idx = cjkIndexer.ToIndex (cp);
344                         ushort cjk = idx < 0 ? (ushort) 0 : cjkTable [idx];
345                         return cjk != 0 ? (byte) (cjk & 0xFF) :
346                                 level1 [lv1Indexer.ToIndex (cp)];
347                 }
348
349                 byte Level2 (int cp, ExtenderType ext)
350                 {
351                         if (ext == ExtenderType.Buggy)
352                                 return 5;
353                         else if (ext == ExtenderType.Conditional)
354                                 return 0;
355
356                         if (cp < 0x3000 || cjkLv2Table == null)
357                                 return level2 [lv2Indexer.ToIndex (cp)];
358                         int idx = cjkLv2Indexer.ToIndex (cp);
359                         byte ret = idx < 0 ? (byte) 0 : cjkLv2Table [idx];
360                         if (ret != 0)
361                                 return ret;
362                         ret = level2 [lv2Indexer.ToIndex (cp)];
363                         if (level2Maps.Length == 0)
364                                 return ret;
365                         for (int i = 0; i < level2Maps.Length; i++) {
366                                 if (level2Maps [i].Source == ret)
367                                         return level2Maps [i].Replace;
368                                 else if (level2Maps [i].Source > ret)
369                                         break;
370                         }
371                         return ret;
372                 }
373
374                 bool IsHalfKana (int cp)
375                 {
376                         return ignoreWidth || Uni.IsHalfWidthKana ((char) cp);
377                 }
378
379                 void SetOptions (CompareOptions options)
380                 {
381                         this.ignoreNonSpace = (options & CompareOptions.IgnoreNonSpace) != 0;
382                         this.ignoreSymbols = (options & CompareOptions.IgnoreSymbols) != 0;
383                         this.ignoreWidth = (options & CompareOptions.IgnoreWidth) != 0;
384                         this.ignoreCase = (options & CompareOptions.IgnoreCase) != 0;
385                         this.ignoreKanaType = (options & CompareOptions.IgnoreKanaType) != 0;
386                 }
387
388                 Contraction GetContraction (string s, int start, int end)
389                 {
390                         Contraction c = GetContraction (s, start, end, contractions);
391                         if (c != null || lcid == 127)
392                                 return c;
393                         return GetContraction (s, start, end, invariant.contractions);
394                 }
395
396                 Contraction GetContraction (string s, int start, int end, Contraction [] clist)
397                 {
398                         for (int i = 0; i < clist.Length; i++) {
399                                 Contraction ct = clist [i];
400                                 int diff = ct.Source [0] - s [start];
401                                 if (diff > 0)
402                                         return null; // it's already sorted
403                                 else if (diff < 0)
404                                         continue;
405                                 char [] chars = ct.Source;
406                                 if (end - start < chars.Length)
407                                         continue;
408                                 bool match = true;
409                                 for (int n = 0; n < chars.Length; n++)
410                                         if (s [start + n] != chars [n]) {
411                                                 match = false;
412                                                 break;
413                                         }
414                                 if (match)
415                                         return ct;
416                         }
417                         return null;
418                 }
419
420                 Contraction GetTailContraction (string s, int start, int end)
421                 {
422                         Contraction c = GetTailContraction (s, start, end, contractions);
423                         if (c != null || lcid == 127)
424                                 return c;
425                         return GetTailContraction (s, start, end, invariant.contractions);
426                 }
427
428                 Contraction GetTailContraction (string s, int start, int end, Contraction [] clist)
429                 {
430                         for (int i = 0; i < clist.Length; i++) {
431                                 Contraction ct = clist [i];
432                                 int diff = ct.Source [0] - s [end];
433                                 if (diff > 0)
434                                         return null; // it's already sorted
435                                 else if (diff < 0)
436                                         continue;
437                                 char [] chars = ct.Source;
438                                 if (start - end + 1 < chars.Length)
439                                         continue;
440                                 bool match = true;
441                                 int offset = start - chars.Length + 1;
442                                 for (int n = 0; n < chars.Length; n++)
443                                         if (s [offset + n] != chars [n]) {
444                                                 match = false;
445                                                 break;
446                                         }
447                                 if (match)
448                                         return ct;
449                         }
450                         return null;
451                 }
452
453                 Contraction GetContraction (char c)
454                 {
455                         Contraction ct = GetContraction (c, contractions);
456                         if (ct != null || lcid == 127)
457                                 return ct;
458                         return GetContraction (c, invariant.contractions);
459                 }
460
461                 Contraction GetContraction (char c, Contraction [] clist)
462                 {
463                         for (int i = 0; i < clist.Length; i++) {
464                                 Contraction ct = clist [i];
465                                 if (ct.Source [0] > c)
466                                         return null; // it's already sorted
467                                 if (ct.Source [0] == c && ct.Source.Length == 1)
468                                         return ct;
469                         }
470                         return null;
471                 }
472
473                 int FilterOptions (int i)
474                 {
475                         if (ignoreWidth) {
476                                 int x = widthCompat [widthIndexer.ToIndex (i)];
477                                 if (x != 0)
478                                         i = x;
479                         }
480                         if (ignoreCase)
481                                 i = textInfo.ToLower ((char) i);
482                         if (ignoreKanaType)
483                                 i = Uni.ToKanaTypeInsensitive (i);
484                         return i;
485                 }
486
487                 int previousChar = -1;
488                 byte [] previousSortKey = null;
489                 int previousChar2 = -1;
490                 byte [] previousSortKey2 = null;
491
492                 enum ExtenderType {
493                         None,
494                         Simple,
495                         Voiced,
496                         Conditional,
497                         Buggy,
498                 }
499
500                 ExtenderType GetExtenderType (int i)
501                 {
502                         // LAMESPEC: Windows expects true for U+3005, but 
503                         // sometimes it does not represent to repeat just
504                         // one character.
505                         // Windows also expects true for U+3031 and U+3032,
506                         // but they should *never* repeat one character.
507
508                         if (i < 0x3005 || i > 0xFF70)
509                                 return ExtenderType.None;
510                         if (i == 0xFE7C || i == 0xFE7D)
511                                 return ExtenderType.Simple;
512                         if (i == 0xFF70)
513                                 return ExtenderType.Conditional;
514                         if (i > 0x30FE)
515                                 return ExtenderType.None;
516                         switch (i) {
517                         case 0x3005: // LAMESPEC: see above.
518                                 return ExtenderType.Buggy;
519                         case 0x3031: // LAMESPEC: see above.
520                         case 0x3032: // LAMESPEC: see above.
521                         case 0x309D:
522                         case 0x30FD:
523                                 return ExtenderType.Simple;
524                         case 0x309E:
525                         case 0x30FE:
526                                 return ExtenderType.Voiced;
527                         case 0x30FC:
528                                 return ExtenderType.Conditional;
529                         default:
530                                 return ExtenderType.None;
531                         }
532                 }
533
534                 byte ToDashTypeValue (ExtenderType ext)
535                 {
536                         if (ignoreNonSpace) // LAMESPEC: huh, why?
537                                 return 3;
538                         switch (ext) {
539                         case ExtenderType.None:
540                                 return 3;
541                         case ExtenderType.Conditional:
542                                 return 5;
543                         default:
544                                 return 4;
545                         }
546                 }
547
548                 int FilterExtender (int i, ExtenderType ext)
549                 {
550                         if (ext == ExtenderType.Conditional &&
551                                 Uni.HasSpecialWeight ((char) i)) {
552                                 bool half = IsHalfKana ((char) i);
553                                 bool katakana = !Uni.IsHiragana ((char) i);
554                                 switch (Level1 (i) & 7) {
555                                 case 2:
556                                         return half ? 0xFF71 : katakana ? 0x30A2 : 0x3042;
557                                 case 3:
558                                         return half ? 0xFF72 : katakana ? 0x30A4 : 0x3044;
559                                 case 4:
560                                         return half ? 0xFF73 : katakana ? 0x30A6 : 0x3046;
561                                 case 5:
562                                         return half ? 0xFF74 : katakana ? 0x30A8 : 0x3048;
563                                 case 6:
564                                         return half ? 0xFF75 : katakana ? 0x30AA : 0x304A;
565                                 }
566                         }
567                         return i;
568                 }
569
570                 bool IsIgnorable (int i)
571                 {
572                         return Uni.IsIgnorable (i) ||
573                                 ignoreSymbols && Uni.IsIgnorableSymbol (i) ||
574                                 ignoreNonSpace && Uni.IsIgnorableNonSpacing (i);
575                 }
576
577
578                 #region GetSortKey()
579
580                 public SortKey GetSortKey (string s)
581                 {
582                         return GetSortKey (s, CompareOptions.None);
583                 }
584
585                 public SortKey GetSortKey (string s, CompareOptions options)
586                 {
587                         return GetSortKey (s, 0, s.Length, options);
588                 }
589
590                 public SortKey GetSortKey (string s, int start, int length, CompareOptions options)
591                 {
592                         SetOptions (options);
593
594                         buf.Initialize (options, lcid, s, frenchSort);
595                         int end = start + length;
596                         previousChar = -1;
597                         GetSortKey (s, start, end);
598                         return buf.GetResultAndReset ();
599                 }
600
601                 void GetSortKey (string s, int start, int end)
602                 {
603                         for (int n = start; n < end; n++) {
604                                 int i = s [n];
605
606                                 ExtenderType ext = GetExtenderType (i);
607                                 if (ext != ExtenderType.None) {
608                                         i = FilterExtender (previousChar, ext);
609                                         if (i >= 0)
610                                                 FillSortKeyRaw (i, ext);
611                                         else if (previousSortKey != null) {
612                                                 byte [] b = previousSortKey;
613                                                 buf.AppendNormal (
614                                                         b [0],
615                                                         b [1],
616                                                         b [2] != 1 ? b [2] : Level2 (i, ext),
617                                                         b [3] != 1 ? b [3] : level3 [lv3Indexer.ToIndex (i)]);
618                                         }
619                                         // otherwise do nothing.
620                                         // (if the extender is the first char
621                                         // in the string, then just ignore.)
622                                         continue;
623                                 }
624
625                                 if (IsIgnorable (i))
626                                         continue;
627                                 i = FilterOptions (i);
628
629                                 Contraction ct = GetContraction (s, n, end);
630                                 if (ct != null) {
631                                         if (ct.Replacement != null) {
632                                                 GetSortKey (ct.Replacement, 0, ct.Replacement.Length);
633                                         } else {
634                                                 byte [] b = ct.SortKey;
635                                                 buf.AppendNormal (
636                                                         b [0],
637                                                         b [1],
638                                                         b [2] != 1 ? b [2] : Level2 (i, ext),
639                                                         b [3] != 1 ? b [3] : level3 [lv3Indexer.ToIndex (i)]);
640                                                 previousSortKey = b;
641                                                 previousChar = -1;
642                                         }
643                                         n += ct.Source.Length - 1;
644                                 }
645                                 else {
646                                         if (!Uni.IsIgnorableNonSpacing (i))
647                                                 previousChar = i;
648                                         FillSortKeyRaw (i, ExtenderType.None);
649                                 }
650                         }
651                 }
652
653                 void FillSortKeyRaw (int i, ExtenderType ext)
654                 {
655                         if (0x3400 <= i && i <= 0x4DB5) {
656                                 int diff = i - 0x3400;
657                                 buf.AppendCJKExtension (
658                                         (byte) (0x10 + diff / 254),
659                                         (byte) (diff % 254 + 2));
660                                 return;
661                         }
662
663                         UnicodeCategory uc = char.GetUnicodeCategory ((char) i);
664                         switch (uc) {
665                         case UnicodeCategory.PrivateUse:
666                                 int diff = i - 0xE000;
667                                 buf.AppendNormal (
668                                         (byte) (0xE5 + diff / 254),
669                                         (byte) (diff % 254 + 2),
670                                         0,
671                                         0);
672                                 return;
673                         case UnicodeCategory.Surrogate:
674                                 FillSurrogateSortKeyRaw (i);
675                                 return;
676                         }
677
678                         byte level2 = Level2 (i, ext);
679                         if (Uni.HasSpecialWeight ((char) i)) {
680                                 byte level1 = Level1 (i);
681                                 buf.AppendKana (
682                                         Category (i),
683                                         level1,
684                                         level2,
685                                         Uni.Level3 (i),
686                                         Uni.IsJapaneseSmallLetter ((char) i),
687                                         ToDashTypeValue (ext),
688                                         !Uni.IsHiragana ((char) i),
689                                         IsHalfKana ((char) i)
690                                         );
691                                 if (!ignoreNonSpace && ext == ExtenderType.Voiced)
692                                         // Append voice weight
693                                         buf.AppendNormal (1, 1, 1, 0);
694                         }
695                         else
696                                 buf.AppendNormal (
697                                         Category (i),
698                                         Level1 (i),
699                                         level2,
700                                         Uni.Level3 (i));
701                 }
702
703                 void FillSurrogateSortKeyRaw (int i)
704                 {
705                         int diffbase = 0;
706                         int segment = 0;
707                         byte lower = 0;
708
709                         if (i < 0xD840) {
710                                 diffbase = 0xD800;
711                                 segment = 0x41;
712                                 lower = (byte) ((i == 0xD800) ? 0x3E : 0x3F);
713                         } else if (0xD840 <= i && i < 0xD880) {
714                                 diffbase = 0xD840;
715                                 segment = 0xF2;
716                                 lower = 0x3E;
717                         } else if (0xDB80 <= i && i < 0xDC00) {
718                                 diffbase = 0xDB80 - 0x40;
719                                 segment = 0xFE;
720                                 lower = 0x3E;
721                         } else {
722                                 diffbase = 0xDC00 - 0xF8 + 2;
723                                 segment = 0x41;
724                                 lower = 0x3F;
725                         }
726                         int diff = i - diffbase;
727
728                         buf.AppendNormal (
729                                 (byte) (segment + diff / 254),
730                                 (byte) (diff % 254 + 2),
731                                 lower,
732                                 lower);
733                 }
734
735                 #endregion
736
737                 #region Compare()
738
739                 public int Compare (string s1, string s2)
740                 {
741                         return Compare (s1, s2, CompareOptions.None);
742                 }
743
744                 public int Compare (string s1, string s2, CompareOptions options)
745                 {
746                         return Compare (s1, 0, s1.Length, s2, 0, s2.Length, options);
747                 }
748
749                 class Escape
750                 {
751                         public string Source;
752                         public int Index;
753                         public int Start;
754                         public int End;
755                 }
756
757                 // Those instances are reused not to invoke instantiation
758                 // during Compare().
759                 Escape escape1 = new Escape ();
760                 Escape escape2 = new Escape ();
761
762                 private int CompareOrdinal (string s1, int idx1, int len1,
763                         string s2, int idx2, int len2)
764                 {
765                         int min = len1 < len2 ? len1 : len2;
766                         int end1 = idx1 + min;
767                         int end2 = idx2 + min;
768                         for (int i1 = idx1, i2 = idx2;
769                                 i1 < end1 && i2 < end2; i1++, i2++)
770                                 if (s1 [i1] != s2 [i2])
771                                         return s1 [i1] - s2 [i2];
772                         return len1 == len2 ? 0 :
773                                 len1 == min ? - 1 : 1;
774                 }
775
776                 public int Compare (string s1, int idx1, int len1,
777                         string s2, int idx2, int len2, CompareOptions options)
778                 {
779                         // quick equality check
780                         if (idx1 == idx2 && len1 == len2 &&
781                                 Object.ReferenceEquals (s1, s2))
782                                 return 0;
783                         // FIXME: this should be done inside Compare() at
784                         // any time.
785 //                      int ord = CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
786 //                      if (ord == 0)
787 //                              return 0;
788                         if (options == CompareOptions.Ordinal)
789                                 return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
790
791 #if false // stable easy version, depends on GetSortKey().
792                         SortKey sk1 = GetSortKey (s1, idx1, len1, options);
793                         SortKey sk2 = GetSortKey (s2, idx2, len2, options);
794                         byte [] d1 = sk1.KeyData;
795                         byte [] d2 = sk2.KeyData;
796                         int len = d1.Length > d2.Length ? d2.Length : d1.Length;
797                         for (int i = 0; i < len; i++)
798                                 if (d1 [i] != d2 [i])
799                                         return d1 [i] < d2 [i] ? -1 : 1;
800                         return d1.Length == d2.Length ? 0 : d1.Length < d2.Length ? -1 : 1;
801 #else
802                         SetOptions (options);
803                         escape1.Source = null;
804                         escape2.Source = null;
805                         previousSortKey= previousSortKey2 = null;
806                         previousChar = previousChar2 = -1;
807                         bool dummy;
808                         int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, (options & CompareOptions.StringSort) != 0, out dummy);
809                         return ret == 0 ? 0 : ret < 0 ? -1 : 1;
810 #endif
811                 }
812
813                 int CompareInternal (string s1, int idx1, int len1, string s2,
814                         int idx2, int len2, bool stringSort,
815                         out bool targetConsumed)
816                 {
817                         int start1 = idx1;
818                         int start2 = idx2;
819                         int end1 = idx1 + len1;
820                         int end2 = idx2 + len2;
821                         targetConsumed = false;
822
823                         // It holds final result that comes from the comparison
824                         // at level 2 or lower. Even if Compare() found the
825                         // difference at level 2 or lower, it still has to
826                         // continue level 1 comparison. FinalResult is used
827                         // when there was no further differences.
828                         int finalResult = 0;
829                         // It holds the comparison level to do. It starts from
830                         // 5, and becomes 1 when we do primary-only comparison.
831                         int currentLevel = 5;
832
833                         int lv5At1 = -1;
834                         int lv5At2 = -1;
835                         int lv5Value1 = 0;
836                         int lv5Value2 = 0;
837
838                         // Skip heading extenders
839                         for (; idx1 < end1; idx1++)
840                                 if (GetExtenderType (s1 [idx1]) == ExtenderType.None)
841                                         break;
842                         for (; idx2 < end2; idx2++)
843                                 if (GetExtenderType (s2 [idx2]) == ExtenderType.None)
844                                         break;
845
846                         ExtenderType ext1 = ExtenderType.None;
847                         ExtenderType ext2 = ExtenderType.None;
848
849                         while (true) {
850                                 for (; idx1 < end1; idx1++)
851                                         if (!IsIgnorable (s1 [idx1]))
852                                                 break;
853                                 for (; idx2 < end2; idx2++)
854                                         if (!IsIgnorable (s2 [idx2]))
855                                                 break;
856
857                                 if (idx1 >= end1) {
858                                         if (escape1.Source == null)
859                                                 break;
860                                         s1 = escape1.Source;
861                                         start1 = escape1.Start;
862                                         idx1 = escape1.Index;
863                                         end1 = escape1.End;
864                                         escape1.Source = null;
865                                         continue;
866                                 }
867                                 if (idx2 >= end2) {
868                                         if (escape2.Source == null)
869                                                 break;
870                                         s2 = escape2.Source;
871                                         start2 = escape2.Start;
872                                         idx2 = escape2.Index;
873                                         end2 = escape2.End;
874                                         escape2.Source = null;
875                                         continue;
876                                 }
877 #if false
878 // FIXME: optimization could be done here.
879
880                                 if (s1 [idx1] == s2 [idx2]) {
881                                         idx1++;
882                                         idx2++;
883                                         continue;
884                                 }
885 //                              while (idx1 >= start1 && !IsSafe ((int) s [idx1]))
886 //                                      idx1--;
887 //                              while (idx2 >= start2 && !IsSafe ((int) s [idx2]))
888 //                                      idx2--;
889 #endif
890
891                                 int cur1 = idx1;
892                                 int cur2 = idx2;
893                                 byte [] sk1 = null;
894                                 byte [] sk2 = null;
895                                 int i1 = FilterOptions (s1 [idx1]);
896                                 int i2 = FilterOptions (s2 [idx2]);
897                                 bool special1 = false;
898                                 bool special2 = false;
899
900                                 // If current character is an expander, then
901                                 // repeat the previous character.
902                                 ext1 = GetExtenderType (i1);
903                                 if (ext1 != ExtenderType.None) {
904                                         if (previousChar < 0) {
905                                                 if (previousSortKey == null) {
906                                                         // nothing to extend
907                                                         idx1++;
908                                                         continue;
909                                                 }
910                                                 sk1 = previousSortKey;
911                                         }
912                                         else
913                                                 i1 = FilterExtender (previousChar, ext1);
914                                 }
915                                 ext2 = GetExtenderType (i2);
916                                 if (ext2 != ExtenderType.None) {
917                                         if (previousChar2 < 0) {
918                                                 if (previousSortKey2 == null) {
919                                                         // nothing to extend
920                                                         idx2++;
921                                                         continue;
922                                                 }
923                                                 sk2 = previousSortKey2;
924                                         }
925                                         else
926                                                 i2 = FilterExtender (previousChar2, ext2);
927                                 }
928
929                                 byte cat1 = Category (i1);
930                                 byte cat2 = Category (i2);
931
932                                 // Handle special weight characters
933                                 if (!stringSort && currentLevel > 4) {
934                                         if (cat1 == 6) {
935                                                 lv5At1 = escape1.Source != null ?
936                                                         escape1.Index - escape1.Start :
937                                                         cur1 - start1;
938                                                 // here Windows has a bug that it does
939                                                 // not consider thirtiary weight.
940                                                 lv5Value1 = Level1 (i1) << 8 + Uni.Level3 (i1);
941                                                 previousChar = i1;
942                                                 idx1++;
943                                         }
944                                         if (cat2 == 6) {
945                                                 lv5At2 = escape2.Source != null ?
946                                                         escape2.Index - escape2.Start :
947                                                         cur2 - start2;
948                                                 // here Windows has a bug that it does
949                                                 // not consider thirtiary weight.
950                                                 lv5Value2 = Level1 (i2) << 8 + Uni.Level3 (i2);
951                                                 previousChar2 = i2;
952                                                 idx2++;
953                                         }
954                                         if (cat1 == 6 || cat2 == 6) {
955                                                 currentLevel = 4;
956                                                 continue;
957                                         }
958                                 }
959
960                                 Contraction ct1 = null;
961                                 if (ext1 == ExtenderType.None)
962                                         ct1 = GetContraction (s1, idx1, end1);
963
964                                 int offset1 = 1;
965                                 if (sk1 != null)
966                                         offset1 = 1;
967                                 else if (ct1 != null) {
968                                         offset1 = ct1.Source.Length;
969                                         if (ct1.SortKey != null) {
970                                                 sk1 = charSortKey;
971                                                 for (int i = 0; i < ct1.SortKey.Length; i++)
972                                                         sk1 [i] = ct1.SortKey [i];
973                                                 previousChar = -1;
974                                                 previousSortKey = sk1;
975                                         }
976                                         else if (escape1.Source == null) {
977                                                 escape1.Source = s1;
978                                                 escape1.Start = start1;
979                                                 escape1.Index = cur1 + ct1.Source.Length;
980                                                 escape1.End = end1;
981                                                 s1 = ct1.Replacement;
982                                                 idx1 = 0;
983                                                 start1 = 0;
984                                                 end1 = s1.Length;
985                                                 continue;
986                                         }
987                                 }
988                                 else {
989                                         sk1 = charSortKey;
990                                         sk1 [0] = cat1;
991                                         sk1 [1] = Level1 (i1);
992                                         if (!ignoreNonSpace && currentLevel > 1)
993                                                 sk1 [2] = Level2 (i1, ext1);
994                                         if (currentLevel > 2)
995                                                 sk1 [3] = Uni.Level3 (i1);
996                                         if (currentLevel > 3)
997                                                 special1 = Uni.HasSpecialWeight ((char) i1);
998                                         if (cat1 > 1)
999                                                 previousChar = i1;
1000                                 }
1001
1002                                 Contraction ct2 = null;
1003                                 if (ext2 == ExtenderType.None)
1004                                         ct2 = GetContraction (s2, idx2, end2);
1005
1006                                 if (sk2 != null)
1007                                         idx2++;
1008                                 else if (ct2 != null) {
1009                                         idx2 += ct2.Source.Length;
1010                                         if (ct2.SortKey != null) {
1011                                                 sk2 = charSortKey2;
1012                                                 for (int i = 0; i < ct2.SortKey.Length; i++)
1013                                                         sk2 [i] = ct2.SortKey [i];
1014                                                 previousChar2 = -1;
1015                                                 previousSortKey2 = sk2;
1016                                         }
1017                                         else if (escape2.Source == null) {
1018                                                 escape2.Source = s2;
1019                                                 escape2.Start = start2;
1020                                                 escape2.Index = cur2 + ct2.Source.Length;
1021                                                 escape2.End = end2;
1022                                                 s2 = ct2.Replacement;
1023                                                 idx2 = 0;
1024                                                 start2 = 0;
1025                                                 end2 = s2.Length;
1026                                                 continue;
1027                                         }
1028                                 }
1029                                 else {
1030                                         sk2 = charSortKey2;
1031                                         sk2 [0] = cat2;
1032                                         sk2 [1] = Level1 (i2);
1033                                         if (!ignoreNonSpace && currentLevel > 1)
1034                                                 sk2 [2] = Level2 (i2, ext2);
1035                                         if (currentLevel > 2)
1036                                                 sk2 [3] = Uni.Level3 (i2);
1037                                         if (currentLevel > 3)
1038                                                 special2 = Uni.HasSpecialWeight ((char) i2);
1039                                         if (cat2 > 1)
1040                                                 previousChar2 = i2;
1041                                         idx2++;
1042                                 }
1043
1044                                 // Add offset here so that it does not skip
1045                                 // idx1 while s2 has a replacement.
1046                                 idx1 += offset1;
1047
1048                                 // add diacritical marks in s1 here
1049                                 if (!ignoreNonSpace) {
1050                                         while (idx1 < end1) {
1051                                                 if (Category (s1 [idx1]) != 1)
1052                                                         break;
1053                                                 if (sk1 [2] == 0)
1054                                                         sk1 [2] = 2;
1055                                                 sk1 [2] = (byte) (sk1 [2] + 
1056                                                         Level2 (s1 [idx1], ExtenderType.None));
1057                                                 idx1++;
1058                                         }
1059
1060                                         // add diacritical marks in s2 here
1061                                         while (idx2 < end2) {
1062                                                 if (Category (s2 [idx2]) != 1)
1063                                                         break;
1064                                                 if (sk2 [2] == 0)
1065                                                         sk2 [2] = 2;
1066                                                 sk2 [2] = (byte) (sk2 [2] + 
1067                                                         Level2 (s2 [idx2], ExtenderType.None));
1068                                                 idx2++;
1069                                         }
1070                                 }
1071
1072                                 int ret = sk1 [0] - sk2 [0];
1073                                 ret = ret != 0 ? ret : sk1 [1] - sk2 [1];
1074                                 if (ret != 0)
1075                                         return ret;
1076                                 if (currentLevel == 1)
1077                                         continue;
1078                                 if (!ignoreNonSpace) {
1079                                         ret = sk1 [2] - sk2 [2];
1080                                         if (ret != 0) {
1081                                                 finalResult = ret;
1082                                                 currentLevel = frenchSort ? 2 : 1;
1083                                                 continue;
1084                                         }
1085                                 }
1086                                 if (currentLevel == 2)
1087                                         continue;
1088                                 ret = sk1 [3] - sk2 [3];
1089                                 if (ret != 0) {
1090                                         finalResult = ret;
1091                                         currentLevel = 2;
1092                                         continue;
1093                                 }
1094                                 if (currentLevel == 3)
1095                                         continue;
1096                                 if (special1 != special2) {
1097                                         finalResult = special1 ? 1 : -1;
1098                                         currentLevel = 3;
1099                                         continue;
1100                                 }
1101                                 if (special1) {
1102                                         ret = CompareFlagPair (
1103                                                 !Uni.IsJapaneseSmallLetter ((char) i1),
1104                                                 !Uni.IsJapaneseSmallLetter ((char) i2));
1105                                         ret = ret != 0 ? ret :
1106                                                 ToDashTypeValue (ext1) -
1107                                                 ToDashTypeValue (ext2);
1108                                         ret = ret != 0 ? ret : CompareFlagPair (
1109                                                 Uni.IsHiragana ((char) i1),
1110                                                 Uni.IsHiragana ((char) i2));
1111                                         ret = ret != 0 ? ret : CompareFlagPair (
1112                                                 !IsHalfKana ((char) i1),
1113                                                 !IsHalfKana ((char) i2));
1114                                         if (ret != 0) {
1115                                                 finalResult = ret;
1116                                                 currentLevel = 3;
1117                                                 continue;
1118                                         }
1119                                 }
1120                         }
1121
1122                         // If there were only level 3 or lower differences,
1123                         // then we still have to find diacritical differences
1124                         // if any.
1125                         if (!ignoreNonSpace &&
1126                                 finalResult != 0 && currentLevel > 2) {
1127                                 while (idx1 < end1 && idx2 < end2) {
1128                                         if (!Uni.IsIgnorableNonSpacing (s1 [idx1]))
1129                                                 break;
1130                                         if (!Uni.IsIgnorableNonSpacing (s2 [idx2]))
1131                                                 break;
1132                                         finalResult = Level2 (FilterOptions ((s1 [idx1])), ext1) - Level2 (FilterOptions (s2 [idx2]), ext2);
1133                                         if (finalResult != 0)
1134                                                 break;
1135                                         idx1++;
1136                                         idx2++;
1137                                         // they should work only for the first character
1138                                         ext1 = ExtenderType.None;
1139                                         ext2 = ExtenderType.None;
1140                                 }
1141                         }
1142                         // we still have to handle level 5
1143                         if (finalResult == 0) {
1144                                 finalResult = lv5At1 - lv5At2;
1145                                 if (finalResult == 0)
1146                                         finalResult = lv5Value1 - lv5Value2;
1147                         }
1148                         if (finalResult == 0 && idx2 == end2)
1149                                 targetConsumed = true;
1150                         return idx1 != end1 ? 1 : idx2 == end2 ? finalResult : -1;
1151                 }
1152
1153                 #endregion
1154
1155                 #region IsPrefix() and IsSuffix()
1156
1157                 public bool IsPrefix (string src, string target, CompareOptions opt)
1158                 {
1159                         return IsPrefix (src, target, 0, src.Length, opt);
1160                 }
1161
1162                 public bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
1163                 {
1164                         SetOptions (opt);
1165                         return IsPrefix (s, target, start, length, 
1166                                 (opt & CompareOptions.StringSort) != 0);
1167                 }
1168
1169                 public bool IsPrefix (string s, string target, int start, int length, bool stringSort)
1170                 {
1171                         bool consumed;
1172                         escape1.Source = null;
1173                         escape2.Source = null;
1174                         previousSortKey= previousSortKey2 = null;
1175                         previousChar = previousChar2 = -1;
1176                         int ret = CompareInternal (s, start, length,
1177                                 target, 0, target.Length, stringSort,
1178                                 out consumed);
1179                         return consumed;
1180                 }
1181
1182 /*
1183                 public bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
1184                 {
1185                         SetOptions (opt);
1186                         return IsPrefix (s, target, start, length);
1187                 }
1188
1189                 // returns the consumed length in positive number, or -1 if
1190                 // target was not a prefix.
1191                 bool IsPrefix (string s, string target, int start, int length)
1192                 {
1193                         // quick check : simple codepoint comparison
1194                         if (s.Length >= target.Length) {
1195                                 int si = start;
1196                                 for (int i = 0; si < length && i < target.Length; i++, si++)
1197                                         if (s [si] != target [i])
1198                                                 break;
1199                                 if (si == start + target.Length)
1200                                         return true;
1201                         }
1202
1203                         escapedSourceIndex = -1;
1204                         return IsPrefixInternal (s, target, start, length);
1205                 }
1206
1207                 bool IsPrefixInternal (string s, string target, int start, int length)
1208                 {
1209                         int si = start;
1210                         int end = start + length;
1211                         int ti = 0;
1212                         string source = s;
1213
1214                         while (ti < target.Length) {
1215                                 if (IsIgnorable (target [ti])) {
1216                                         ti++;
1217                                         continue;
1218                                 }
1219                                 if (si >= end) {
1220                                         if (s == source)
1221                                                 break;
1222                                         s = source;
1223                                         si = escapedSourceIndex;
1224                                         end = start + length;
1225                                         escapedSourceIndex = -1;
1226                                         continue;
1227                                 }
1228                                 if (IsIgnorable (s [si])) {
1229                                         si++;
1230                                         continue;
1231                                 }
1232
1233                                 // Check contraction for target.
1234                                 Contraction ctt = GetContraction (target, ti, target.Length);
1235                                 if (ctt != null) {
1236                                         ti += ctt.Source.Length;
1237                                         if (ctt.SortKey != null) {
1238                                                 int ret = GetMatchLength (ref s, ref si, ref end, -1, ctt.SortKey, true);
1239                                                 if (ret < 0)
1240                                                         return false;
1241                                                 si += ret;
1242                                         } else {
1243                                                 string r = ctt.Replacement;
1244                                                 int i = 0;
1245                                                 while (i < r.Length && si < end) {
1246                                                         int ret = GetMatchLength (ref s, ref si, ref end, r [i]);
1247                                                         if (ret < 0)
1248                                                                 return false;
1249                                                         si += ret;
1250                                                         i++;
1251                                                 }
1252                                                 if (i < r.Length && si >= end)
1253                                                         return false;
1254                                         }
1255                                 }
1256                                 else {
1257                                         int ret = GetMatchLength (ref s, ref si, ref end, target [ti]);
1258                                         if (ret < 0)
1259                                                 return false;
1260                                         si += ret;
1261                                         ti++;
1262                                 }
1263                         }
1264                         if (si == end) {
1265                                 // All codepoints in the compared range
1266                                 // matches. In that case, what matters 
1267                                 // is whether the remaining part of 
1268                                 // "target" is ignorable or not.
1269                                 while (ti < target.Length)
1270                                         if (!IsIgnorable (target [ti++]))
1271                                                 return false;
1272                                 return true;
1273                         }
1274                         return true;
1275                 }
1276 */
1277
1278                 // WARNING: Don't invoke it outside IsPrefix().
1279                 int GetMatchLength (ref string s, ref int idx, ref int end, char target)
1280                 {
1281                         int it = FilterOptions ((int) target);
1282                         charSortKey [0] = Category (it);
1283                         charSortKey [1] = Level1 (it);
1284                         if (!ignoreNonSpace)
1285                                 // FIXME: pass ExtenderType
1286                                 charSortKey [2] = Level2 (it, ExtenderType.None);
1287                         charSortKey [3] = Uni.Level3 (it);
1288
1289                         return GetMatchLength (ref s, ref idx, ref end, it, charSortKey, !Uni.HasSpecialWeight ((char) it));
1290                 }
1291
1292                 // WARNING: Don't invoke it outside IsPrefix().
1293                 // returns consumed source length (mostly 1, source length in case of contraction)
1294                 int GetMatchLength (ref string s, ref int idx, ref int end, int it, byte [] sortkey, bool noLv4)
1295                 {
1296                         Contraction ct = null;
1297                         // If there is already expansion, then it should not
1298                         // process further expansions.
1299                         if (escapedSourceIndex < 0)
1300                                 ct = GetContraction (s, idx, end);
1301                         if (ct != null) {
1302                                 if (ct.SortKey != null) {
1303                                         if (!noLv4)
1304                                                 return -1;
1305                                         for (int i = 0; i < ct.SortKey.Length; i++)
1306                                                 if (sortkey [i] != ct.SortKey [i])
1307                                                         return -1;
1308                                         return ct.Source.Length;
1309                                 } else {
1310                                         escapedSourceIndex = idx + ct.Source.Length;
1311                                         s = ct.Replacement;
1312                                         idx = 0;
1313                                         end = s.Length;
1314                                         return GetMatchLength (ref s, ref idx, ref end, it, sortkey, noLv4);
1315                                 }
1316                         } else {
1317                                 // primitive comparison
1318                                 if (Compare (s [idx], it, sortkey) != 0)
1319                                         return -1;
1320                                 return 1;
1321                         }
1322                 }
1323
1324                 // IsSuffix()
1325
1326                 public bool IsSuffix (string src, string target, CompareOptions opt)
1327                 {
1328                         return IsSuffix (src, target, src.Length - 1, src.Length, opt);
1329                 }
1330
1331                 public bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
1332                 {
1333                         SetOptions (opt);
1334
1335                         // quick check : simple codepoint comparison
1336                         if (s.Length >= target.Length) {
1337                                 int si = start;
1338                                 int se = start - length;
1339                                 for (int i = target.Length - 1; si >= se && i >= 0; i--, si--)
1340                                         if (s [si] != target [i])
1341                                                 break;
1342                                 if (si == start + target.Length)
1343                                         return true;
1344                         }
1345
1346                         escapedSourceIndex = -1;
1347                         return IsSuffix (s, target, start, length);
1348                 }
1349
1350                 bool IsSuffix (string s, string target, int start, int length)
1351                 {
1352                         int si = start;
1353                         int ti = target.Length - 1;
1354                         string source = s;
1355                         int end = start - length + 1;
1356
1357                         while (ti >= 0) {
1358                                 if (IsIgnorable (target [ti])) {
1359                                         ti--;
1360                                         continue;
1361                                 }
1362                                 if (si < 0) {
1363                                         if (s == source)
1364                                                 break;
1365                                         s = source;
1366                                         si = escapedSourceIndex;
1367                                         escapedSourceIndex = -1;
1368                                         continue;
1369                                 }
1370                                 if (IsIgnorable (s [si])) {
1371                                         si--;
1372                                         continue;
1373                                 }
1374
1375                                 // Check contraction for target.
1376                                 Contraction ctt = GetTailContraction (target, ti, 0);
1377                                 if (ctt != null) {
1378                                         ti -= ctt.Source.Length;
1379                                         if (ctt.SortKey != null) {
1380                                                 int ret = GetMatchLengthBack (ref s, ref si, ref end, -1, ctt.SortKey, true);
1381                                                 if (ret < 0)
1382                                                         return false;
1383                                                 si -= ret;
1384                                         } else {
1385                                                 string r = ctt.Replacement;
1386                                                 int i = r.Length - 1;
1387                                                 while (i >= 0 && si >= end) {
1388                                                         int ret = GetMatchLengthBack (ref s, ref si, ref end, r [i]);
1389                                                         if (ret < 0)
1390                                                                 return false;
1391                                                         si -= ret;
1392                                                         i--;
1393                                                 }
1394                                                 if (i >= 0 && si < end)
1395                                                         return false;
1396                                         }
1397                                 }
1398                                 else {
1399                                         int ret = GetMatchLengthBack (ref s, ref si, ref end, target [ti]);
1400                                         if (ret < 0)
1401                                                 return false;
1402                                         si -= ret;
1403                                         ti--;
1404                                 }
1405                         }
1406                         if (si < end) {
1407                                 // All codepoints in the compared range
1408                                 // matches. In that case, what matters 
1409                                 // is whether the remaining part of 
1410                                 // "target" is ignorable or not.
1411                                 while (ti >= 0)
1412                                         if (!IsIgnorable (target [ti--]))
1413                                                 return false;
1414                                 return true;
1415                         }
1416                         return true;
1417                 }
1418
1419                 // WARNING: Don't invoke it outside IsSuffix().
1420                 int GetMatchLengthBack (ref string s, ref int idx, ref int end, char target)
1421                 {
1422                         int it = FilterOptions ((int) target);
1423                         charSortKey [0] = Category (it);
1424                         charSortKey [1] = Level1 (it);
1425                         if (!ignoreNonSpace)
1426                                 // FIXME: pass extender type
1427                                 charSortKey [2] = Level2 (it, ExtenderType.None);
1428                         charSortKey [3] = Uni.Level3 (it);
1429
1430                         return GetMatchLengthBack (ref s, ref idx, ref end, it, charSortKey, !Uni.HasSpecialWeight ((char) it));
1431                 }
1432
1433                 // WARNING: Don't invoke it outside IsSuffix().
1434                 // returns consumed source length (mostly 1, source length in case of contraction)
1435                 int GetMatchLengthBack (ref string s, ref int idx, ref int end, int it, byte [] sortkey, bool noLv4)
1436                 {
1437                         Contraction ct = null;
1438                         // If there is already expansion, then it should not
1439                         // process further expansions.
1440                         if (escapedSourceIndex < 0)
1441                                 ct = GetTailContraction (s, idx, end);
1442                         if (ct != null) {
1443                                 if (ct.SortKey != null) {
1444                                         if (!noLv4)
1445                                                 return -1;
1446                                         for (int i = 0; i < ct.SortKey.Length; i++)
1447                                                 if (sortkey [i] != ct.SortKey [i])
1448                                                         return -1;
1449                                         return ct.Source.Length;
1450                                 } else {
1451                                         escapedSourceIndex = idx - ct.Source.Length;
1452                                         s = ct.Replacement;
1453                                         idx = s.Length - 1;
1454                                         end = 0;
1455                                         return GetMatchLength (ref s, ref idx, ref end, it, sortkey, noLv4);
1456                                 }
1457                         } else {
1458                                 // primitive comparison
1459                                 if (Compare (s [idx], it, sortkey) != 0)
1460                                         return -1;
1461                                 return 1;
1462                         }
1463                 }
1464
1465                 // Common use methods 
1466
1467                 // returns comparison result.
1468                 private int Compare (char src, int ct, byte [] sortkey)
1469                 {
1470                         // char-by-char comparison.
1471                         int cs = FilterOptions (src);
1472                         if (cs == ct)
1473                                 return 0;
1474                         // lv.1 to 3
1475                         int ret = Category (cs) - Category (ct);
1476                         if (ret != 0)
1477                                 return ret;
1478                         ret = Level1 (cs) - Level1 (ct);
1479                         if (ret != 0)
1480                                 return ret;
1481                         if (!ignoreNonSpace) {
1482                                 // FIXME: pass ExtenderType
1483                                 ret = Level2 (cs, ExtenderType.None) - Level2 (ct, ExtenderType.None);
1484                                 if (ret != 0)
1485                                         return ret;
1486                         }
1487                         ret = Uni.Level3 (cs) - Uni.Level3 (ct);
1488                         if (ret != 0)
1489                                 return ret;
1490                         // lv.4 (only when required). No need to check cj coz
1491                         // there is no pair of characters that has the same
1492                         // primary level and differs here.
1493                         if (!Uni.HasSpecialWeight (src))
1494                                 return 0;
1495                         char target = (char) ct;
1496                         ret = CompareFlagPair (
1497                                 !Uni.IsJapaneseSmallLetter (src),
1498                                 !Uni.IsJapaneseSmallLetter (target));
1499                         if (ret != 0)
1500                                 return ret;
1501                         ret = Uni.GetJapaneseDashType (src) -
1502                                 Uni.GetJapaneseDashType (target);
1503                         if (ret != 0)
1504                                 return ret;
1505                         ret = CompareFlagPair (Uni.IsHiragana (src),
1506                                 Uni.IsHiragana (target));
1507                         if (ret != 0)
1508                                 return ret;
1509                         ret = CompareFlagPair (!IsHalfKana (src),
1510                                 !IsHalfKana (target));
1511                         return ret;
1512                 }
1513
1514                 int CompareFlagPair (bool b1, bool b2)
1515                 {
1516                         return b1 == b2 ? 0 : b1 ? 1 : -1;
1517                 }
1518
1519                 #endregion
1520
1521                 #region IndexOf() / LastIndexOf()
1522
1523                 // IndexOf (string, string, CompareOptions)
1524                 // IndexOf (string, string, int, int, CompareOptions)
1525                 // IndexOf (string, char, int, int, CompareOptions)
1526                 // IndexOfPrimitiveChar (string, int, int, char)
1527                 // IndexOfSortKey (string, int, int, byte[], char, int, bool)
1528                 // IndexOf (string, string, int, int)
1529
1530                 public int IndexOf (string s, string target, CompareOptions opt)
1531                 {
1532                         return IndexOf (s, target, 0, s.Length, opt);
1533                 }
1534
1535                 public int IndexOf (string s, string target, int start, int length, CompareOptions opt)
1536                 {
1537                         SetOptions (opt);
1538                         return IndexOf (s, target, start, length,
1539                                 (opt & CompareOptions.StringSort) != 0);
1540                 }
1541
1542                 public int IndexOf (string s, char target, CompareOptions opt)
1543                 {
1544                         return IndexOf (s, target, 0, s.Length, opt);
1545                 }
1546
1547                 public int IndexOf (string s, char target, int start, int length, CompareOptions opt)
1548                 {
1549                         SetOptions (opt);
1550
1551                         // If target is contraction, then use string search.
1552                         Contraction ct = GetContraction (target);
1553                         if (ct != null) {
1554                                 if (ct.Replacement != null)
1555                                         return IndexOfPrimitiveChar (s, start, length, ct.Replacement [0]);
1556                                 else
1557                                         return IndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
1558                         }
1559                         else
1560                                 return IndexOfPrimitiveChar (s, start, length, target);
1561                 }
1562
1563                 // Searches target char w/o checking contractions
1564                 int IndexOfPrimitiveChar (string s, int start, int length, char target)
1565                 {
1566                         int ti = FilterOptions ((int) target);
1567                         charSortKey [0] = Category (ti);
1568                         charSortKey [1] = Level1 (ti);
1569                         if (!ignoreNonSpace)
1570                                 // FIXME: pass ExtenderType
1571                                 charSortKey [2] = Level2 (ti, ExtenderType.None);
1572                         charSortKey [3] = Uni.Level3 (ti);
1573                         return IndexOfSortKey (s, start, length, charSortKey, target, ti, !Uni.HasSpecialWeight ((char) ti));
1574                 }
1575
1576                 // Searches target byte[] keydata
1577                 int IndexOfSortKey (string s, int start, int length, byte [] sortkey, char target, int ti, bool noLv4)
1578                 {
1579                         int end = start + length;
1580                         for (int idx = start; idx < end; idx++) {
1581                                 int cur = idx;
1582                                 if (Matches (s, ref idx, end, ti, target, sortkey, noLv4, false))
1583                                         return cur;
1584                         }
1585                         return -1;
1586                 }
1587
1588                 // Searches string. Search head character (or keydata when
1589                 // the head is contraction sortkey) and try IsPrefix().
1590                 int IndexOf (string s, string target, int start, int length, bool stringSort)
1591                 {
1592                         int tidx = 0;
1593                         for (; tidx < target.Length; tidx++)
1594                                 if (!IsIgnorable (target [tidx]))
1595                                         break;
1596                         if (tidx == target.Length)
1597                                 return start;
1598                         Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1599                         byte [] sortkey = ct != null ? ct.SortKey : null;
1600                         string replace = ct != null ? ct.Replacement : null;
1601                         do {
1602                                 int idx = 0;
1603                                 if (sortkey != null)
1604                                         idx = IndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
1605                                 else if (replace != null)
1606                                         idx = IndexOf (s, replace, start, length, stringSort);
1607                                 else
1608                                         idx = IndexOfPrimitiveChar (s, start, length, target [tidx]);
1609                                 if (idx < 0)
1610                                         return -1;
1611                                 if (IsPrefix (s, target, idx, length - (idx - start), stringSort))
1612                                         return idx;
1613                                 Contraction cts = GetContraction (s, idx, length - (idx - start));
1614                                 if (cts != null) {
1615                                         start += cts.Source.Length;
1616                                         length -= cts.Source.Length;
1617                                 } else {
1618                                         start++;
1619                                         length--;
1620                                 }
1621                         } while (length > 0);
1622                         return -1;
1623                 }
1624
1625                 //
1626                 // There are the same number of IndexOf() related methods,
1627                 // with the same functionalities for each.
1628                 //
1629
1630                 public int LastIndexOf (string s, string target, CompareOptions opt)
1631                 {
1632                         return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1633                 }
1634
1635                 public int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
1636                 {
1637                         SetOptions (opt);
1638                         return LastIndexOf (s, target, start, length,
1639                                 (opt & CompareOptions.StringSort) != 0);
1640                 }
1641
1642                 public int LastIndexOf (string s, char target, CompareOptions opt)
1643                 {
1644                         return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1645                 }
1646
1647                 public int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
1648                 {
1649                         SetOptions (opt);
1650
1651                         // If target is contraction, then use string search.
1652                         Contraction ct = GetContraction (target);
1653                         if (ct != null) {
1654                                 if (ct.Replacement != null)
1655                                         return LastIndexOfPrimitiveChar (s, start, length, ct.Replacement [0]);
1656                                 else
1657                                         return LastIndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
1658                         }
1659                         else
1660                                 return LastIndexOfPrimitiveChar (s, start, length, target);
1661                 }
1662
1663                 // Searches target char w/o checking contractions
1664                 int LastIndexOfPrimitiveChar (string s, int start, int length, char target)
1665                 {
1666                         int ti = FilterOptions ((int) target);
1667                         charSortKey [0] = Category (ti);
1668                         charSortKey [1] = Level1 (ti);
1669                         if (!ignoreNonSpace)
1670                                 // FIXME: pass ExtenderType
1671                                 charSortKey [2] = Level2 (ti, ExtenderType.None);
1672                         charSortKey [3] = Uni.Level3 (ti);
1673                         return LastIndexOfSortKey (s, start, length, charSortKey, target, ti, !Uni.HasSpecialWeight ((char) ti));
1674                 }
1675
1676                 // Searches target byte[] keydata
1677                 int LastIndexOfSortKey (string s, int start, int length, byte [] sortkey, char target, int ti, bool noLv4)
1678                 {
1679                         int end = start - length;
1680
1681                         for (int idx = start; idx > end; idx--) {
1682                                 int cur = idx;
1683                                 if (Matches (s, ref idx, end, ti, target, sortkey, noLv4, true))
1684                                         return cur;
1685                         }
1686                         return -1;
1687                 }
1688
1689                 // Searches string. Search head character (or keydata when
1690                 // the head is contraction sortkey) and try IsPrefix().
1691                 int LastIndexOf (string s, string target, int start, int length, bool stringSort)
1692                 {
1693                         int orgStart = start;
1694                         int tidx = 0;
1695                         for (; tidx < target.Length; tidx++)
1696                                 if (!IsIgnorable (target [tidx]))
1697                                         break;
1698                         if (tidx == target.Length)
1699                                 return start;
1700                         Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1701                         byte [] sortkey = ct != null ? ct.SortKey : null;
1702                         string replace = ct != null ? ct.Replacement : null;
1703
1704                         do {
1705                                 int idx = 0;
1706                                 if (sortkey != null)
1707                                         idx = LastIndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
1708                                 else if (replace != null)
1709                                         idx = LastIndexOf (s, replace, start, length, stringSort);
1710                                 else
1711                                         idx = LastIndexOfPrimitiveChar (s, start, length, target [tidx]);
1712
1713                                 if (idx < 0)
1714                                         return -1;
1715                                 if (IsPrefix (s, target, idx, orgStart - idx + 1, stringSort)) {
1716                                         for (;idx < orgStart; idx++)
1717                                                 if (!IsIgnorable (s [idx]))
1718                                                         break;
1719                                         return idx;
1720                                 }
1721                                 Contraction cts = GetContraction (s, idx, orgStart - idx + 1);
1722                                 if (cts != null) {
1723                                         start -= cts.Source.Length;
1724                                         length -= cts.Source.Length;
1725                                 } else {
1726                                         start--;
1727                                         length--;
1728                                 }
1729                         } while (length > 0);
1730                         return -1;
1731                 }
1732
1733                 private bool Matches (string s, ref int idx, int end, int ti, char target, byte [] sortkey, bool noLv4, bool lastIndexOf)
1734                 {
1735                         switch (char.GetUnicodeCategory (s [idx])) {
1736                         case UnicodeCategory.PrivateUse:
1737                         case UnicodeCategory.Surrogate:
1738                                 if (s [idx] != target)
1739                                         return false;
1740                                 return true;
1741                         }
1742
1743                         char sc = char.MinValue;
1744                         Contraction ct = GetContraction (s, idx, end);
1745                         // if lv4 exists, it never matches contraction
1746                         if (ct != null && noLv4) {
1747                                 if (lastIndexOf)
1748                                         idx -= ct.Source.Length - 1;
1749                                 else
1750                                         idx += ct.Source.Length - 1;
1751                                 if (ct.SortKey != null) {
1752                                         for (int i = 0; i < sortkey.Length; i++)
1753                                                 if (ct.SortKey [i] != sortkey [i])
1754                                                         return false;
1755                                         return true;
1756                                 }
1757                                 // Here is the core of LAMESPEC
1758                                 // described at the top of the source.
1759                                 sc = ct.Replacement [0];
1760                         }
1761                         else
1762                                 sc = s [idx];
1763
1764                         if (sc == target)
1765                                 return true;
1766                         int si = FilterOptions ((int) sc);
1767                         if (Category (si) != sortkey [0] ||
1768                                 Level1 (si) != sortkey [1] ||
1769                                 // FIXME: pass ExtenderType
1770                                 !ignoreNonSpace && Level2 (si, ExtenderType.None) != sortkey [2] ||
1771                                 Uni.Level3 (si) != sortkey [3])
1772                                 return false;
1773                         if (noLv4 && !Uni.HasSpecialWeight ((char) si))
1774                                 return true;
1775                         else if (noLv4)
1776                                 return false;
1777                         if (Uni.IsJapaneseSmallLetter ((char) si) !=
1778                                 Uni.IsJapaneseSmallLetter ((char) ti) ||
1779                                 Uni.GetJapaneseDashType ((char) si) !=
1780                                 Uni.GetJapaneseDashType ((char) ti) ||
1781                                 !Uni.IsHiragana ((char) si) !=
1782                                 !Uni.IsHiragana ((char) ti) ||
1783                                 IsHalfKana ((char) si) !=
1784                                 IsHalfKana ((char) ti))
1785                                 return false;
1786                         return true;
1787                 }
1788
1789                 #endregion
1790         }
1791 }