2005-07-27 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / SimpleCollator.cs
1 //
2 // SimpleCollator.cs
3 //
4 // This class will demonstrate CompareInfo functionality that will just work.
5 //
6
7 //
8 // Here's a summary for supporting contractions, expansions and diacritical 
9 // remappings.
10 //
11 // Diacritical mapping is a simple tailoring rule that "remaps" diacritical
12 // weight value from one to another. For now it applies to all range of
13 // characters, but at some stage we might need to limit the remapping ranges.
14 //
15 // A Contraction consists of a string (actually char[]) and a sortkey entry
16 // (i.e. byte[]). It indicates that a sequence of characters is interpreted
17 // as a single character that has the mapped sortkey value. There is no
18 // character which goes across "special rules". When the collator encountered
19 // such a sequence of characters, it just returns the sortkey value without
20 // further replacement.
21 //
22 // Since it is still likely to happen that a contraction sequence matches
23 // other character than the identical sequence (depending on CompareOptions
24 // and of course, defined sortkey value itself), comparison cannot be done
25 // at source char[] level.
26 //
27 // In IndexOf(), the first character in the target string or the target char
28 // itself is turned into sortkey bytes. If the character has a contraction and
29 // that is sortkey map, then it is used instead. If the contraction exists and
30 // that is replacement map, then the first character of the replacement string
31 // is searched instead. IndexOf() always searches only for the top character,
32 // and if it returned negative value, then it returns -1. Otherwise, it then
33 // tries IsPrefix() from that location. If it returns true, then it returns
34 // the index.
35 //
36 // LAMESPEC: IndexOf() is lame as a whole API. It never matches in the middle
37 // of expansion and there is no proper way to return such indexes within
38 // a single int return value.
39 //
40 // For example, try below in .NET:
41 //      IndexOf("\u00E6", "a")
42 //      IndexOf("\u00E6", "e")
43 //
44
45
46 using System;
47 using System.Collections;
48 using System.Globalization;
49
50 using Uni = Mono.Globalization.Unicode.MSCompatUnicodeTable;
51 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
52
53 namespace Mono.Globalization.Unicode
54 {
55         internal class SimpleCollator
56         {
57                 static SimpleCollator invariant =
58                         new SimpleCollator (CultureInfo.InvariantCulture);
59
60                 SortKeyBuffer buf;
61                 // CompareOptions expanded.
62                 bool ignoreNonSpace; // used in IndexOf()
63                 bool ignoreSymbols;
64                 bool ignoreWidth;
65                 bool ignoreCase;
66                 bool ignoreKanaType;
67                 TextInfo textInfo; // for ToLower().
68                 bool frenchSort;
69                 unsafe readonly byte* cjkCatTable;
70                 unsafe readonly byte* cjkLv1Table;
71                 readonly CodePointIndexer cjkIndexer;
72                 unsafe readonly byte* cjkLv2Table;
73                 readonly CodePointIndexer cjkLv2Indexer;
74                 readonly int lcid;
75                 readonly Contraction [] contractions;
76                 readonly Level2Map [] level2Maps;
77
78                 // This flag marks characters as "unsafe", where the character
79                 // could be used as part of a contraction (whose length > 1).
80                 readonly byte [] unsafeFlags;
81
82                 const int UnsafeFlagLength = 0x300 / 8;
83
84                 // temporary sortkey buffer for index search/comparison
85                 byte [] charSortKey = new byte [4];
86                 byte [] charSortKey2 = new byte [4];
87                 byte [] charSortKeyIndexTarget = new byte [4];
88
89                 #region .ctor() and split functions
90
91                 public SimpleCollator (CultureInfo culture)
92                 {
93                         lcid = culture.LCID;
94                         textInfo = culture.TextInfo;
95                         buf = new SortKeyBuffer (culture.LCID);
96
97                         unsafe {
98                                 SetCJKTable (culture, ref cjkIndexer,
99                                         ref cjkCatTable, ref cjkLv1Table,
100                                         ref cjkLv2Indexer, ref cjkLv2Table);
101                         }
102
103                         // Get tailoring info
104                         TailoringInfo t = null;
105                         for (CultureInfo ci = culture; ci.LCID != 127; ci = ci.Parent) {
106                                 t = Uni.GetTailoringInfo (ci.LCID);
107                                 if (t != null)
108                                         break;
109                         }
110                         if (t == null) // then use invariant
111                                 t = Uni.GetTailoringInfo (127);
112
113                         frenchSort = t.FrenchSort;
114                         Uni.BuildTailoringTables (culture, t, ref contractions,
115                                 ref level2Maps);
116                         unsafeFlags = new byte [UnsafeFlagLength];
117                         foreach (Contraction c in contractions)
118                                 if (c.Source.Length > 1)
119                                         foreach (char ch in c.Source)
120                                                 unsafeFlags [(int) ch / 8 ]
121                                                         |= (byte) ((int) ch % 8);
122                         if (lcid != 127)
123                                 foreach (Contraction c in invariant.contractions)
124                                         if (c.Source.Length > 1)
125                                                 foreach (char ch in c.Source)
126                                                         unsafeFlags [(int) ch / 8 ] 
127                                                                 |= (byte) ((int) ch % 8);
128
129                         // FIXME: Since tailorings are mostly for latin
130                         // (and in some cases Cyrillic) characters, it would
131                         // be much better for performance to store "start 
132                         // indexes" for > 370 (culture-specific letters).
133
134 /*
135 // dump tailoring table
136 Console.WriteLine ("******** building table for {0} : c - {1} d - {2}",
137 culture.LCID, contractions.Length, level2Maps.Length);
138 foreach (Contraction c in contractions) {
139 foreach (char cc in c.Source)
140 Console.Write ("{0:X4} ", (int) cc);
141 Console.WriteLine (" -> '{0}'", c.Replacement);
142 }
143 */
144                 }
145
146                 unsafe private void SetCJKTable (
147                         CultureInfo culture, ref CodePointIndexer cjkIndexer,
148                         ref byte* catTable, ref byte* lv1Table,
149                         ref CodePointIndexer lv2Indexer, ref byte* lv2Table)
150                 {
151                         string name = GetNeutralCulture (culture).Name;
152
153                         Uni.FillCJK (name, ref cjkIndexer, ref catTable,
154                                 ref lv1Table, ref lv2Indexer, ref lv2Table);
155                 }
156
157                 static CultureInfo GetNeutralCulture (CultureInfo info)
158                 {
159                         CultureInfo ret = info;
160                         while (ret.Parent != null && ret.Parent.LCID != 127)
161                                 ret = ret.Parent;
162                         return ret;
163                 }
164
165                 #endregion
166
167                 unsafe byte Category (int cp)
168                 {
169                         if (cp < 0x3000 || cjkCatTable == null)
170                                 return Uni.Category (cp);
171                         int idx = cjkIndexer.ToIndex (cp);
172                         return idx < 0 ? Uni.Category (cp) : cjkCatTable [idx];
173                 }
174
175                 unsafe byte Level1 (int cp)
176                 {
177                         if (cp < 0x3000 || cjkLv1Table == null)
178                                 return Uni.Level1 (cp);
179                         int idx = cjkIndexer.ToIndex (cp);
180                         return idx < 0 ? Uni.Level1 (cp) : cjkLv1Table [idx];
181                 }
182
183                 unsafe byte Level2 (int cp, ExtenderType ext)
184                 {
185                         if (ext == ExtenderType.Buggy)
186                                 return 5;
187                         else if (ext == ExtenderType.Conditional)
188                                 return 0;
189
190                         if (cp < 0x3000 || cjkLv2Table == null)
191                                 return Uni.Level2 (cp);
192                         int idx = cjkLv2Indexer.ToIndex (cp);
193                         byte ret = idx < 0 ? (byte) 0 : cjkLv2Table [idx];
194                         if (ret != 0)
195                                 return ret;
196                         ret = Uni.Level2 (cp);
197                         if (level2Maps.Length == 0)
198                                 return ret;
199                         for (int i = 0; i < level2Maps.Length; i++) {
200                                 if (level2Maps [i].Source == ret)
201                                         return level2Maps [i].Replace;
202                                 else if (level2Maps [i].Source > ret)
203                                         break;
204                         }
205                         return ret;
206                 }
207
208                 bool IsHalfKana (int cp)
209                 {
210                         return ignoreWidth || Uni.IsHalfWidthKana ((char) cp);
211                 }
212
213                 void SetOptions (CompareOptions options)
214                 {
215                         this.ignoreNonSpace = (options & CompareOptions.IgnoreNonSpace) != 0;
216                         this.ignoreSymbols = (options & CompareOptions.IgnoreSymbols) != 0;
217                         this.ignoreWidth = (options & CompareOptions.IgnoreWidth) != 0;
218                         this.ignoreCase = (options & CompareOptions.IgnoreCase) != 0;
219                         this.ignoreKanaType = (options & CompareOptions.IgnoreKanaType) != 0;
220                         previousChar = previousChar2 = -1;
221                         previousSortKey = previousSortKey2 = null;
222                         escape1.Source = escape2.Source = null;
223                 }
224
225                 Contraction GetContraction (string s, int start, int end)
226                 {
227                         Contraction c = GetContraction (s, start, end, contractions);
228                         if (c != null || lcid == 127)
229                                 return c;
230                         return GetContraction (s, start, end, invariant.contractions);
231                 }
232
233                 Contraction GetContraction (string s, int start, int end, Contraction [] clist)
234                 {
235                         for (int i = 0; i < clist.Length; i++) {
236                                 Contraction ct = clist [i];
237                                 int diff = ct.Source [0] - s [start];
238                                 if (diff > 0)
239                                         return null; // it's already sorted
240                                 else if (diff < 0)
241                                         continue;
242                                 char [] chars = ct.Source;
243                                 if (end - start < chars.Length)
244                                         continue;
245                                 bool match = true;
246                                 for (int n = 0; n < chars.Length; n++)
247                                         if (s [start + n] != chars [n]) {
248                                                 match = false;
249                                                 break;
250                                         }
251                                 if (match)
252                                         return ct;
253                         }
254                         return null;
255                 }
256
257                 Contraction GetTailContraction (string s, int start, int end)
258                 {
259                         Contraction c = GetTailContraction (s, start, end, contractions);
260                         if (c != null || lcid == 127)
261                                 return c;
262                         return GetTailContraction (s, start, end, invariant.contractions);
263                 }
264
265                 Contraction GetTailContraction (string s, int start, int end, Contraction [] clist)
266                 {
267                         for (int i = 0; i < clist.Length; i++) {
268                                 Contraction ct = clist [i];
269                                 int diff = ct.Source [0] - s [end];
270                                 if (diff > 0)
271                                         return null; // it's already sorted
272                                 else if (diff < 0)
273                                         continue;
274                                 char [] chars = ct.Source;
275                                 if (start - end + 1 < chars.Length)
276                                         continue;
277                                 bool match = true;
278                                 int offset = start - chars.Length + 1;
279                                 for (int n = 0; n < chars.Length; n++)
280                                         if (s [offset + n] != chars [n]) {
281                                                 match = false;
282                                                 break;
283                                         }
284                                 if (match)
285                                         return ct;
286                         }
287                         return null;
288                 }
289
290                 Contraction GetContraction (char c)
291                 {
292                         Contraction ct = GetContraction (c, contractions);
293                         if (ct != null || lcid == 127)
294                                 return ct;
295                         return GetContraction (c, invariant.contractions);
296                 }
297
298                 Contraction GetContraction (char c, Contraction [] clist)
299                 {
300                         for (int i = 0; i < clist.Length; i++) {
301                                 Contraction ct = clist [i];
302                                 if (ct.Source [0] > c)
303                                         return null; // it's already sorted
304                                 if (ct.Source [0] == c && ct.Source.Length == 1)
305                                         return ct;
306                         }
307                         return null;
308                 }
309
310                 int FilterOptions (int i)
311                 {
312                         if (ignoreWidth) {
313                                 int x = Uni.ToWidthCompat (i);
314                                 if (x != 0)
315                                         i = x;
316                         }
317                         if (ignoreCase)
318                                 i = textInfo.ToLower ((char) i);
319                         if (ignoreKanaType)
320                                 i = Uni.ToKanaTypeInsensitive (i);
321                         return i;
322                 }
323
324                 int previousChar = -1;
325                 byte [] previousSortKey = null;
326                 int previousChar2 = -1;
327                 byte [] previousSortKey2 = null;
328
329                 enum ExtenderType {
330                         None,
331                         Simple,
332                         Voiced,
333                         Conditional,
334                         Buggy,
335                 }
336
337                 ExtenderType GetExtenderType (int i)
338                 {
339                         // LAMESPEC: Windows expects true for U+3005, but 
340                         // sometimes it does not represent to repeat just
341                         // one character.
342                         // Windows also expects true for U+3031 and U+3032,
343                         // but they should *never* repeat one character.
344
345                         // U+2015 becomes an extender only when it is Japanese
346                         if (i == 0x2015)
347                                 return lcid == 16 ? ExtenderType.Conditional :
348                                         ExtenderType.None;
349
350                         if (i < 0x3005 || i > 0xFF70)
351                                 return ExtenderType.None;
352                         if (i >= 0xFE7C) {
353                                 switch (i) {
354                                 case 0xFE7C:
355                                 case 0xFE7D:
356                                         return ExtenderType.Simple;
357                                 case 0xFF70:
358                                         return ExtenderType.Conditional;
359                                 case 0xFF9E:
360                                 case 0xFF9F:
361                                         return ExtenderType.Voiced;
362                                 }
363                         }
364                         if (i > 0x30FE)
365                                 return ExtenderType.None;
366                         switch (i) {
367                         case 0x3005: // LAMESPEC: see above.
368                                 return ExtenderType.Buggy;
369                         case 0x3031: // LAMESPEC: see above.
370                         case 0x3032: // LAMESPEC: see above.
371                         case 0x309D:
372                         case 0x30FD:
373                                 return ExtenderType.Simple;
374                         case 0x309E:
375                         case 0x30FE:
376                                 return ExtenderType.Voiced;
377                         case 0x30FC:
378                                 return ExtenderType.Conditional;
379                         default:
380                                 return ExtenderType.None;
381                         }
382                 }
383
384                 byte ToDashTypeValue (ExtenderType ext)
385                 {
386                         if (ignoreNonSpace) // LAMESPEC: huh, why?
387                                 return 3;
388                         switch (ext) {
389                         case ExtenderType.None:
390                                 return 3;
391                         case ExtenderType.Conditional:
392                                 return 5;
393                         default:
394                                 return 4;
395                         }
396                 }
397
398                 int FilterExtender (int i, ExtenderType ext)
399                 {
400                         if (ext == ExtenderType.Conditional &&
401                                 Uni.HasSpecialWeight ((char) i)) {
402                                 bool half = IsHalfKana ((char) i);
403                                 bool katakana = !Uni.IsHiragana ((char) i);
404                                 switch (Level1 (i) & 7) {
405                                 case 2:
406                                         return half ? 0xFF71 : katakana ? 0x30A2 : 0x3042;
407                                 case 3:
408                                         return half ? 0xFF72 : katakana ? 0x30A4 : 0x3044;
409                                 case 4:
410                                         return half ? 0xFF73 : katakana ? 0x30A6 : 0x3046;
411                                 case 5:
412                                         return half ? 0xFF74 : katakana ? 0x30A8 : 0x3048;
413                                 case 6:
414                                         return half ? 0xFF75 : katakana ? 0x30AA : 0x304A;
415                                 }
416                         }
417                         return i;
418                 }
419
420                 bool IsIgnorable (int i)
421                 {
422                         return Uni.IsIgnorable (i) ||
423                                 ignoreSymbols && Uni.IsIgnorableSymbol (i) ||
424                                 ignoreNonSpace && Uni.IsIgnorableNonSpacing (i);
425                 }
426
427                 bool IsSafe (int i)
428                 {
429                         return i / 8 >= unsafeFlags.Length ? true : (unsafeFlags [i / 8] & (1 << (i % 8))) == 0;
430                 }
431
432                 #region GetSortKey()
433
434                 public SortKey GetSortKey (string s)
435                 {
436                         return GetSortKey (s, CompareOptions.None);
437                 }
438
439                 public SortKey GetSortKey (string s, CompareOptions options)
440                 {
441                         return GetSortKey (s, 0, s.Length, options);
442                 }
443
444                 public SortKey GetSortKey (string s, int start, int length, CompareOptions options)
445                 {
446                         SetOptions (options);
447
448                         buf.Initialize (options, lcid, s, frenchSort);
449                         int end = start + length;
450                         GetSortKey (s, start, end);
451                         return buf.GetResultAndReset ();
452                 }
453
454                 void GetSortKey (string s, int start, int end)
455                 {
456                         for (int n = start; n < end; n++) {
457                                 int i = s [n];
458
459                                 ExtenderType ext = GetExtenderType (i);
460                                 if (ext != ExtenderType.None) {
461                                         i = FilterExtender (previousChar, ext);
462                                         if (i >= 0)
463                                                 FillSortKeyRaw (i, ext);
464                                         else if (previousSortKey != null) {
465                                                 byte [] b = previousSortKey;
466                                                 buf.AppendNormal (
467                                                         b [0],
468                                                         b [1],
469                                                         b [2] != 1 ? b [2] : Level2 (i, ext),
470                                                         b [3] != 1 ? b [3] : Uni.Level3 (i));
471                                         }
472                                         // otherwise do nothing.
473                                         // (if the extender is the first char
474                                         // in the string, then just ignore.)
475                                         continue;
476                                 }
477
478                                 if (IsIgnorable (i))
479                                         continue;
480                                 i = FilterOptions (i);
481
482                                 Contraction ct = GetContraction (s, n, end);
483                                 if (ct != null) {
484                                         if (ct.Replacement != null) {
485                                                 GetSortKey (ct.Replacement, 0, ct.Replacement.Length);
486                                         } else {
487                                                 byte [] b = ct.SortKey;
488                                                 buf.AppendNormal (
489                                                         b [0],
490                                                         b [1],
491                                                         b [2] != 1 ? b [2] : Level2 (i, ext),
492                                                         b [3] != 1 ? b [3] : Uni.Level3 (i));
493                                                 previousSortKey = b;
494                                                 previousChar = -1;
495                                         }
496                                         n += ct.Source.Length - 1;
497                                 }
498                                 else {
499                                         if (!Uni.IsIgnorableNonSpacing (i))
500                                                 previousChar = i;
501                                         FillSortKeyRaw (i, ExtenderType.None);
502                                 }
503                         }
504                 }
505
506                 void FillSortKeyRaw (int i, ExtenderType ext)
507                 {
508                         if (0x3400 <= i && i <= 0x4DB5) {
509                                 int diff = i - 0x3400;
510                                 buf.AppendCJKExtension (
511                                         (byte) (0x10 + diff / 254),
512                                         (byte) (diff % 254 + 2));
513                                 return;
514                         }
515
516                         UnicodeCategory uc = char.GetUnicodeCategory ((char) i);
517                         switch (uc) {
518                         case UnicodeCategory.PrivateUse:
519                                 int diff = i - 0xE000;
520                                 buf.AppendNormal (
521                                         (byte) (0xE5 + diff / 254),
522                                         (byte) (diff % 254 + 2),
523                                         0,
524                                         0);
525                                 return;
526                         case UnicodeCategory.Surrogate:
527                                 FillSurrogateSortKeyRaw (i);
528                                 return;
529                         }
530
531                         byte level2 = Level2 (i, ext);
532                         if (Uni.HasSpecialWeight ((char) i)) {
533                                 byte level1 = Level1 (i);
534                                 buf.AppendKana (
535                                         Category (i),
536                                         level1,
537                                         level2,
538                                         Uni.Level3 (i),
539                                         Uni.IsJapaneseSmallLetter ((char) i),
540                                         ToDashTypeValue (ext),
541                                         !Uni.IsHiragana ((char) i),
542                                         IsHalfKana ((char) i)
543                                         );
544                                 if (!ignoreNonSpace && ext == ExtenderType.Voiced)
545                                         // Append voice weight
546                                         buf.AppendNormal (1, 1, 1, 0);
547                         }
548                         else
549                                 buf.AppendNormal (
550                                         Category (i),
551                                         Level1 (i),
552                                         level2,
553                                         Uni.Level3 (i));
554                 }
555
556                 void FillSurrogateSortKeyRaw (int i)
557                 {
558                         int diffbase = 0;
559                         int segment = 0;
560                         byte lower = 0;
561
562                         if (i < 0xD840) {
563                                 diffbase = 0xD800;
564                                 segment = 0x41;
565                                 lower = (byte) ((i == 0xD800) ? 0x3E : 0x3F);
566                         } else if (0xD840 <= i && i < 0xD880) {
567                                 diffbase = 0xD840;
568                                 segment = 0xF2;
569                                 lower = 0x3E;
570                         } else if (0xDB80 <= i && i < 0xDC00) {
571                                 diffbase = 0xDB80 - 0x40;
572                                 segment = 0xFE;
573                                 lower = 0x3E;
574                         } else {
575                                 diffbase = 0xDC00 - 0xF8 + 2;
576                                 segment = 0x41;
577                                 lower = 0x3F;
578                         }
579                         int diff = i - diffbase;
580
581                         buf.AppendNormal (
582                                 (byte) (segment + diff / 254),
583                                 (byte) (diff % 254 + 2),
584                                 lower,
585                                 lower);
586                 }
587
588                 #endregion
589
590                 #region Compare()
591
592                 public int Compare (string s1, string s2)
593                 {
594                         return Compare (s1, s2, CompareOptions.None);
595                 }
596
597                 public int Compare (string s1, string s2, CompareOptions options)
598                 {
599                         return Compare (s1, 0, s1.Length, s2, 0, s2.Length, options);
600                 }
601
602                 class Escape
603                 {
604                         public string Source;
605                         public int Index;
606                         public int Start;
607                         public int End;
608                         public int Optional;
609                 }
610
611                 // Those instances are reused not to invoke instantiation
612                 // during Compare().
613                 Escape escape1 = new Escape ();
614                 Escape escape2 = new Escape ();
615
616                 private int CompareOrdinal (string s1, int idx1, int len1,
617                         string s2, int idx2, int len2)
618                 {
619                         int min = len1 < len2 ? len1 : len2;
620                         int end1 = idx1 + min;
621                         int end2 = idx2 + min;
622                         for (int i1 = idx1, i2 = idx2;
623                                 i1 < end1 && i2 < end2; i1++, i2++)
624                                 if (s1 [i1] != s2 [i2])
625                                         return s1 [i1] - s2 [i2];
626                         return len1 == len2 ? 0 :
627                                 len1 == min ? - 1 : 1;
628                 }
629
630                 public int Compare (string s1, int idx1, int len1,
631                         string s2, int idx2, int len2, CompareOptions options)
632                 {
633                         // quick equality check
634                         if (idx1 == idx2 && len1 == len2 &&
635                                 Object.ReferenceEquals (s1, s2))
636                                 return 0;
637                         // FIXME: this should be done inside Compare() at
638                         // any time.
639 //                      int ord = CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
640 //                      if (ord == 0)
641 //                              return 0;
642                         if (options == CompareOptions.Ordinal)
643                                 return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
644
645 #if false // stable easy version, depends on GetSortKey().
646                         SortKey sk1 = GetSortKey (s1, idx1, len1, options);
647                         SortKey sk2 = GetSortKey (s2, idx2, len2, options);
648                         byte [] d1 = sk1.KeyData;
649                         byte [] d2 = sk2.KeyData;
650                         int len = d1.Length > d2.Length ? d2.Length : d1.Length;
651                         for (int i = 0; i < len; i++)
652                                 if (d1 [i] != d2 [i])
653                                         return d1 [i] < d2 [i] ? -1 : 1;
654                         return d1.Length == d2.Length ? 0 : d1.Length < d2.Length ? -1 : 1;
655 #else
656                         SetOptions (options);
657                         bool dummy, dummy2;
658                         int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, (options & CompareOptions.StringSort) != 0, out dummy, out dummy2, true);
659                         return ret == 0 ? 0 : ret < 0 ? -1 : 1;
660 #endif
661                 }
662
663                 int CompareInternal (string s1, int idx1, int len1, string s2,
664                         int idx2, int len2, bool stringSort,
665                         out bool targetConsumed, out bool sourceConsumed,
666                         bool skipHeadingExtenders)
667                 {
668                         int start1 = idx1;
669                         int start2 = idx2;
670                         int end1 = idx1 + len1;
671                         int end2 = idx2 + len2;
672                         targetConsumed = false;
673                         sourceConsumed = false;
674
675                         // It holds final result that comes from the comparison
676                         // at level 2 or lower. Even if Compare() found the
677                         // difference at level 2 or lower, it still has to
678                         // continue level 1 comparison. FinalResult is used
679                         // when there was no further differences.
680                         int finalResult = 0;
681                         // It holds the comparison level to do. It starts from
682                         // 5, and becomes 1 when we do primary-only comparison.
683                         int currentLevel = 5;
684
685                         int lv5At1 = -1;
686                         int lv5At2 = -1;
687                         int lv5Value1 = 0;
688                         int lv5Value2 = 0;
689
690                         // Skip heading extenders
691                         if (skipHeadingExtenders) {
692                                 for (; idx1 < end1; idx1++)
693                                         if (GetExtenderType (s1 [idx1]) == ExtenderType.None)
694                                                 break;
695                                 for (; idx2 < end2; idx2++)
696                                         if (GetExtenderType (s2 [idx2]) == ExtenderType.None)
697                                                 break;
698                         }
699
700                         ExtenderType ext1 = ExtenderType.None;
701                         ExtenderType ext2 = ExtenderType.None;
702
703                         int quickCheckPos1 = idx1;
704                         int quickCheckPos2 = idx2;
705
706                         while (true) {
707                                 for (; idx1 < end1; idx1++)
708                                         if (!IsIgnorable (s1 [idx1]))
709                                                 break;
710                                 for (; idx2 < end2; idx2++)
711                                         if (!IsIgnorable (s2 [idx2]))
712                                                 break;
713
714                                 if (idx1 >= end1) {
715                                         if (escape1.Source == null)
716                                                 break;
717                                         s1 = escape1.Source;
718                                         start1 = escape1.Start;
719                                         idx1 = escape1.Index;
720                                         end1 = escape1.End;
721                                         quickCheckPos1 = escape1.Optional;
722                                         escape1.Source = null;
723                                         continue;
724                                 }
725                                 if (idx2 >= end2) {
726                                         if (escape2.Source == null)
727                                                 break;
728                                         s2 = escape2.Source;
729                                         start2 = escape2.Start;
730                                         idx2 = escape2.Index;
731                                         end2 = escape2.End;
732                                         quickCheckPos2 = escape2.Optional;
733                                         escape2.Source = null;
734                                         continue;
735                                 }
736 #if true
737 // If comparison is unstable, then this part is one of the most doubtful part.
738 // Here we run quick codepoint comparison and run back to "stable" area to
739 // compare characters.
740
741                                 // Strictly to say, even the first character
742                                 // could be compared here, but it messed
743                                 // backward step, so I just avoided mess.
744                                 if (quickCheckPos1 < idx1 && quickCheckPos2 < idx2) {
745                                         while (idx1 < end1 && idx2 < end2 &&
746                                                 s1 [idx1] == s2 [idx2]) {
747                                                 idx1++;
748                                                 idx2++;
749                                         }
750                                         if (idx1 == end1 || idx2 == end2)
751                                                 continue; // check replacement
752
753                                         int backwardEnd1 = quickCheckPos1;
754                                         int backwardEnd2 = quickCheckPos2;
755                                         quickCheckPos1 = idx1;
756                                         quickCheckPos2 = idx2;
757
758                                         idx1--;
759                                         idx2--;
760
761                                         for (;idx1 > backwardEnd1; idx1--)
762                                                 if (Category (s1 [idx1]) != 1)
763                                                         break;
764                                         for (;idx2 > backwardEnd2; idx2--)
765                                                 if (Category (s2 [idx2]) != 1)
766                                                         break;
767                                         for (;idx1 > backwardEnd1; idx1--)
768                                                 if (IsSafe (s1 [idx1]))
769                                                         break;
770                                         for (;idx2 > backwardEnd2; idx2--)
771                                                 if (IsSafe (s2 [idx2]))
772                                                         break;
773                                 }
774 #endif
775
776                                 int cur1 = idx1;
777                                 int cur2 = idx2;
778                                 byte [] sk1 = null;
779                                 byte [] sk2 = null;
780                                 int i1 = FilterOptions (s1 [idx1]);
781                                 int i2 = FilterOptions (s2 [idx2]);
782                                 bool special1 = false;
783                                 bool special2 = false;
784
785                                 // If current character is an expander, then
786                                 // repeat the previous character.
787                                 ext1 = GetExtenderType (i1);
788                                 if (ext1 != ExtenderType.None) {
789                                         if (previousChar < 0) {
790                                                 if (previousSortKey == null) {
791                                                         // nothing to extend
792                                                         idx1++;
793                                                         continue;
794                                                 }
795                                                 sk1 = previousSortKey;
796                                         }
797                                         else
798                                                 i1 = FilterExtender (previousChar, ext1);
799                                 }
800                                 ext2 = GetExtenderType (i2);
801                                 if (ext2 != ExtenderType.None) {
802                                         if (previousChar2 < 0) {
803                                                 if (previousSortKey2 == null) {
804                                                         // nothing to extend
805                                                         idx2++;
806                                                         continue;
807                                                 }
808                                                 sk2 = previousSortKey2;
809                                         }
810                                         else
811                                                 i2 = FilterExtender (previousChar2, ext2);
812                                 }
813
814                                 byte cat1 = Category (i1);
815                                 byte cat2 = Category (i2);
816
817                                 // Handle special weight characters
818                                 if (!stringSort && currentLevel > 4) {
819                                         if (cat1 == 6) {
820                                                 lv5At1 = escape1.Source != null ?
821                                                         escape1.Index - escape1.Start :
822                                                         cur1 - start1;
823                                                 // here Windows has a bug that it does
824                                                 // not consider thirtiary weight.
825                                                 lv5Value1 = Level1 (i1) << 8 + Uni.Level3 (i1);
826                                                 previousChar = i1;
827                                                 idx1++;
828                                         }
829                                         if (cat2 == 6) {
830                                                 lv5At2 = escape2.Source != null ?
831                                                         escape2.Index - escape2.Start :
832                                                         cur2 - start2;
833                                                 // here Windows has a bug that it does
834                                                 // not consider thirtiary weight.
835                                                 lv5Value2 = Level1 (i2) << 8 + Uni.Level3 (i2);
836                                                 previousChar2 = i2;
837                                                 idx2++;
838                                         }
839                                         if (cat1 == 6 || cat2 == 6) {
840                                                 currentLevel = 4;
841                                                 continue;
842                                         }
843                                 }
844
845                                 Contraction ct1 = null;
846                                 if (ext1 == ExtenderType.None)
847                                         ct1 = GetContraction (s1, idx1, end1);
848
849                                 int offset1 = 1;
850                                 if (sk1 != null)
851                                         offset1 = 1;
852                                 else if (ct1 != null) {
853                                         offset1 = ct1.Source.Length;
854                                         if (ct1.SortKey != null) {
855                                                 sk1 = charSortKey;
856                                                 for (int i = 0; i < ct1.SortKey.Length; i++)
857                                                         sk1 [i] = ct1.SortKey [i];
858                                                 previousChar = -1;
859                                                 previousSortKey = sk1;
860                                         }
861                                         else if (escape1.Source == null) {
862                                                 escape1.Source = s1;
863                                                 escape1.Start = start1;
864                                                 escape1.Index = cur1 + ct1.Source.Length;
865                                                 escape1.End = end1;
866                                                 escape1.Optional = quickCheckPos1;
867                                                 s1 = ct1.Replacement;
868                                                 idx1 = 0;
869                                                 start1 = 0;
870                                                 end1 = s1.Length;
871                                                 quickCheckPos1 = 0;
872                                                 continue;
873                                         }
874                                 }
875                                 else {
876                                         sk1 = charSortKey;
877                                         sk1 [0] = cat1;
878                                         sk1 [1] = Level1 (i1);
879                                         if (!ignoreNonSpace && currentLevel > 1)
880                                                 sk1 [2] = Level2 (i1, ext1);
881                                         if (currentLevel > 2)
882                                                 sk1 [3] = Uni.Level3 (i1);
883                                         if (currentLevel > 3)
884                                                 special1 = Uni.HasSpecialWeight ((char) i1);
885                                         if (cat1 > 1)
886                                                 previousChar = i1;
887                                 }
888
889                                 Contraction ct2 = null;
890                                 if (ext2 == ExtenderType.None)
891                                         ct2 = GetContraction (s2, idx2, end2);
892
893                                 if (sk2 != null)
894                                         idx2++;
895                                 else if (ct2 != null) {
896                                         idx2 += ct2.Source.Length;
897                                         if (ct2.SortKey != null) {
898                                                 sk2 = charSortKey2;
899                                                 for (int i = 0; i < ct2.SortKey.Length; i++)
900                                                         sk2 [i] = ct2.SortKey [i];
901                                                 previousChar2 = -1;
902                                                 previousSortKey2 = sk2;
903                                         }
904                                         else if (escape2.Source == null) {
905                                                 escape2.Source = s2;
906                                                 escape2.Start = start2;
907                                                 escape2.Index = cur2 + ct2.Source.Length;
908                                                 escape2.End = end2;
909                                                 escape2.Optional = quickCheckPos2;
910                                                 s2 = ct2.Replacement;
911                                                 idx2 = 0;
912                                                 start2 = 0;
913                                                 end2 = s2.Length;
914                                                 quickCheckPos2 = 0;
915                                                 continue;
916                                         }
917                                 }
918                                 else {
919                                         sk2 = charSortKey2;
920                                         sk2 [0] = cat2;
921                                         sk2 [1] = Level1 (i2);
922                                         if (!ignoreNonSpace && currentLevel > 1)
923                                                 sk2 [2] = Level2 (i2, ext2);
924                                         if (currentLevel > 2)
925                                                 sk2 [3] = Uni.Level3 (i2);
926                                         if (currentLevel > 3)
927                                                 special2 = Uni.HasSpecialWeight ((char) i2);
928                                         if (cat2 > 1)
929                                                 previousChar2 = i2;
930                                         idx2++;
931                                 }
932
933                                 // Add offset here so that it does not skip
934                                 // idx1 while s2 has a replacement.
935                                 idx1 += offset1;
936
937                                 // add diacritical marks in s1 here
938                                 if (!ignoreNonSpace) {
939                                         while (idx1 < end1) {
940                                                 if (Category (s1 [idx1]) != 1)
941                                                         break;
942                                                 if (sk1 [2] == 0)
943                                                         sk1 [2] = 2;
944                                                 sk1 [2] = (byte) (sk1 [2] + 
945                                                         Level2 (s1 [idx1], ExtenderType.None));
946                                                 idx1++;
947                                         }
948
949                                         // add diacritical marks in s2 here
950                                         while (idx2 < end2) {
951                                                 if (Category (s2 [idx2]) != 1)
952                                                         break;
953                                                 if (sk2 [2] == 0)
954                                                         sk2 [2] = 2;
955                                                 sk2 [2] = (byte) (sk2 [2] + 
956                                                         Level2 (s2 [idx2], ExtenderType.None));
957                                                 idx2++;
958                                         }
959                                 }
960
961                                 int ret = sk1 [0] - sk2 [0];
962                                 ret = ret != 0 ? ret : sk1 [1] - sk2 [1];
963                                 if (ret != 0)
964                                         return ret;
965                                 if (currentLevel == 1)
966                                         continue;
967                                 if (!ignoreNonSpace) {
968                                         ret = sk1 [2] - sk2 [2];
969                                         if (ret != 0) {
970                                                 finalResult = ret;
971                                                 currentLevel = frenchSort ? 2 : 1;
972                                                 continue;
973                                         }
974                                 }
975                                 if (currentLevel == 2)
976                                         continue;
977                                 ret = sk1 [3] - sk2 [3];
978                                 if (ret != 0) {
979                                         finalResult = ret;
980                                         currentLevel = 2;
981                                         continue;
982                                 }
983                                 if (currentLevel == 3)
984                                         continue;
985                                 if (special1 != special2) {
986                                         finalResult = special1 ? 1 : -1;
987                                         currentLevel = 3;
988                                         continue;
989                                 }
990                                 if (special1) {
991                                         ret = CompareFlagPair (
992                                                 !Uni.IsJapaneseSmallLetter ((char) i1),
993                                                 !Uni.IsJapaneseSmallLetter ((char) i2));
994                                         ret = ret != 0 ? ret :
995                                                 ToDashTypeValue (ext1) -
996                                                 ToDashTypeValue (ext2);
997                                         ret = ret != 0 ? ret : CompareFlagPair (
998                                                 Uni.IsHiragana ((char) i1),
999                                                 Uni.IsHiragana ((char) i2));
1000                                         ret = ret != 0 ? ret : CompareFlagPair (
1001                                                 !IsHalfKana ((char) i1),
1002                                                 !IsHalfKana ((char) i2));
1003                                         if (ret != 0) {
1004                                                 finalResult = ret;
1005                                                 currentLevel = 3;
1006                                                 continue;
1007                                         }
1008                                 }
1009                         }
1010
1011                         // If there were only level 3 or lower differences,
1012                         // then we still have to find diacritical differences
1013                         // if any.
1014                         if (!ignoreNonSpace &&
1015                                 finalResult != 0 && currentLevel > 2) {
1016                                 while (idx1 < end1 && idx2 < end2) {
1017                                         if (!Uni.IsIgnorableNonSpacing (s1 [idx1]))
1018                                                 break;
1019                                         if (!Uni.IsIgnorableNonSpacing (s2 [idx2]))
1020                                                 break;
1021                                         finalResult = Level2 (FilterOptions ((s1 [idx1])), ext1) - Level2 (FilterOptions (s2 [idx2]), ext2);
1022                                         if (finalResult != 0)
1023                                                 break;
1024                                         idx1++;
1025                                         idx2++;
1026                                         // they should work only for the first character
1027                                         ext1 = ExtenderType.None;
1028                                         ext2 = ExtenderType.None;
1029                                 }
1030                         }
1031                         if (currentLevel == 1 && finalResult != 0) {
1032                                 while (idx1 < end1)
1033                                         if (Uni.IsIgnorableNonSpacing (s1 [idx1]))
1034                                                 idx1++;
1035                                 while (idx2 < end2)
1036                                         if (Uni.IsIgnorableNonSpacing (s2 [idx2]))
1037                                                 idx2++;
1038                         }
1039                         // we still have to handle level 5
1040                         if (finalResult == 0) {
1041                                 finalResult = lv5At1 - lv5At2;
1042                                 if (finalResult == 0)
1043                                         finalResult = lv5Value1 - lv5Value2;
1044                         }
1045                         if (finalResult == 0) {
1046                                 if (idx2 == end2)
1047                                         targetConsumed = true;
1048                                 if (idx1 == end1)
1049                                         sourceConsumed = true;
1050                         }
1051                         return idx1 != end1 ? 1 : idx2 == end2 ? finalResult : -1;
1052                 }
1053
1054                 int CompareFlagPair (bool b1, bool b2)
1055                 {
1056                         return b1 == b2 ? 0 : b1 ? 1 : -1;
1057                 }
1058
1059                 #endregion
1060
1061                 #region IsPrefix() and IsSuffix()
1062
1063                 public bool IsPrefix (string src, string target, CompareOptions opt)
1064                 {
1065                         return IsPrefix (src, target, 0, src.Length, opt);
1066                 }
1067
1068                 public bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
1069                 {
1070                         SetOptions (opt);
1071                         return IsPrefix (s, target, start, length, 
1072                                 (opt & CompareOptions.StringSort) != 0, true);
1073                 }
1074
1075                 public bool IsPrefix (string s, string target, int start, int length, bool stringSort, bool skipHeadingExtenders)
1076                 {
1077                         bool consumed, dummy;
1078                         int ret = CompareInternal (s, start, length,
1079                                 target, 0, target.Length, stringSort,
1080                                 out consumed, out dummy, skipHeadingExtenders);
1081                         return consumed;
1082                 }
1083
1084                 // IsSuffix()
1085
1086                 public bool IsSuffix (string src, string target, CompareOptions opt)
1087                 {
1088                         return IsSuffix (src, target, src.Length - 1, src.Length, opt);
1089                 }
1090
1091                 public bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
1092                 {
1093                         // quick check : simple codepoint comparison
1094                         if (s.Length >= target.Length) {
1095                                 int si = start;
1096                                 int se = start - length;
1097                                 for (int i = target.Length - 1; si >= se && i >= 0; i--, si--)
1098                                         if (s [si] != target [i])
1099                                                 break;
1100                                 if (si == start + target.Length)
1101                                         return true;
1102                         }
1103
1104                         SetOptions (opt);
1105                         return IsSuffix (s, target, start, length,
1106                                 (opt & CompareOptions.StringSort) != 0);
1107                 }
1108
1109                 bool IsSuffix (string s, string t, int start, int length,
1110                         bool stringSort)
1111                 {
1112                         int tstart = 0;
1113                         for (;tstart < t.Length; tstart++)
1114                                 if (!IsIgnorable (t [tstart]))
1115                                         break;
1116                         if (tstart == t.Length)
1117                                 return true; // as if target is String.Empty.
1118
1119 #if false
1120 // This is still not working. If it is not likely to get working, then just remove it.
1121                         int si = start;
1122                         int send = start - length;
1123                         int ti = t.Length - 1;
1124                         int tend = -1;
1125
1126                         int sStep = start + 1;
1127                         int tStep = t.Length;
1128                         bool sourceConsumed, targetConsumed;
1129                         while (true) {
1130                                 for (; send < si; si--)
1131                                         if (!IsIgnorable (s [si]))
1132                                                 break;
1133                                 for (; tend < ti; ti--)
1134                                         if (!IsIgnorable (t [ti]))
1135                                                 break;
1136                                 if (tend == ti)
1137                                         break;
1138                                 for (; send < si; si--)
1139                                         if (IsSafe (s [si]))
1140                                                 break;
1141                                 for (; tend < ti; ti--)
1142                                         if (IsSafe (t [ti]))
1143                                                 break;
1144 Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, length, t, ti, tstart, sStep - si, tStep - ti);
1145                                 if (CompareInternal (s, si, sStep - si,
1146                                         t, ti, tStep - ti, stringSort,
1147                                         out targetConsumed, out sourceConsumed,
1148                                         true) != 0)
1149                                         return false;
1150                                 if (send == si)
1151                                         return false;
1152                                 sStep = si;
1153                                 tStep = ti;
1154                                 si--;
1155                                 ti--;
1156                         }
1157                         return true;
1158 #else
1159                         // FIXME: it is not efficient for very long target.
1160                         bool sourceConsumed, targetConsumed;
1161                         int mismatchCount = 0;
1162                         for (int i = 0; i < length; i++) {
1163                                 escape1.Source = escape2.Source = null;
1164                                 previousSortKey = previousSortKey2 = null;
1165                                 previousChar = previousChar2 = -1;
1166
1167                                 int ret = CompareInternal (s, start - i, i + 1,
1168                                         t, tstart, t.Length - tstart,
1169                                         stringSort, out targetConsumed,
1170                                         out sourceConsumed, true);
1171                                 if (ret == 0)
1172                                         return true;
1173                                 if (!sourceConsumed && targetConsumed)
1174                                         return false;
1175                                 if (!targetConsumed) {
1176                                         mismatchCount++;
1177                                         if (mismatchCount > Uni.MaxExpansionLength)
1178                                                 // The largest length of an
1179                                                 // expansion is 3, so if the
1180                                                 // target was not consumed more
1181                                                 // than 3 times, then the tail
1182                                                 // character does not match.
1183                                                 return false;
1184                                 }
1185                         }
1186                         return false;
1187 #endif
1188                 }
1189
1190                 #endregion
1191
1192                 #region IndexOf() / LastIndexOf()
1193
1194                 public int IndexOf (string s, string target, CompareOptions opt)
1195                 {
1196                         return IndexOf (s, target, 0, s.Length, opt);
1197                 }
1198
1199                 public int IndexOf (string s, string target, int start, int length, CompareOptions opt)
1200                 {
1201                         SetOptions (opt);
1202                         return IndexOf (s, target, start, length,
1203                                 (opt & CompareOptions.StringSort) != 0);
1204                 }
1205
1206                 public int IndexOf (string s, char target, CompareOptions opt)
1207                 {
1208                         return IndexOf (s, target, 0, s.Length, opt);
1209                 }
1210
1211                 public int IndexOf (string s, char target, int start, int length, CompareOptions opt)
1212                 {
1213                         SetOptions (opt);
1214
1215                         // If target is contraction, then use string search.
1216                         Contraction ct = GetContraction (target);
1217                         if (ct != null) {
1218                                 if (ct.Replacement != null)
1219                                         return IndexOf (s, ct.Replacement, start, length,
1220                                                 (opt & CompareOptions.StringSort) != 0);
1221                                 else
1222                                         return IndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
1223                         } else {
1224                                 int ti = FilterOptions ((int) target);
1225                                 charSortKeyIndexTarget [0] = Category (ti);
1226                                 charSortKeyIndexTarget [1] = Level1 (ti);
1227                                 if (!ignoreNonSpace)
1228                                         charSortKeyIndexTarget [2] =
1229                                                 Level2 (ti, ExtenderType.None);
1230                                 charSortKeyIndexTarget [3] = Uni.Level3 (ti);
1231                                 return IndexOfSortKey (s, start, length,
1232                                         charSortKeyIndexTarget, target, ti,
1233                                         !Uni.HasSpecialWeight ((char) ti));
1234                         }
1235                 }
1236
1237                 // Searches target byte[] keydata
1238                 int IndexOfSortKey (string s, int start, int length, byte [] sortkey, char target, int ti, bool noLv4)
1239                 {
1240                         int end = start + length;
1241                         int idx = start;
1242                         while (idx < end) {
1243                                 int cur = idx;
1244                                 if (MatchesForward (s, ref idx, end, ti, sortkey, noLv4))
1245                                         return cur;
1246                         }
1247                         return -1;
1248                 }
1249
1250                 // Searches string. Search head character (or keydata when
1251                 // the head is contraction sortkey) and try IsPrefix().
1252                 int IndexOf (string s, string target, int start, int length, bool stringSort)
1253                 {
1254                         int tidx = 0;
1255                         for (; tidx < target.Length; tidx++)
1256                                 if (!IsIgnorable (target [tidx]))
1257                                         break;
1258                         if (tidx == target.Length)
1259                                 return start;
1260                         Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1261                         string replace = ct != null ? ct.Replacement : null;
1262                         byte [] sk = replace == null ? charSortKeyIndexTarget : null;
1263                         bool noLv4 = true;
1264                         char tc = char.MinValue;
1265                         int ti = -1;
1266                         if (ct != null && sk != null) {
1267                                 for (int i = 0; i < ct.SortKey.Length; i++)
1268                                         sk [i] = ct.SortKey [i];
1269                         } else if (sk != null) {
1270                                 tc = target [tidx];
1271                                 ti = FilterOptions (target [tidx]);
1272                                 sk [0] = Category (ti);
1273                                 sk [1] = Level1 (ti);
1274                                 if (!ignoreNonSpace)
1275                                         sk [2] = Level2 (ti, ExtenderType.None);
1276                                 sk [3] = Uni.Level3 (ti);
1277                                 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1278                         }
1279                         if (sk != null) {
1280                                 for (tidx++; tidx < target.Length; tidx++) {
1281                                         if (Category (target [tidx]) != 1)
1282                                                 break;
1283                                         if (sk [2] == 0)
1284                                                 sk [2] = 2;
1285                                         sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1286                                 }
1287                         }
1288
1289                         do {
1290                                 int idx = 0;
1291                                 if (replace != null)
1292                                         idx = IndexOf (s, replace, start, length, stringSort);
1293                                 else
1294                                         idx = IndexOfSortKey (s, start, length, sk, tc, ti, noLv4);
1295                                 if (idx < 0)
1296                                         return -1;
1297                                 length -= idx - start;
1298                                 start = idx;
1299                                 if (IsPrefix (s, target, start, length, stringSort, false))
1300                                         return idx;
1301                                 Contraction cts = GetContraction (s, start, length);
1302                                 if (cts != null) {
1303                                         start += cts.Source.Length;
1304                                         length -= cts.Source.Length;
1305                                 } else {
1306                                         start++;
1307                                         length--;
1308                                 }
1309                         } while (length > 0);
1310                         return -1;
1311                 }
1312
1313                 //
1314                 // There are the same number of IndexOf() related methods,
1315                 // with the same functionalities for each.
1316                 //
1317
1318                 public int LastIndexOf (string s, string target, CompareOptions opt)
1319                 {
1320                         return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1321                 }
1322
1323                 public int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
1324                 {
1325                         SetOptions (opt);
1326                         return LastIndexOf (s, target, start, length,
1327                                 (opt & CompareOptions.StringSort) != 0);
1328                 }
1329
1330                 public int LastIndexOf (string s, char target, CompareOptions opt)
1331                 {
1332                         return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1333                 }
1334
1335                 public int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
1336                 {
1337                         SetOptions (opt);
1338
1339                         // If target is contraction, then use string search.
1340                         Contraction ct = GetContraction (target);
1341                         if (ct != null) {
1342                                 if (ct.Replacement != null)
1343                                         return LastIndexOf (s, ct.Replacement, start, length, (opt & CompareOptions.StringSort) != 0);
1344                                 else
1345                                         return LastIndexOfSortKey (s, start, start, length, ct.SortKey, char.MinValue, -1, true);
1346                         }
1347                         else {
1348                                 int ti = FilterOptions ((int) target);
1349                                 charSortKeyIndexTarget [0] = Category (ti);
1350                                 charSortKeyIndexTarget [1] = Level1 (ti);
1351                                 if (!ignoreNonSpace)
1352                                         charSortKeyIndexTarget [2] = Level2 (ti, ExtenderType.None);
1353                                 charSortKeyIndexTarget [3] = Uni.Level3 (ti);
1354                                 return LastIndexOfSortKey (s, start, start, length, charSortKeyIndexTarget, target, ti, !Uni.HasSpecialWeight ((char) ti));
1355                         }
1356                 }
1357
1358                 // Searches target byte[] keydata
1359                 int LastIndexOfSortKey (string s, int start, int orgStart, int length, byte [] sortkey, char target, int ti, bool noLv4)
1360                 {
1361                         int end = start - length;
1362                         int idx = start;
1363                         while (idx > end) {
1364                                 int cur = idx;
1365                                 if (MatchesBackward (s, ref idx, end, orgStart, ti, sortkey, noLv4))
1366                                         return cur;
1367                         }
1368                         return -1;
1369                 }
1370
1371                 // Searches string. Search head character (or keydata when
1372                 // the head is contraction sortkey) and try IsPrefix().
1373                 int LastIndexOf (string s, string target, int start, int length, bool stringSort)
1374                 {
1375                         int orgStart = start;
1376                         int tidx = 0;
1377                         for (; tidx < target.Length; tidx++)
1378                                 if (!IsIgnorable (target [tidx]))
1379                                         break;
1380                         if (tidx == target.Length)
1381                                 return start;
1382                         Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1383                         string replace = ct != null ? ct.Replacement : null;
1384                         byte [] sk = replace == null ? charSortKeyIndexTarget : null;
1385
1386                         bool noLv4 = true;
1387                         char tc = char.MinValue;
1388                         int ti = -1;
1389                         if (ct != null && sk != null) {
1390                                 for (int i = 0; i < ct.SortKey.Length; i++)
1391                                         sk [i] = ct.SortKey [i];
1392                         } else if (sk != null) {
1393                                 tc = target [tidx];
1394                                 ti = FilterOptions (target [tidx]);
1395                                 sk [0] = Category (ti);
1396                                 sk [1] = Level1 (ti);
1397                                 if (!ignoreNonSpace)
1398                                         sk [2] = Level2 (ti, ExtenderType.None);
1399                                 sk [3] = Uni.Level3 (ti);
1400                                 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1401                         }
1402                         if (sk != null) {
1403                                 for (tidx++; tidx < target.Length; tidx++) {
1404                                         if (Category (target [tidx]) != 1)
1405                                                 break;
1406                                         if (sk [2] == 0)
1407                                                 sk [2] = 2;
1408                                         sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1409                                 }
1410                         }
1411
1412                         do {
1413                                 int idx = 0;
1414
1415                                 if (replace != null)
1416                                         idx = LastIndexOf (s, replace, start, length, stringSort);
1417                                 else
1418                                         idx = LastIndexOfSortKey (s, start, orgStart, length, sk, tc, ti, noLv4);
1419                                 if (idx < 0)
1420                                         return -1;
1421                                 length -= start - idx;
1422                                 start = idx;
1423
1424                                 if (IsPrefix (s, target, idx, orgStart - idx + 1, stringSort, false)) {
1425                                         for (;idx < orgStart; idx++)
1426                                                 if (!IsIgnorable (s [idx]))
1427                                                         break;
1428                                         return idx;
1429                                 }
1430                                 Contraction cts = GetContraction (s, idx, orgStart - idx + 1);
1431                                 if (cts != null) {
1432                                         start -= cts.Source.Length;
1433                                         length -= cts.Source.Length;
1434                                 } else {
1435                                         start--;
1436                                         length--;
1437                                 }
1438                         } while (length > 0);
1439                         return -1;
1440                 }
1441
1442                 private bool MatchesForward (string s, ref int idx, int end, int ti, byte [] sortkey, bool noLv4)
1443                 {
1444                         int si = -1;
1445                         ExtenderType ext = GetExtenderType (s [idx]);
1446                         Contraction ct = null;
1447                         if (ext == ExtenderType.None)
1448                                 ct = GetContraction (s, idx, end);
1449                         else if (previousChar < 0) {
1450                                 if (previousSortKey == null) {
1451                                         idx++;
1452                                         return false;
1453                                 }
1454                                 charSortKey = previousSortKey;
1455                         }
1456                         else
1457                                 si = FilterExtender (previousChar, ext);
1458                         // if lv4 exists, it never matches contraction
1459                         if (ct != null) {
1460                                 idx += ct.Source.Length;
1461                                 if (!noLv4)
1462                                         return false;
1463                                 if (ct.SortKey != null) {
1464                                         for (int i = 0; i < sortkey.Length; i++)
1465                                                 charSortKey [i] = sortkey [i];
1466                                         previousChar = -1;
1467                                         previousSortKey = charSortKey;
1468                                 } else {
1469                                         // Here is the core of LAMESPEC
1470                                         // described at the top of the source.
1471                                         int dummy = 0;
1472                                         return MatchesForward (ct.Replacement, ref dummy,
1473                                                 ct.Replacement.Length, ti, sortkey, noLv4);
1474                                 }
1475                         } else {
1476                                 if (si < 0)
1477                                         si = FilterOptions (s [idx]);
1478                                 idx++;
1479                                 charSortKey [0] = Category (si);
1480                                 bool noMatch = false;
1481                                 if (sortkey [0] == charSortKey [0])
1482                                         charSortKey [1] = Level1 (si);
1483                                 else
1484                                         noMatch = true;
1485                                 if (!ignoreNonSpace && sortkey [1] == charSortKey [1])
1486                                         charSortKey [2] = Level2 (si, ext);
1487                                 else if (!ignoreNonSpace)
1488                                         noMatch = true;
1489                                 if (noMatch) {
1490                                         for (; idx < end; idx++) {
1491                                                 if (Category (s [idx]) != 1)
1492                                                         break;
1493                                         }
1494                                         return false;
1495                                 }
1496                                 charSortKey [3] = Uni.Level3 (si);
1497                                 if (charSortKey [0] != 1)
1498                                         previousChar = si;
1499                         }
1500                         for (; idx < end; idx++) {
1501                                 if (Category (s [idx]) != 1)
1502                                         break;
1503                                 if (ignoreNonSpace)
1504                                         continue;
1505                                 if (charSortKey [2] == 0)
1506                                                 charSortKey [2] = 2;
1507                                         charSortKey [2] = (byte) (charSortKey [2]
1508                                                 + Level2 (s [idx], ExtenderType.None));
1509                         }
1510
1511                         return MatchesPrimitive (charSortKey, si, ext, sortkey, ti, noLv4);
1512                 }
1513
1514                 private bool MatchesPrimitive (byte [] charSortKey, int si, ExtenderType ext, byte [] sortkey, int ti, bool noLv4)
1515                 {
1516                         if (charSortKey [0] != sortkey [0] ||
1517                                 charSortKey [1] != sortkey [1] ||
1518                                 (!ignoreNonSpace && charSortKey [2] != sortkey [2]) ||
1519                                 charSortKey [3] != sortkey [3])
1520                                 return false;
1521                         if (noLv4 && (si < 0 || !Uni.HasSpecialWeight ((char) si)))
1522                                 return true;
1523                         else if (noLv4)
1524                                 return false;
1525                         // Since target can never be an extender, if the source
1526                         // is an expander and it matters, then they never match.
1527                         if (!ignoreNonSpace && ext == ExtenderType.Conditional)
1528                                 return false;
1529                         if (Uni.IsJapaneseSmallLetter ((char) si) !=
1530                                 Uni.IsJapaneseSmallLetter ((char) ti) ||
1531                                 ToDashTypeValue (ext) !=
1532                                 // FIXME: we will have to specify correct value for target
1533                                 ToDashTypeValue (ExtenderType.None) ||
1534                                 !Uni.IsHiragana ((char) si) !=
1535                                 !Uni.IsHiragana ((char) ti) ||
1536                                 IsHalfKana ((char) si) !=
1537                                 IsHalfKana ((char) ti))
1538                                 return false;
1539                         return true;
1540                 }
1541
1542                 private bool MatchesBackward (string s, ref int idx, int end, int orgStart, int ti, byte [] sortkey, bool noLv4)
1543                 {
1544                         int cur = idx;
1545                         int si = -1;
1546                         ExtenderType ext = GetExtenderType (s [idx]);
1547                         // To handle extenders in source, we need to
1548                         // check next _primary_ character.
1549                         if (ext != ExtenderType.None) {
1550                                 byte diacritical = 0;
1551                                 for (int tmp = 0; ; tmp--) {
1552                                         if (tmp < 0) // heading extender
1553                                                 return false;
1554                                         if (IsIgnorable (s [tmp]))
1555                                                 continue;
1556                                         int tmpi = FilterOptions (s [tmp]);
1557                                         byte category = Category (tmpi);
1558                                         if (category == 1) {
1559                                                 diacritical = Level2 (tmpi, ExtenderType.None);
1560                                                 continue;
1561                                         }
1562                                         si = FilterExtender (tmpi, ext);
1563                                         charSortKey [0] = category;
1564                                         charSortKey [1] = Level1 (si);
1565                                         if (!ignoreNonSpace)
1566                                                 charSortKey [2] = Level2 (si, ext);
1567                                         charSortKey [3] = Uni.Level3 (si);
1568                                         if (ext != ExtenderType.Conditional &&
1569                                                 diacritical != 0)
1570                                                 charSortKey [2] =
1571                                                         (charSortKey [2] == 0) ?
1572                                                         (byte) (diacritical + 2) :
1573                                                         diacritical;
1574                                         break;
1575                                 }
1576                                 idx--;
1577                         }
1578                         Contraction ct = null;
1579                         if (ext == ExtenderType.None)
1580                                 ct = GetContraction (s, idx, end);
1581                         // if lv4 exists, it never matches contraction
1582                         if (ct != null) {
1583                                 idx -= ct.Source.Length;
1584                                 if (!noLv4)
1585                                         return false;
1586                                 if (ct.SortKey != null) {
1587                                         for (int i = 0; i < sortkey.Length; i++)
1588                                                 charSortKey [i] = sortkey [i];
1589                                         previousChar = -1;
1590                                         previousSortKey = charSortKey;
1591                                 } else {
1592                                         // Here is the core of LAMESPEC
1593                                         // described at the top of the source.
1594                                         int dummy = ct.Replacement.Length - 1;
1595                                         return MatchesBackward (ct.Replacement, 
1596                                                 ref dummy, dummy, -1, ti, sortkey, noLv4);
1597                                 }
1598                         } else if (ext == ExtenderType.None) {
1599                                 if (si < 0)
1600                                         si = FilterOptions (s [idx]);
1601                                 idx--;
1602                                 bool noMatch = false;
1603                                 charSortKey [0] = Category (si);
1604                                 if (charSortKey [0] == sortkey [0])
1605                                         charSortKey [1] = Level1 (si);
1606                                 else
1607                                         noMatch = true;
1608                                 if (!ignoreNonSpace && charSortKey [1] == sortkey [1])
1609                                         charSortKey [2] = Level2 (si, ext);
1610                                 else if (!ignoreNonSpace)
1611                                         noMatch = true;
1612                                 if (noMatch)
1613                                         return false;
1614                                 charSortKey [3] = Uni.Level3 (si);
1615                                 if (charSortKey [0] != 1)
1616                                         previousChar = si;
1617                         }
1618                         if (ext == ExtenderType.None) {
1619                                 for (int tmp = cur + 1; tmp < orgStart; tmp++) {
1620                                         if (Category (s [tmp]) != 1)
1621                                                 break;
1622                                         if (ignoreNonSpace)
1623                                                 continue;
1624                                         if (charSortKey [2] == 0)
1625                                                         charSortKey [2] = 2;
1626                                         charSortKey [2] = (byte) (charSortKey [2]
1627                                                 + Level2 (s [tmp], ExtenderType.None));
1628                                 }
1629                         }
1630                         return MatchesPrimitive (charSortKey, si, ext, sortkey, ti, noLv4);
1631                 }
1632                 #endregion
1633         }
1634 }