2005-07-26 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / SimpleCollator.cs
1 //
2 // SimpleCollator.cs
3 //
4 // This class will demonstrate CompareInfo functionality that will just work.
5 //
6
7 //
8 // Here's a summary for supporting contractions, expansions and diacritical 
9 // remappings.
10 //
11 // Diacritical mapping is a simple tailoring rule that "remaps" diacritical
12 // weight value from one to another. For now it applies to all range of
13 // characters, but at some stage we might need to limit the remapping ranges.
14 //
15 // A Contraction consists of a string (actually char[]) and a sortkey entry
16 // (i.e. byte[]). It indicates that a sequence of characters is interpreted
17 // as a single character that has the mapped sortkey value. There is no
18 // character which goes across "special rules". When the collator encountered
19 // such a sequence of characters, it just returns the sortkey value without
20 // further replacement.
21 //
22 // Since it is still likely to happen that a contraction sequence matches
23 // other character than the identical sequence (depending on CompareOptions
24 // and of course, defined sortkey value itself), comparison cannot be done
25 // at source char[] level.
26 //
27 // In IndexOf(), the first character in the target string or the target char
28 // itself is turned into sortkey bytes. If the character has a contraction and
29 // that is sortkey map, then it is used instead. If the contraction exists and
30 // that is replacement map, then the first character of the replacement string
31 // is searched instead. IndexOf() always searches only for the top character,
32 // and if it returned negative value, then it returns -1. Otherwise, it then
33 // tries IsPrefix() from that location. If it returns true, then it returns
34 // the index.
35 //
36 // LAMESPEC: IndexOf() is lame as a whole API. It never matches in the middle
37 // of expansion and there is no proper way to return such indexes within
38 // a single int return value.
39 //
40 // For example, try below in .NET:
41 //      IndexOf("\u00E6", "a")
42 //      IndexOf("\u00E6", "e")
43 //
44
45
46 using System;
47 using System.Collections;
48 using System.Globalization;
49
50 using Uni = Mono.Globalization.Unicode.MSCompatUnicodeTable;
51 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
52
53 namespace Mono.Globalization.Unicode
54 {
55         internal class SimpleCollator
56         {
57                 static SimpleCollator invariant =
58                         new SimpleCollator (CultureInfo.InvariantCulture);
59
60                 SortKeyBuffer buf;
61                 // CompareOptions expanded.
62                 bool ignoreNonSpace; // used in IndexOf()
63                 bool ignoreSymbols;
64                 bool ignoreWidth;
65                 bool ignoreCase;
66                 bool ignoreKanaType;
67                 TextInfo textInfo; // for ToLower().
68                 bool frenchSort;
69                 unsafe readonly byte* cjkCatTable;
70                 unsafe readonly byte* cjkLv1Table;
71                 readonly CodePointIndexer cjkIndexer;
72                 unsafe readonly byte* cjkLv2Table;
73                 readonly CodePointIndexer cjkLv2Indexer;
74                 readonly int lcid;
75                 readonly Contraction [] contractions;
76                 readonly Level2Map [] level2Maps;
77
78                 // This flag marks characters as "unsafe", where the character
79                 // could be used as part of a contraction (whose length > 1).
80                 readonly bool [] unsafeFlags;
81
82                 const int UnsafeFlagLength = 0x300;
83
84                 // temporary sortkey buffer for index search/comparison
85                 byte [] charSortKey = new byte [4];
86                 byte [] charSortKey2 = new byte [4];
87                 byte [] charSortKeyIndexTarget = new byte [4];
88
89                 #region .ctor() and split functions
90
91                 public SimpleCollator (CultureInfo culture)
92                 {
93                         lcid = culture.LCID;
94                         textInfo = culture.TextInfo;
95                         buf = new SortKeyBuffer (culture.LCID);
96
97                         unsafe {
98                                 SetCJKTable (culture, ref cjkIndexer,
99                                         ref cjkCatTable, ref cjkLv1Table,
100                                         ref cjkLv2Indexer, ref cjkLv2Table);
101                         }
102
103                         // Get tailoring info
104                         TailoringInfo t = null;
105                         for (CultureInfo ci = culture; ci.LCID != 127; ci = ci.Parent) {
106                                 t = Uni.GetTailoringInfo (ci.LCID);
107                                 if (t != null)
108                                         break;
109                         }
110                         if (t == null) // then use invariant
111                                 t = Uni.GetTailoringInfo (127);
112
113                         frenchSort = t.FrenchSort;
114                         Uni.BuildTailoringTables (culture, t, ref contractions,
115                                 ref level2Maps);
116                         unsafeFlags = new bool [UnsafeFlagLength];
117                         foreach (Contraction c in contractions)
118                                 if (c.Source.Length > 1)
119                                         foreach (char ch in c.Source)
120                                                 unsafeFlags [(int) ch] = true;
121
122                         // FIXME: Since tailorings are mostly for latin
123                         // (and in some cases Cyrillic) characters, it would
124                         // be much better for performance to store "start 
125                         // indexes" for > 370 (culture-specific letters).
126
127 /*
128 // dump tailoring table
129 Console.WriteLine ("******** building table for {0} : c - {1} d - {2}",
130 culture.LCID, contractions.Length, level2Maps.Length);
131 foreach (Contraction c in contractions) {
132 foreach (char cc in c.Source)
133 Console.Write ("{0:X4} ", (int) cc);
134 Console.WriteLine (" -> '{0}'", c.Replacement);
135 }
136 */
137                 }
138
139                 unsafe private void SetCJKTable (
140                         CultureInfo culture, ref CodePointIndexer cjkIndexer,
141                         ref byte* catTable, ref byte* lv1Table,
142                         ref CodePointIndexer lv2Indexer, ref byte* lv2Table)
143                 {
144                         string name = GetNeutralCulture (culture).Name;
145
146                         Uni.FillCJK (name, ref cjkIndexer, ref catTable,
147                                 ref lv1Table, ref lv2Indexer, ref lv2Table);
148                 }
149
150                 static CultureInfo GetNeutralCulture (CultureInfo info)
151                 {
152                         CultureInfo ret = info;
153                         while (ret.Parent != null && ret.Parent.LCID != 127)
154                                 ret = ret.Parent;
155                         return ret;
156                 }
157
158                 #endregion
159
160                 unsafe byte Category (int cp)
161                 {
162                         if (cp < 0x3000 || cjkCatTable == null)
163                                 return Uni.Category (cp);
164                         int idx = cjkIndexer.ToIndex (cp);
165                         return idx < 0 ? Uni.Category (cp) : cjkCatTable [idx];
166                 }
167
168                 unsafe byte Level1 (int cp)
169                 {
170                         if (cp < 0x3000 || cjkLv1Table == null)
171                                 return Uni.Level1 (cp);
172                         int idx = cjkIndexer.ToIndex (cp);
173                         return idx < 0 ? Uni.Level1 (cp) : cjkLv1Table [idx];
174                 }
175
176                 unsafe byte Level2 (int cp, ExtenderType ext)
177                 {
178                         if (ext == ExtenderType.Buggy)
179                                 return 5;
180                         else if (ext == ExtenderType.Conditional)
181                                 return 0;
182
183                         if (cp < 0x3000 || cjkLv2Table == null)
184                                 return Uni.Level2 (cp);
185                         int idx = cjkLv2Indexer.ToIndex (cp);
186                         byte ret = idx < 0 ? (byte) 0 : cjkLv2Table [idx];
187                         if (ret != 0)
188                                 return ret;
189                         ret = Uni.Level2 (cp);
190                         if (level2Maps.Length == 0)
191                                 return ret;
192                         for (int i = 0; i < level2Maps.Length; i++) {
193                                 if (level2Maps [i].Source == ret)
194                                         return level2Maps [i].Replace;
195                                 else if (level2Maps [i].Source > ret)
196                                         break;
197                         }
198                         return ret;
199                 }
200
201                 bool IsHalfKana (int cp)
202                 {
203                         return ignoreWidth || Uni.IsHalfWidthKana ((char) cp);
204                 }
205
206                 void SetOptions (CompareOptions options)
207                 {
208                         this.ignoreNonSpace = (options & CompareOptions.IgnoreNonSpace) != 0;
209                         this.ignoreSymbols = (options & CompareOptions.IgnoreSymbols) != 0;
210                         this.ignoreWidth = (options & CompareOptions.IgnoreWidth) != 0;
211                         this.ignoreCase = (options & CompareOptions.IgnoreCase) != 0;
212                         this.ignoreKanaType = (options & CompareOptions.IgnoreKanaType) != 0;
213                         previousChar = previousChar2 = -1;
214                         previousSortKey = previousSortKey2 = null;
215                         escape1.Source = escape2.Source = null;
216                 }
217
218                 Contraction GetContraction (string s, int start, int end)
219                 {
220                         Contraction c = GetContraction (s, start, end, contractions);
221                         if (c != null || lcid == 127)
222                                 return c;
223                         return GetContraction (s, start, end, invariant.contractions);
224                 }
225
226                 Contraction GetContraction (string s, int start, int end, Contraction [] clist)
227                 {
228                         for (int i = 0; i < clist.Length; i++) {
229                                 Contraction ct = clist [i];
230                                 int diff = ct.Source [0] - s [start];
231                                 if (diff > 0)
232                                         return null; // it's already sorted
233                                 else if (diff < 0)
234                                         continue;
235                                 char [] chars = ct.Source;
236                                 if (end - start < chars.Length)
237                                         continue;
238                                 bool match = true;
239                                 for (int n = 0; n < chars.Length; n++)
240                                         if (s [start + n] != chars [n]) {
241                                                 match = false;
242                                                 break;
243                                         }
244                                 if (match)
245                                         return ct;
246                         }
247                         return null;
248                 }
249
250                 Contraction GetTailContraction (string s, int start, int end)
251                 {
252                         Contraction c = GetTailContraction (s, start, end, contractions);
253                         if (c != null || lcid == 127)
254                                 return c;
255                         return GetTailContraction (s, start, end, invariant.contractions);
256                 }
257
258                 Contraction GetTailContraction (string s, int start, int end, Contraction [] clist)
259                 {
260                         for (int i = 0; i < clist.Length; i++) {
261                                 Contraction ct = clist [i];
262                                 int diff = ct.Source [0] - s [end];
263                                 if (diff > 0)
264                                         return null; // it's already sorted
265                                 else if (diff < 0)
266                                         continue;
267                                 char [] chars = ct.Source;
268                                 if (start - end + 1 < chars.Length)
269                                         continue;
270                                 bool match = true;
271                                 int offset = start - chars.Length + 1;
272                                 for (int n = 0; n < chars.Length; n++)
273                                         if (s [offset + n] != chars [n]) {
274                                                 match = false;
275                                                 break;
276                                         }
277                                 if (match)
278                                         return ct;
279                         }
280                         return null;
281                 }
282
283                 Contraction GetContraction (char c)
284                 {
285                         Contraction ct = GetContraction (c, contractions);
286                         if (ct != null || lcid == 127)
287                                 return ct;
288                         return GetContraction (c, invariant.contractions);
289                 }
290
291                 Contraction GetContraction (char c, Contraction [] clist)
292                 {
293                         for (int i = 0; i < clist.Length; i++) {
294                                 Contraction ct = clist [i];
295                                 if (ct.Source [0] > c)
296                                         return null; // it's already sorted
297                                 if (ct.Source [0] == c && ct.Source.Length == 1)
298                                         return ct;
299                         }
300                         return null;
301                 }
302
303                 int FilterOptions (int i)
304                 {
305                         if (ignoreWidth) {
306                                 int x = Uni.ToWidthCompat (i);
307                                 if (x != 0)
308                                         i = x;
309                         }
310                         if (ignoreCase)
311                                 i = textInfo.ToLower ((char) i);
312                         if (ignoreKanaType)
313                                 i = Uni.ToKanaTypeInsensitive (i);
314                         return i;
315                 }
316
317                 int previousChar = -1;
318                 byte [] previousSortKey = null;
319                 int previousChar2 = -1;
320                 byte [] previousSortKey2 = null;
321
322                 enum ExtenderType {
323                         None,
324                         Simple,
325                         Voiced,
326                         Conditional,
327                         Buggy,
328                 }
329
330                 ExtenderType GetExtenderType (int i)
331                 {
332                         // LAMESPEC: Windows expects true for U+3005, but 
333                         // sometimes it does not represent to repeat just
334                         // one character.
335                         // Windows also expects true for U+3031 and U+3032,
336                         // but they should *never* repeat one character.
337
338                         // U+2015 becomes an extender only when it is Japanese
339                         if (i == 0x2015)
340                                 return lcid == 16 ? ExtenderType.Conditional :
341                                         ExtenderType.None;
342
343                         if (i < 0x3005 || i > 0xFF70)
344                                 return ExtenderType.None;
345                         if (i >= 0xFE7C) {
346                                 switch (i) {
347                                 case 0xFE7C:
348                                 case 0xFE7D:
349                                         return ExtenderType.Simple;
350                                 case 0xFF70:
351                                         return ExtenderType.Conditional;
352                                 case 0xFF9E:
353                                 case 0xFF9F:
354                                         return ExtenderType.Voiced;
355                                 }
356                         }
357                         if (i > 0x30FE)
358                                 return ExtenderType.None;
359                         switch (i) {
360                         case 0x3005: // LAMESPEC: see above.
361                                 return ExtenderType.Buggy;
362                         case 0x3031: // LAMESPEC: see above.
363                         case 0x3032: // LAMESPEC: see above.
364                         case 0x309D:
365                         case 0x30FD:
366                                 return ExtenderType.Simple;
367                         case 0x309E:
368                         case 0x30FE:
369                                 return ExtenderType.Voiced;
370                         case 0x30FC:
371                                 return ExtenderType.Conditional;
372                         default:
373                                 return ExtenderType.None;
374                         }
375                 }
376
377                 byte ToDashTypeValue (ExtenderType ext)
378                 {
379                         if (ignoreNonSpace) // LAMESPEC: huh, why?
380                                 return 3;
381                         switch (ext) {
382                         case ExtenderType.None:
383                                 return 3;
384                         case ExtenderType.Conditional:
385                                 return 5;
386                         default:
387                                 return 4;
388                         }
389                 }
390
391                 int FilterExtender (int i, ExtenderType ext)
392                 {
393                         if (ext == ExtenderType.Conditional &&
394                                 Uni.HasSpecialWeight ((char) i)) {
395                                 bool half = IsHalfKana ((char) i);
396                                 bool katakana = !Uni.IsHiragana ((char) i);
397                                 switch (Level1 (i) & 7) {
398                                 case 2:
399                                         return half ? 0xFF71 : katakana ? 0x30A2 : 0x3042;
400                                 case 3:
401                                         return half ? 0xFF72 : katakana ? 0x30A4 : 0x3044;
402                                 case 4:
403                                         return half ? 0xFF73 : katakana ? 0x30A6 : 0x3046;
404                                 case 5:
405                                         return half ? 0xFF74 : katakana ? 0x30A8 : 0x3048;
406                                 case 6:
407                                         return half ? 0xFF75 : katakana ? 0x30AA : 0x304A;
408                                 }
409                         }
410                         return i;
411                 }
412
413                 bool IsIgnorable (int i)
414                 {
415                         return Uni.IsIgnorable (i) ||
416                                 ignoreSymbols && Uni.IsIgnorableSymbol (i) ||
417                                 ignoreNonSpace && Uni.IsIgnorableNonSpacing (i);
418                 }
419
420                 bool IsSafe (int i)
421                 {
422                         return i >= unsafeFlags.Length ? true : !unsafeFlags [i];
423                 }
424
425                 #region GetSortKey()
426
427                 public SortKey GetSortKey (string s)
428                 {
429                         return GetSortKey (s, CompareOptions.None);
430                 }
431
432                 public SortKey GetSortKey (string s, CompareOptions options)
433                 {
434                         return GetSortKey (s, 0, s.Length, options);
435                 }
436
437                 public SortKey GetSortKey (string s, int start, int length, CompareOptions options)
438                 {
439                         SetOptions (options);
440
441                         buf.Initialize (options, lcid, s, frenchSort);
442                         int end = start + length;
443                         GetSortKey (s, start, end);
444                         return buf.GetResultAndReset ();
445                 }
446
447                 void GetSortKey (string s, int start, int end)
448                 {
449                         for (int n = start; n < end; n++) {
450                                 int i = s [n];
451
452                                 ExtenderType ext = GetExtenderType (i);
453                                 if (ext != ExtenderType.None) {
454                                         i = FilterExtender (previousChar, ext);
455                                         if (i >= 0)
456                                                 FillSortKeyRaw (i, ext);
457                                         else if (previousSortKey != null) {
458                                                 byte [] b = previousSortKey;
459                                                 buf.AppendNormal (
460                                                         b [0],
461                                                         b [1],
462                                                         b [2] != 1 ? b [2] : Level2 (i, ext),
463                                                         b [3] != 1 ? b [3] : Uni.Level3 (i));
464                                         }
465                                         // otherwise do nothing.
466                                         // (if the extender is the first char
467                                         // in the string, then just ignore.)
468                                         continue;
469                                 }
470
471                                 if (IsIgnorable (i))
472                                         continue;
473                                 i = FilterOptions (i);
474
475                                 Contraction ct = GetContraction (s, n, end);
476                                 if (ct != null) {
477                                         if (ct.Replacement != null) {
478                                                 GetSortKey (ct.Replacement, 0, ct.Replacement.Length);
479                                         } else {
480                                                 byte [] b = ct.SortKey;
481                                                 buf.AppendNormal (
482                                                         b [0],
483                                                         b [1],
484                                                         b [2] != 1 ? b [2] : Level2 (i, ext),
485                                                         b [3] != 1 ? b [3] : Uni.Level3 (i));
486                                                 previousSortKey = b;
487                                                 previousChar = -1;
488                                         }
489                                         n += ct.Source.Length - 1;
490                                 }
491                                 else {
492                                         if (!Uni.IsIgnorableNonSpacing (i))
493                                                 previousChar = i;
494                                         FillSortKeyRaw (i, ExtenderType.None);
495                                 }
496                         }
497                 }
498
499                 void FillSortKeyRaw (int i, ExtenderType ext)
500                 {
501                         if (0x3400 <= i && i <= 0x4DB5) {
502                                 int diff = i - 0x3400;
503                                 buf.AppendCJKExtension (
504                                         (byte) (0x10 + diff / 254),
505                                         (byte) (diff % 254 + 2));
506                                 return;
507                         }
508
509                         UnicodeCategory uc = char.GetUnicodeCategory ((char) i);
510                         switch (uc) {
511                         case UnicodeCategory.PrivateUse:
512                                 int diff = i - 0xE000;
513                                 buf.AppendNormal (
514                                         (byte) (0xE5 + diff / 254),
515                                         (byte) (diff % 254 + 2),
516                                         0,
517                                         0);
518                                 return;
519                         case UnicodeCategory.Surrogate:
520                                 FillSurrogateSortKeyRaw (i);
521                                 return;
522                         }
523
524                         byte level2 = Level2 (i, ext);
525                         if (Uni.HasSpecialWeight ((char) i)) {
526                                 byte level1 = Level1 (i);
527                                 buf.AppendKana (
528                                         Category (i),
529                                         level1,
530                                         level2,
531                                         Uni.Level3 (i),
532                                         Uni.IsJapaneseSmallLetter ((char) i),
533                                         ToDashTypeValue (ext),
534                                         !Uni.IsHiragana ((char) i),
535                                         IsHalfKana ((char) i)
536                                         );
537                                 if (!ignoreNonSpace && ext == ExtenderType.Voiced)
538                                         // Append voice weight
539                                         buf.AppendNormal (1, 1, 1, 0);
540                         }
541                         else
542                                 buf.AppendNormal (
543                                         Category (i),
544                                         Level1 (i),
545                                         level2,
546                                         Uni.Level3 (i));
547                 }
548
549                 void FillSurrogateSortKeyRaw (int i)
550                 {
551                         int diffbase = 0;
552                         int segment = 0;
553                         byte lower = 0;
554
555                         if (i < 0xD840) {
556                                 diffbase = 0xD800;
557                                 segment = 0x41;
558                                 lower = (byte) ((i == 0xD800) ? 0x3E : 0x3F);
559                         } else if (0xD840 <= i && i < 0xD880) {
560                                 diffbase = 0xD840;
561                                 segment = 0xF2;
562                                 lower = 0x3E;
563                         } else if (0xDB80 <= i && i < 0xDC00) {
564                                 diffbase = 0xDB80 - 0x40;
565                                 segment = 0xFE;
566                                 lower = 0x3E;
567                         } else {
568                                 diffbase = 0xDC00 - 0xF8 + 2;
569                                 segment = 0x41;
570                                 lower = 0x3F;
571                         }
572                         int diff = i - diffbase;
573
574                         buf.AppendNormal (
575                                 (byte) (segment + diff / 254),
576                                 (byte) (diff % 254 + 2),
577                                 lower,
578                                 lower);
579                 }
580
581                 #endregion
582
583                 #region Compare()
584
585                 public int Compare (string s1, string s2)
586                 {
587                         return Compare (s1, s2, CompareOptions.None);
588                 }
589
590                 public int Compare (string s1, string s2, CompareOptions options)
591                 {
592                         return Compare (s1, 0, s1.Length, s2, 0, s2.Length, options);
593                 }
594
595                 class Escape
596                 {
597                         public string Source;
598                         public int Index;
599                         public int Start;
600                         public int End;
601                         public int Optional;
602                 }
603
604                 // Those instances are reused not to invoke instantiation
605                 // during Compare().
606                 Escape escape1 = new Escape ();
607                 Escape escape2 = new Escape ();
608
609                 private int CompareOrdinal (string s1, int idx1, int len1,
610                         string s2, int idx2, int len2)
611                 {
612                         int min = len1 < len2 ? len1 : len2;
613                         int end1 = idx1 + min;
614                         int end2 = idx2 + min;
615                         for (int i1 = idx1, i2 = idx2;
616                                 i1 < end1 && i2 < end2; i1++, i2++)
617                                 if (s1 [i1] != s2 [i2])
618                                         return s1 [i1] - s2 [i2];
619                         return len1 == len2 ? 0 :
620                                 len1 == min ? - 1 : 1;
621                 }
622
623                 public int Compare (string s1, int idx1, int len1,
624                         string s2, int idx2, int len2, CompareOptions options)
625                 {
626                         // quick equality check
627                         if (idx1 == idx2 && len1 == len2 &&
628                                 Object.ReferenceEquals (s1, s2))
629                                 return 0;
630                         // FIXME: this should be done inside Compare() at
631                         // any time.
632 //                      int ord = CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
633 //                      if (ord == 0)
634 //                              return 0;
635                         if (options == CompareOptions.Ordinal)
636                                 return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
637
638 #if false // stable easy version, depends on GetSortKey().
639                         SortKey sk1 = GetSortKey (s1, idx1, len1, options);
640                         SortKey sk2 = GetSortKey (s2, idx2, len2, options);
641                         byte [] d1 = sk1.KeyData;
642                         byte [] d2 = sk2.KeyData;
643                         int len = d1.Length > d2.Length ? d2.Length : d1.Length;
644                         for (int i = 0; i < len; i++)
645                                 if (d1 [i] != d2 [i])
646                                         return d1 [i] < d2 [i] ? -1 : 1;
647                         return d1.Length == d2.Length ? 0 : d1.Length < d2.Length ? -1 : 1;
648 #else
649                         SetOptions (options);
650                         bool dummy, dummy2;
651                         int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, (options & CompareOptions.StringSort) != 0, out dummy, out dummy2, true);
652                         return ret == 0 ? 0 : ret < 0 ? -1 : 1;
653 #endif
654                 }
655
656                 int CompareInternal (string s1, int idx1, int len1, string s2,
657                         int idx2, int len2, bool stringSort,
658                         out bool targetConsumed, out bool sourceConsumed,
659                         bool skipHeadingExtenders)
660                 {
661                         int start1 = idx1;
662                         int start2 = idx2;
663                         int end1 = idx1 + len1;
664                         int end2 = idx2 + len2;
665                         targetConsumed = false;
666                         sourceConsumed = false;
667
668                         // It holds final result that comes from the comparison
669                         // at level 2 or lower. Even if Compare() found the
670                         // difference at level 2 or lower, it still has to
671                         // continue level 1 comparison. FinalResult is used
672                         // when there was no further differences.
673                         int finalResult = 0;
674                         // It holds the comparison level to do. It starts from
675                         // 5, and becomes 1 when we do primary-only comparison.
676                         int currentLevel = 5;
677
678                         int lv5At1 = -1;
679                         int lv5At2 = -1;
680                         int lv5Value1 = 0;
681                         int lv5Value2 = 0;
682
683                         // Skip heading extenders
684                         if (skipHeadingExtenders) {
685                                 for (; idx1 < end1; idx1++)
686                                         if (GetExtenderType (s1 [idx1]) == ExtenderType.None)
687                                                 break;
688                                 for (; idx2 < end2; idx2++)
689                                         if (GetExtenderType (s2 [idx2]) == ExtenderType.None)
690                                                 break;
691                         }
692
693                         ExtenderType ext1 = ExtenderType.None;
694                         ExtenderType ext2 = ExtenderType.None;
695
696                         int quickCheckPos1 = idx1;
697                         int quickCheckPos2 = idx2;
698
699                         while (true) {
700                                 for (; idx1 < end1; idx1++)
701                                         if (!IsIgnorable (s1 [idx1]))
702                                                 break;
703                                 for (; idx2 < end2; idx2++)
704                                         if (!IsIgnorable (s2 [idx2]))
705                                                 break;
706
707                                 if (idx1 >= end1) {
708                                         if (escape1.Source == null)
709                                                 break;
710                                         s1 = escape1.Source;
711                                         start1 = escape1.Start;
712                                         idx1 = escape1.Index;
713                                         end1 = escape1.End;
714                                         quickCheckPos1 = escape1.Optional;
715                                         escape1.Source = null;
716                                         continue;
717                                 }
718                                 if (idx2 >= end2) {
719                                         if (escape2.Source == null)
720                                                 break;
721                                         s2 = escape2.Source;
722                                         start2 = escape2.Start;
723                                         idx2 = escape2.Index;
724                                         end2 = escape2.End;
725                                         quickCheckPos2 = escape2.Optional;
726                                         escape2.Source = null;
727                                         continue;
728                                 }
729 #if true
730 // If comparison is unstable, then this part is one of the most doubtful part.
731 // Here we run quick codepoint comparison and run back to "stable" area to
732 // compare characters.
733
734                                 // Strictly to say, even the first character
735                                 // could be compared here, but it messed
736                                 // backward step, so I just avoided mess.
737                                 if (quickCheckPos1 < idx1 && quickCheckPos2 < idx2) {
738                                         while (idx1 < end1 && idx2 < end2 &&
739                                                 s1 [idx1] == s2 [idx2]) {
740                                                 idx1++;
741                                                 idx2++;
742                                         }
743                                         if (idx1 == end1 || idx2 == end2)
744                                                 continue; // check replacement
745
746                                         int backwardEnd1 = quickCheckPos1;
747                                         int backwardEnd2 = quickCheckPos2;
748                                         quickCheckPos1 = idx1;
749                                         quickCheckPos2 = idx2;
750
751                                         idx1--;
752                                         idx2--;
753
754                                         for (;idx1 > backwardEnd1; idx1--)
755                                                 if (Category (s1 [idx1]) != 1)
756                                                         break;
757                                         for (;idx2 > backwardEnd2; idx2--)
758                                                 if (Category (s2 [idx2]) != 1)
759                                                         break;
760                                         for (;idx1 > backwardEnd1; idx1--)
761                                                 if (IsSafe (s1 [idx1]))
762                                                         break;
763                                         for (;idx2 > backwardEnd2; idx2--)
764                                                 if (IsSafe (s2 [idx2]))
765                                                         break;
766                                 }
767 #endif
768
769                                 int cur1 = idx1;
770                                 int cur2 = idx2;
771                                 byte [] sk1 = null;
772                                 byte [] sk2 = null;
773                                 int i1 = FilterOptions (s1 [idx1]);
774                                 int i2 = FilterOptions (s2 [idx2]);
775                                 bool special1 = false;
776                                 bool special2 = false;
777
778                                 // If current character is an expander, then
779                                 // repeat the previous character.
780                                 ext1 = GetExtenderType (i1);
781                                 if (ext1 != ExtenderType.None) {
782                                         if (previousChar < 0) {
783                                                 if (previousSortKey == null) {
784                                                         // nothing to extend
785                                                         idx1++;
786                                                         continue;
787                                                 }
788                                                 sk1 = previousSortKey;
789                                         }
790                                         else
791                                                 i1 = FilterExtender (previousChar, ext1);
792                                 }
793                                 ext2 = GetExtenderType (i2);
794                                 if (ext2 != ExtenderType.None) {
795                                         if (previousChar2 < 0) {
796                                                 if (previousSortKey2 == null) {
797                                                         // nothing to extend
798                                                         idx2++;
799                                                         continue;
800                                                 }
801                                                 sk2 = previousSortKey2;
802                                         }
803                                         else
804                                                 i2 = FilterExtender (previousChar2, ext2);
805                                 }
806
807                                 byte cat1 = Category (i1);
808                                 byte cat2 = Category (i2);
809
810                                 // Handle special weight characters
811                                 if (!stringSort && currentLevel > 4) {
812                                         if (cat1 == 6) {
813                                                 lv5At1 = escape1.Source != null ?
814                                                         escape1.Index - escape1.Start :
815                                                         cur1 - start1;
816                                                 // here Windows has a bug that it does
817                                                 // not consider thirtiary weight.
818                                                 lv5Value1 = Level1 (i1) << 8 + Uni.Level3 (i1);
819                                                 previousChar = i1;
820                                                 idx1++;
821                                         }
822                                         if (cat2 == 6) {
823                                                 lv5At2 = escape2.Source != null ?
824                                                         escape2.Index - escape2.Start :
825                                                         cur2 - start2;
826                                                 // here Windows has a bug that it does
827                                                 // not consider thirtiary weight.
828                                                 lv5Value2 = Level1 (i2) << 8 + Uni.Level3 (i2);
829                                                 previousChar2 = i2;
830                                                 idx2++;
831                                         }
832                                         if (cat1 == 6 || cat2 == 6) {
833                                                 currentLevel = 4;
834                                                 continue;
835                                         }
836                                 }
837
838                                 Contraction ct1 = null;
839                                 if (ext1 == ExtenderType.None)
840                                         ct1 = GetContraction (s1, idx1, end1);
841
842                                 int offset1 = 1;
843                                 if (sk1 != null)
844                                         offset1 = 1;
845                                 else if (ct1 != null) {
846                                         offset1 = ct1.Source.Length;
847                                         if (ct1.SortKey != null) {
848                                                 sk1 = charSortKey;
849                                                 for (int i = 0; i < ct1.SortKey.Length; i++)
850                                                         sk1 [i] = ct1.SortKey [i];
851                                                 previousChar = -1;
852                                                 previousSortKey = sk1;
853                                         }
854                                         else if (escape1.Source == null) {
855                                                 escape1.Source = s1;
856                                                 escape1.Start = start1;
857                                                 escape1.Index = cur1 + ct1.Source.Length;
858                                                 escape1.End = end1;
859                                                 escape1.Optional = quickCheckPos1;
860                                                 s1 = ct1.Replacement;
861                                                 idx1 = 0;
862                                                 start1 = 0;
863                                                 end1 = s1.Length;
864                                                 quickCheckPos1 = 0;
865                                                 continue;
866                                         }
867                                 }
868                                 else {
869                                         sk1 = charSortKey;
870                                         sk1 [0] = cat1;
871                                         sk1 [1] = Level1 (i1);
872                                         if (!ignoreNonSpace && currentLevel > 1)
873                                                 sk1 [2] = Level2 (i1, ext1);
874                                         if (currentLevel > 2)
875                                                 sk1 [3] = Uni.Level3 (i1);
876                                         if (currentLevel > 3)
877                                                 special1 = Uni.HasSpecialWeight ((char) i1);
878                                         if (cat1 > 1)
879                                                 previousChar = i1;
880                                 }
881
882                                 Contraction ct2 = null;
883                                 if (ext2 == ExtenderType.None)
884                                         ct2 = GetContraction (s2, idx2, end2);
885
886                                 if (sk2 != null)
887                                         idx2++;
888                                 else if (ct2 != null) {
889                                         idx2 += ct2.Source.Length;
890                                         if (ct2.SortKey != null) {
891                                                 sk2 = charSortKey2;
892                                                 for (int i = 0; i < ct2.SortKey.Length; i++)
893                                                         sk2 [i] = ct2.SortKey [i];
894                                                 previousChar2 = -1;
895                                                 previousSortKey2 = sk2;
896                                         }
897                                         else if (escape2.Source == null) {
898                                                 escape2.Source = s2;
899                                                 escape2.Start = start2;
900                                                 escape2.Index = cur2 + ct2.Source.Length;
901                                                 escape2.End = end2;
902                                                 escape2.Optional = quickCheckPos2;
903                                                 s2 = ct2.Replacement;
904                                                 idx2 = 0;
905                                                 start2 = 0;
906                                                 end2 = s2.Length;
907                                                 quickCheckPos2 = 0;
908                                                 continue;
909                                         }
910                                 }
911                                 else {
912                                         sk2 = charSortKey2;
913                                         sk2 [0] = cat2;
914                                         sk2 [1] = Level1 (i2);
915                                         if (!ignoreNonSpace && currentLevel > 1)
916                                                 sk2 [2] = Level2 (i2, ext2);
917                                         if (currentLevel > 2)
918                                                 sk2 [3] = Uni.Level3 (i2);
919                                         if (currentLevel > 3)
920                                                 special2 = Uni.HasSpecialWeight ((char) i2);
921                                         if (cat2 > 1)
922                                                 previousChar2 = i2;
923                                         idx2++;
924                                 }
925
926                                 // Add offset here so that it does not skip
927                                 // idx1 while s2 has a replacement.
928                                 idx1 += offset1;
929
930                                 // add diacritical marks in s1 here
931                                 if (!ignoreNonSpace) {
932                                         while (idx1 < end1) {
933                                                 if (Category (s1 [idx1]) != 1)
934                                                         break;
935                                                 if (sk1 [2] == 0)
936                                                         sk1 [2] = 2;
937                                                 sk1 [2] = (byte) (sk1 [2] + 
938                                                         Level2 (s1 [idx1], ExtenderType.None));
939                                                 idx1++;
940                                         }
941
942                                         // add diacritical marks in s2 here
943                                         while (idx2 < end2) {
944                                                 if (Category (s2 [idx2]) != 1)
945                                                         break;
946                                                 if (sk2 [2] == 0)
947                                                         sk2 [2] = 2;
948                                                 sk2 [2] = (byte) (sk2 [2] + 
949                                                         Level2 (s2 [idx2], ExtenderType.None));
950                                                 idx2++;
951                                         }
952                                 }
953
954                                 int ret = sk1 [0] - sk2 [0];
955                                 ret = ret != 0 ? ret : sk1 [1] - sk2 [1];
956                                 if (ret != 0)
957                                         return ret;
958                                 if (currentLevel == 1)
959                                         continue;
960                                 if (!ignoreNonSpace) {
961                                         ret = sk1 [2] - sk2 [2];
962                                         if (ret != 0) {
963                                                 finalResult = ret;
964                                                 currentLevel = frenchSort ? 2 : 1;
965                                                 continue;
966                                         }
967                                 }
968                                 if (currentLevel == 2)
969                                         continue;
970                                 ret = sk1 [3] - sk2 [3];
971                                 if (ret != 0) {
972                                         finalResult = ret;
973                                         currentLevel = 2;
974                                         continue;
975                                 }
976                                 if (currentLevel == 3)
977                                         continue;
978                                 if (special1 != special2) {
979                                         finalResult = special1 ? 1 : -1;
980                                         currentLevel = 3;
981                                         continue;
982                                 }
983                                 if (special1) {
984                                         ret = CompareFlagPair (
985                                                 !Uni.IsJapaneseSmallLetter ((char) i1),
986                                                 !Uni.IsJapaneseSmallLetter ((char) i2));
987                                         ret = ret != 0 ? ret :
988                                                 ToDashTypeValue (ext1) -
989                                                 ToDashTypeValue (ext2);
990                                         ret = ret != 0 ? ret : CompareFlagPair (
991                                                 Uni.IsHiragana ((char) i1),
992                                                 Uni.IsHiragana ((char) i2));
993                                         ret = ret != 0 ? ret : CompareFlagPair (
994                                                 !IsHalfKana ((char) i1),
995                                                 !IsHalfKana ((char) i2));
996                                         if (ret != 0) {
997                                                 finalResult = ret;
998                                                 currentLevel = 3;
999                                                 continue;
1000                                         }
1001                                 }
1002                         }
1003
1004                         // If there were only level 3 or lower differences,
1005                         // then we still have to find diacritical differences
1006                         // if any.
1007                         if (!ignoreNonSpace &&
1008                                 finalResult != 0 && currentLevel > 2) {
1009                                 while (idx1 < end1 && idx2 < end2) {
1010                                         if (!Uni.IsIgnorableNonSpacing (s1 [idx1]))
1011                                                 break;
1012                                         if (!Uni.IsIgnorableNonSpacing (s2 [idx2]))
1013                                                 break;
1014                                         finalResult = Level2 (FilterOptions ((s1 [idx1])), ext1) - Level2 (FilterOptions (s2 [idx2]), ext2);
1015                                         if (finalResult != 0)
1016                                                 break;
1017                                         idx1++;
1018                                         idx2++;
1019                                         // they should work only for the first character
1020                                         ext1 = ExtenderType.None;
1021                                         ext2 = ExtenderType.None;
1022                                 }
1023                         }
1024                         if (currentLevel == 1 && finalResult != 0) {
1025                                 while (idx1 < end1)
1026                                         if (Uni.IsIgnorableNonSpacing (s1 [idx1]))
1027                                                 idx1++;
1028                                 while (idx2 < end2)
1029                                         if (Uni.IsIgnorableNonSpacing (s2 [idx2]))
1030                                                 idx2++;
1031                         }
1032                         // we still have to handle level 5
1033                         if (finalResult == 0) {
1034                                 finalResult = lv5At1 - lv5At2;
1035                                 if (finalResult == 0)
1036                                         finalResult = lv5Value1 - lv5Value2;
1037                         }
1038                         if (finalResult == 0) {
1039                                 if (idx2 == end2)
1040                                         targetConsumed = true;
1041                                 if (idx1 == end1)
1042                                         sourceConsumed = true;
1043                         }
1044                         return idx1 != end1 ? 1 : idx2 == end2 ? finalResult : -1;
1045                 }
1046
1047                 int CompareFlagPair (bool b1, bool b2)
1048                 {
1049                         return b1 == b2 ? 0 : b1 ? 1 : -1;
1050                 }
1051
1052                 #endregion
1053
1054                 #region IsPrefix() and IsSuffix()
1055
1056                 public bool IsPrefix (string src, string target, CompareOptions opt)
1057                 {
1058                         return IsPrefix (src, target, 0, src.Length, opt);
1059                 }
1060
1061                 public bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
1062                 {
1063                         SetOptions (opt);
1064                         return IsPrefix (s, target, start, length, 
1065                                 (opt & CompareOptions.StringSort) != 0, true);
1066                 }
1067
1068                 public bool IsPrefix (string s, string target, int start, int length, bool stringSort, bool skipHeadingExtenders)
1069                 {
1070                         bool consumed, dummy;
1071                         int ret = CompareInternal (s, start, length,
1072                                 target, 0, target.Length, stringSort,
1073                                 out consumed, out dummy, skipHeadingExtenders);
1074                         return consumed;
1075                 }
1076
1077                 // IsSuffix()
1078
1079                 public bool IsSuffix (string src, string target, CompareOptions opt)
1080                 {
1081                         return IsSuffix (src, target, src.Length - 1, src.Length, opt);
1082                 }
1083
1084                 public bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
1085                 {
1086                         // quick check : simple codepoint comparison
1087                         if (s.Length >= target.Length) {
1088                                 int si = start;
1089                                 int se = start - length;
1090                                 for (int i = target.Length - 1; si >= se && i >= 0; i--, si--)
1091                                         if (s [si] != target [i])
1092                                                 break;
1093                                 if (si == start + target.Length)
1094                                         return true;
1095                         }
1096
1097                         SetOptions (opt);
1098                         return IsSuffix (s, target, start, length,
1099                                 (opt & CompareOptions.StringSort) != 0);
1100                 }
1101
1102                 bool IsSuffix (string s, string t, int start, int length,
1103                         bool stringSort)
1104                 {
1105                         int tstart = 0;
1106                         for (;tstart < t.Length; tstart++)
1107                                 if (!IsIgnorable (t [tstart]))
1108                                         break;
1109                         if (tstart == t.Length)
1110                                 return true; // as if target is String.Empty.
1111
1112 #if false
1113 // This is still not working. If it is not likely to get working, then just remove it.
1114                         int si = start;
1115                         int send = start - length;
1116                         int ti = t.Length - 1;
1117                         int tend = -1;
1118
1119                         int sStep = start + 1;
1120                         int tStep = t.Length;
1121                         bool sourceConsumed, targetConsumed;
1122                         while (true) {
1123                                 for (; send < si; si--)
1124                                         if (!IsIgnorable (s [si]))
1125                                                 break;
1126                                 for (; tend < ti; ti--)
1127                                         if (!IsIgnorable (t [ti]))
1128                                                 break;
1129                                 if (tend == ti)
1130                                         break;
1131                                 for (; send < si; si--)
1132                                         if (IsSafe (s [si]))
1133                                                 break;
1134                                 for (; tend < ti; ti--)
1135                                         if (IsSafe (t [ti]))
1136                                                 break;
1137 Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, length, t, ti, tstart, sStep - si, tStep - ti);
1138                                 if (CompareInternal (s, si, sStep - si,
1139                                         t, ti, tStep - ti, stringSort,
1140                                         out targetConsumed, out sourceConsumed,
1141                                         true) != 0)
1142                                         return false;
1143                                 if (send == si)
1144                                         return false;
1145                                 sStep = si;
1146                                 tStep = ti;
1147                                 si--;
1148                                 ti--;
1149                         }
1150                         return true;
1151 #else
1152                         // FIXME: it is not efficient for very long target.
1153                         bool sourceConsumed, targetConsumed;
1154                         int mismatchCount = 0;
1155                         for (int i = 0; i < length; i++) {
1156                                 escape1.Source = escape2.Source = null;
1157                                 previousSortKey = previousSortKey2 = null;
1158                                 previousChar = previousChar2 = -1;
1159
1160                                 int ret = CompareInternal (s, start - i, i + 1,
1161                                         t, tstart, t.Length - tstart,
1162                                         stringSort, out targetConsumed,
1163                                         out sourceConsumed, true);
1164                                 if (ret == 0)
1165                                         return true;
1166                                 if (!sourceConsumed && targetConsumed)
1167                                         return false;
1168                                 if (!targetConsumed) {
1169                                         mismatchCount++;
1170                                         if (mismatchCount > Uni.MaxExpansionLength)
1171                                                 // The largest length of an
1172                                                 // expansion is 3, so if the
1173                                                 // target was not consumed more
1174                                                 // than 3 times, then the tail
1175                                                 // character does not match.
1176                                                 return false;
1177                                 }
1178                         }
1179                         return false;
1180 #endif
1181                 }
1182
1183                 #endregion
1184
1185                 #region IndexOf() / LastIndexOf()
1186
1187                 public int IndexOf (string s, string target, CompareOptions opt)
1188                 {
1189                         return IndexOf (s, target, 0, s.Length, opt);
1190                 }
1191
1192                 public int IndexOf (string s, string target, int start, int length, CompareOptions opt)
1193                 {
1194                         SetOptions (opt);
1195                         return IndexOf (s, target, start, length,
1196                                 (opt & CompareOptions.StringSort) != 0);
1197                 }
1198
1199                 public int IndexOf (string s, char target, CompareOptions opt)
1200                 {
1201                         return IndexOf (s, target, 0, s.Length, opt);
1202                 }
1203
1204                 public int IndexOf (string s, char target, int start, int length, CompareOptions opt)
1205                 {
1206                         SetOptions (opt);
1207
1208                         // If target is contraction, then use string search.
1209                         Contraction ct = GetContraction (target);
1210                         if (ct != null) {
1211                                 if (ct.Replacement != null)
1212                                         return IndexOf (s, ct.Replacement, start, length,
1213                                                 (opt & CompareOptions.StringSort) != 0);
1214                                 else
1215                                         return IndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
1216                         } else {
1217                                 int ti = FilterOptions ((int) target);
1218                                 charSortKeyIndexTarget [0] = Category (ti);
1219                                 charSortKeyIndexTarget [1] = Level1 (ti);
1220                                 if (!ignoreNonSpace)
1221                                         charSortKeyIndexTarget [2] =
1222                                                 Level2 (ti, ExtenderType.None);
1223                                 charSortKeyIndexTarget [3] = Uni.Level3 (ti);
1224                                 return IndexOfSortKey (s, start, length,
1225                                         charSortKeyIndexTarget, target, ti,
1226                                         !Uni.HasSpecialWeight ((char) ti));
1227                         }
1228                 }
1229
1230                 // Searches target byte[] keydata
1231                 int IndexOfSortKey (string s, int start, int length, byte [] sortkey, char target, int ti, bool noLv4)
1232                 {
1233                         int end = start + length;
1234                         int idx = start;
1235                         while (idx < end) {
1236                                 int cur = idx;
1237                                 if (MatchesForward (s, ref idx, end, ti, sortkey, noLv4))
1238                                         return cur;
1239                         }
1240                         return -1;
1241                 }
1242
1243                 // Searches string. Search head character (or keydata when
1244                 // the head is contraction sortkey) and try IsPrefix().
1245                 int IndexOf (string s, string target, int start, int length, bool stringSort)
1246                 {
1247                         int tidx = 0;
1248                         for (; tidx < target.Length; tidx++)
1249                                 if (!IsIgnorable (target [tidx]))
1250                                         break;
1251                         if (tidx == target.Length)
1252                                 return start;
1253                         Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1254                         string replace = ct != null ? ct.Replacement : null;
1255                         byte [] sk = replace == null ? charSortKeyIndexTarget : null;
1256                         bool noLv4 = true;
1257                         char tc = char.MinValue;
1258                         int ti = -1;
1259                         if (ct != null && sk != null) {
1260                                 for (int i = 0; i < ct.SortKey.Length; i++)
1261                                         sk [i] = ct.SortKey [i];
1262                         } else if (sk != null) {
1263                                 tc = target [tidx];
1264                                 ti = FilterOptions (target [tidx]);
1265                                 sk [0] = Category (ti);
1266                                 sk [1] = Level1 (ti);
1267                                 if (!ignoreNonSpace)
1268                                         sk [2] = Level2 (ti, ExtenderType.None);
1269                                 sk [3] = Uni.Level3 (ti);
1270                                 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1271                         }
1272                         if (sk != null) {
1273                                 for (tidx++; tidx < target.Length; tidx++) {
1274                                         if (Category (target [tidx]) != 1)
1275                                                 break;
1276                                         if (sk [2] == 0)
1277                                                 sk [2] = 2;
1278                                         sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1279                                 }
1280                         }
1281
1282                         do {
1283                                 int idx = 0;
1284                                 if (replace != null)
1285                                         idx = IndexOf (s, replace, start, length, stringSort);
1286                                 else
1287                                         idx = IndexOfSortKey (s, start, length, sk, tc, ti, noLv4);
1288                                 if (idx < 0)
1289                                         return -1;
1290                                 length -= idx - start;
1291                                 start = idx;
1292                                 if (IsPrefix (s, target, start, length, stringSort, false))
1293                                         return idx;
1294                                 Contraction cts = GetContraction (s, start, length);
1295                                 if (cts != null) {
1296                                         start += cts.Source.Length;
1297                                         length -= cts.Source.Length;
1298                                 } else {
1299                                         start++;
1300                                         length--;
1301                                 }
1302                         } while (length > 0);
1303                         return -1;
1304                 }
1305
1306                 //
1307                 // There are the same number of IndexOf() related methods,
1308                 // with the same functionalities for each.
1309                 //
1310
1311                 public int LastIndexOf (string s, string target, CompareOptions opt)
1312                 {
1313                         return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1314                 }
1315
1316                 public int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
1317                 {
1318                         SetOptions (opt);
1319                         return LastIndexOf (s, target, start, length,
1320                                 (opt & CompareOptions.StringSort) != 0);
1321                 }
1322
1323                 public int LastIndexOf (string s, char target, CompareOptions opt)
1324                 {
1325                         return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1326                 }
1327
1328                 public int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
1329                 {
1330                         SetOptions (opt);
1331
1332                         // If target is contraction, then use string search.
1333                         Contraction ct = GetContraction (target);
1334                         if (ct != null) {
1335                                 if (ct.Replacement != null)
1336                                         return LastIndexOf (s, ct.Replacement, start, length, (opt & CompareOptions.StringSort) != 0);
1337                                 else
1338                                         return LastIndexOfSortKey (s, start, start, length, ct.SortKey, char.MinValue, -1, true);
1339                         }
1340                         else {
1341                                 int ti = FilterOptions ((int) target);
1342                                 charSortKeyIndexTarget [0] = Category (ti);
1343                                 charSortKeyIndexTarget [1] = Level1 (ti);
1344                                 if (!ignoreNonSpace)
1345                                         charSortKeyIndexTarget [2] = Level2 (ti, ExtenderType.None);
1346                                 charSortKeyIndexTarget [3] = Uni.Level3 (ti);
1347                                 return LastIndexOfSortKey (s, start, start, length, charSortKeyIndexTarget, target, ti, !Uni.HasSpecialWeight ((char) ti));
1348                         }
1349                 }
1350
1351                 // Searches target byte[] keydata
1352                 int LastIndexOfSortKey (string s, int start, int orgStart, int length, byte [] sortkey, char target, int ti, bool noLv4)
1353                 {
1354                         int end = start - length;
1355                         int idx = start;
1356                         while (idx > end) {
1357                                 int cur = idx;
1358                                 if (MatchesBackward (s, ref idx, end, orgStart, ti, sortkey, noLv4))
1359                                         return cur;
1360                         }
1361                         return -1;
1362                 }
1363
1364                 // Searches string. Search head character (or keydata when
1365                 // the head is contraction sortkey) and try IsPrefix().
1366                 int LastIndexOf (string s, string target, int start, int length, bool stringSort)
1367                 {
1368                         int orgStart = start;
1369                         int tidx = 0;
1370                         for (; tidx < target.Length; tidx++)
1371                                 if (!IsIgnorable (target [tidx]))
1372                                         break;
1373                         if (tidx == target.Length)
1374                                 return start;
1375                         Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1376                         string replace = ct != null ? ct.Replacement : null;
1377                         byte [] sk = replace == null ? charSortKeyIndexTarget : null;
1378
1379                         bool noLv4 = true;
1380                         char tc = char.MinValue;
1381                         int ti = -1;
1382                         if (ct != null && sk != null) {
1383                                 for (int i = 0; i < ct.SortKey.Length; i++)
1384                                         sk [i] = ct.SortKey [i];
1385                         } else if (sk != null) {
1386                                 tc = target [tidx];
1387                                 ti = FilterOptions (target [tidx]);
1388                                 sk [0] = Category (ti);
1389                                 sk [1] = Level1 (ti);
1390                                 if (!ignoreNonSpace)
1391                                         sk [2] = Level2 (ti, ExtenderType.None);
1392                                 sk [3] = Uni.Level3 (ti);
1393                                 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1394                         }
1395                         if (sk != null) {
1396                                 for (tidx++; tidx < target.Length; tidx++) {
1397                                         if (Category (target [tidx]) != 1)
1398                                                 break;
1399                                         if (sk [2] == 0)
1400                                                 sk [2] = 2;
1401                                         sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1402                                 }
1403                         }
1404
1405                         do {
1406                                 int idx = 0;
1407
1408                                 if (replace != null)
1409                                         idx = LastIndexOf (s, replace, start, length, stringSort);
1410                                 else
1411                                         idx = LastIndexOfSortKey (s, start, orgStart, length, sk, tc, ti, noLv4);
1412                                 if (idx < 0)
1413                                         return -1;
1414                                 length -= start - idx;
1415                                 start = idx;
1416
1417                                 if (IsPrefix (s, target, idx, orgStart - idx + 1, stringSort, false)) {
1418                                         for (;idx < orgStart; idx++)
1419                                                 if (!IsIgnorable (s [idx]))
1420                                                         break;
1421                                         return idx;
1422                                 }
1423                                 Contraction cts = GetContraction (s, idx, orgStart - idx + 1);
1424                                 if (cts != null) {
1425                                         start -= cts.Source.Length;
1426                                         length -= cts.Source.Length;
1427                                 } else {
1428                                         start--;
1429                                         length--;
1430                                 }
1431                         } while (length > 0);
1432                         return -1;
1433                 }
1434
1435                 private bool MatchesForward (string s, ref int idx, int end, int ti, byte [] sortkey, bool noLv4)
1436                 {
1437                         int si = -1;
1438                         ExtenderType ext = GetExtenderType (s [idx]);
1439                         Contraction ct = null;
1440                         if (ext == ExtenderType.None)
1441                                 ct = GetContraction (s, idx, end);
1442                         else if (previousChar < 0) {
1443                                 if (previousSortKey == null) {
1444                                         idx++;
1445                                         return false;
1446                                 }
1447                                 charSortKey = previousSortKey;
1448                         }
1449                         else
1450                                 si = FilterExtender (previousChar, ext);
1451                         // if lv4 exists, it never matches contraction
1452                         if (ct != null) {
1453                                 idx += ct.Source.Length;
1454                                 if (!noLv4)
1455                                         return false;
1456                                 if (ct.SortKey != null) {
1457                                         for (int i = 0; i < sortkey.Length; i++)
1458                                                 charSortKey [i] = sortkey [i];
1459                                         previousChar = -1;
1460                                         previousSortKey = charSortKey;
1461                                 } else {
1462                                         // Here is the core of LAMESPEC
1463                                         // described at the top of the source.
1464                                         int dummy = 0;
1465                                         return MatchesForward (ct.Replacement, ref dummy,
1466                                                 ct.Replacement.Length, ti, sortkey, noLv4);
1467                                 }
1468                         } else {
1469                                 if (si < 0)
1470                                         si = FilterOptions (s [idx]);
1471                                 charSortKey [0] = Category (si);
1472                                 charSortKey [1] = Level1 (si);
1473                                 if (!ignoreNonSpace)
1474                                         charSortKey [2] = Level2 (si, ext);
1475                                 charSortKey [3] = Uni.Level3 (si);
1476                                 if (charSortKey [0] != 1)
1477                                         previousChar = si;
1478                                 idx++;
1479                         }
1480                         for (; idx < end; idx++) {
1481                                 if (Category (s [idx]) != 1)
1482                                         break;
1483                                 if (ignoreNonSpace)
1484                                         continue;
1485                                 if (charSortKey [2] == 0)
1486                                                 charSortKey [2] = 2;
1487                                         charSortKey [2] = (byte) (charSortKey [2]
1488                                                 + Level2 (s [idx], ExtenderType.None));
1489                         }
1490
1491                         return MatchesPrimitive (charSortKey, si, ext, sortkey, ti, noLv4);
1492                 }
1493
1494                 private bool MatchesPrimitive (byte [] charSortKey, int si, ExtenderType ext, byte [] sortkey, int ti, bool noLv4)
1495                 {
1496                         if (charSortKey [0] != sortkey [0] ||
1497                                 charSortKey [1] != sortkey [1] ||
1498                                 (!ignoreNonSpace && charSortKey [2] != sortkey [2]) ||
1499                                 charSortKey [3] != sortkey [3])
1500                                 return false;
1501                         if (noLv4 && (si < 0 || !Uni.HasSpecialWeight ((char) si)))
1502                                 return true;
1503                         else if (noLv4)
1504                                 return false;
1505                         // Since target can never be an extender, if the source
1506                         // is an expander and it matters, then they never match.
1507                         if (!ignoreNonSpace && ext == ExtenderType.Conditional)
1508                                 return false;
1509                         if (Uni.IsJapaneseSmallLetter ((char) si) !=
1510                                 Uni.IsJapaneseSmallLetter ((char) ti) ||
1511                                 ToDashTypeValue (ext) !=
1512                                 // FIXME: we will have to specify correct value for target
1513                                 ToDashTypeValue (ExtenderType.None) ||
1514                                 !Uni.IsHiragana ((char) si) !=
1515                                 !Uni.IsHiragana ((char) ti) ||
1516                                 IsHalfKana ((char) si) !=
1517                                 IsHalfKana ((char) ti))
1518                                 return false;
1519                         return true;
1520                 }
1521
1522                 private bool MatchesBackward (string s, ref int idx, int end, int orgStart, int ti, byte [] sortkey, bool noLv4)
1523                 {
1524                         int cur = idx;
1525                         int si = -1;
1526                         ExtenderType ext = GetExtenderType (s [idx]);
1527                         // To handle extenders in source, we need to
1528                         // check next _primary_ character.
1529                         if (ext != ExtenderType.None) {
1530                                 byte diacritical = 0;
1531                                 for (int tmp = 0; ; tmp--) {
1532                                         if (tmp < 0) // heading extender
1533                                                 return false;
1534                                         if (IsIgnorable (s [tmp]))
1535                                                 continue;
1536                                         int tmpi = FilterOptions (s [tmp]);
1537                                         byte category = Category (tmpi);
1538                                         if (category == 1) {
1539                                                 diacritical = Level2 (tmpi, ExtenderType.None);
1540                                                 continue;
1541                                         }
1542                                         si = FilterExtender (tmpi, ext);
1543                                         charSortKey [0] = category;
1544                                         charSortKey [1] = Level1 (si);
1545                                         if (!ignoreNonSpace)
1546                                                 charSortKey [2] = Level2 (si, ext);
1547                                         charSortKey [3] = Uni.Level3 (si);
1548                                         if (ext != ExtenderType.Conditional &&
1549                                                 diacritical != 0)
1550                                                 charSortKey [2] =
1551                                                         (charSortKey [2] == 0) ?
1552                                                         (byte) (diacritical + 2) :
1553                                                         diacritical;
1554                                         break;
1555                                 }
1556                                 idx--;
1557                         }
1558                         Contraction ct = null;
1559                         if (ext == ExtenderType.None)
1560                                 ct = GetContraction (s, idx, end);
1561                         // if lv4 exists, it never matches contraction
1562                         if (ct != null) {
1563                                 idx -= ct.Source.Length;
1564                                 if (!noLv4)
1565                                         return false;
1566                                 if (ct.SortKey != null) {
1567                                         for (int i = 0; i < sortkey.Length; i++)
1568                                                 charSortKey [i] = sortkey [i];
1569                                         previousChar = -1;
1570                                         previousSortKey = charSortKey;
1571                                 } else {
1572                                         // Here is the core of LAMESPEC
1573                                         // described at the top of the source.
1574                                         int dummy = ct.Replacement.Length - 1;
1575                                         return MatchesBackward (ct.Replacement, 
1576                                                 ref dummy, dummy, -1, ti, sortkey, noLv4);
1577                                 }
1578                         } else if (ext == ExtenderType.None) {
1579                                 if (si < 0)
1580                                         si = FilterOptions (s [idx]);
1581                                 charSortKey [0] = Category (si);
1582                                 charSortKey [1] = Level1 (si);
1583                                 if (!ignoreNonSpace)
1584                                         charSortKey [2] = Level2 (si, ext);
1585                                 charSortKey [3] = Uni.Level3 (si);
1586                                 if (charSortKey [0] != 1)
1587                                         previousChar = si;
1588                                 idx--;
1589                         }
1590                         if (ext == ExtenderType.None) {
1591                                 for (int tmp = cur + 1; tmp < orgStart; tmp++) {
1592                                         if (Category (s [tmp]) != 1)
1593                                                 break;
1594                                         if (ignoreNonSpace)
1595                                                 continue;
1596                                         if (charSortKey [2] == 0)
1597                                                         charSortKey [2] = 2;
1598                                         charSortKey [2] = (byte) (charSortKey [2]
1599                                                 + Level2 (s [tmp], ExtenderType.None));
1600                                 }
1601                         }
1602                         return MatchesPrimitive (charSortKey, si, ext, sortkey, ti, noLv4);
1603                 }
1604                 #endregion
1605         }
1606 }