2005-08-04 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / SimpleCollator.cs
1 //
2 // SimpleCollator.cs : the core collator implementation
3 //
4 // Author:
5 //      Atsushi Enomoto  <atsushi@ximian.com>
6 //
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
8 //
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
16 // 
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
19 // 
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28
29 //
30 // Here's a summary for supporting contractions, expansions and diacritical 
31 // remappings.
32 //
33 // Diacritical mapping is a simple tailoring rule that "remaps" diacritical
34 // weight value from one to another. For now it applies to all range of
35 // characters, but at some stage we might need to limit the remapping ranges.
36 //
37 // A Contraction consists of a string (actually char[]) and a sortkey entry
38 // (i.e. byte[]). It indicates that a sequence of characters is interpreted
39 // as a single character that has the mapped sortkey value. There is no
40 // character which goes across "special rules". When the collator encountered
41 // such a sequence of characters, it just returns the sortkey value without
42 // further replacement.
43 //
44 // Since it is still likely to happen that a contraction sequence matches
45 // other character than the identical sequence (depending on CompareOptions
46 // and of course, defined sortkey value itself), comparison cannot be done
47 // at source char[] level.
48 //
49 // In IndexOf(), the first character in the target string or the target char
50 // itself is turned into sortkey bytes. If the character has a contraction and
51 // that is sortkey map, then it is used instead. If the contraction exists and
52 // that is replacement map, then the first character of the replacement string
53 // is searched instead. IndexOf() always searches only for the top character,
54 // and if it returned negative value, then it returns -1. Otherwise, it then
55 // tries IsPrefix() from that location. If it returns true, then it returns
56 // the index.
57 //
58 // LAMESPEC: IndexOf() is lame as a whole API. It never matches in the middle
59 // of expansion and there is no proper way to return such indexes within
60 // a single int return value.
61 //
62 // For example, try below in .NET:
63 //      IndexOf("\u00E6", "a")
64 //      IndexOf("\u00E6", "e")
65 //
66
67
68 using System;
69 using System.Collections;
70 using System.Globalization;
71
72 using Uni = Mono.Globalization.Unicode.MSCompatUnicodeTable;
73 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
74
75 namespace Mono.Globalization.Unicode
76 {
77         internal class SimpleCollator
78         {
79                 static SimpleCollator invariant =
80                         new SimpleCollator (CultureInfo.InvariantCulture);
81
82                 SortKeyBuffer buf;
83                 // CompareOptions expanded.
84                 bool ignoreNonSpace; // used in IndexOf()
85                 bool ignoreSymbols;
86                 bool ignoreWidth;
87                 bool ignoreCase;
88                 bool ignoreKanaType;
89                 TextInfo textInfo; // for ToLower().
90                 bool frenchSort;
91                 unsafe readonly byte* cjkCatTable;
92                 unsafe readonly byte* cjkLv1Table;
93                 readonly CodePointIndexer cjkIndexer;
94                 unsafe readonly byte* cjkLv2Table;
95                 readonly CodePointIndexer cjkLv2Indexer;
96                 readonly int lcid;
97                 readonly Contraction [] contractions;
98                 readonly Level2Map [] level2Maps;
99
100                 // This flag marks characters as "unsafe", where the character
101                 // could be used as part of a contraction (whose length > 1).
102                 readonly byte [] unsafeFlags;
103
104                 const int UnsafeFlagLength = 0x300 / 8;
105
106                 // temporary sortkey buffer for index search/comparison
107                 byte [] charSortKey = new byte [4];
108                 byte [] charSortKey2 = new byte [4];
109                 byte [] charSortKeyIndexTarget = new byte [4];
110
111                 #region .ctor() and split functions
112
113                 public SimpleCollator (CultureInfo culture)
114                 {
115                         lcid = culture.LCID;
116                         textInfo = culture.TextInfo;
117                         buf = new SortKeyBuffer (culture.LCID);
118
119                         unsafe {
120                                 SetCJKTable (culture, ref cjkIndexer,
121                                         ref cjkCatTable, ref cjkLv1Table,
122                                         ref cjkLv2Indexer, ref cjkLv2Table);
123                         }
124
125                         // Get tailoring info
126                         TailoringInfo t = null;
127                         for (CultureInfo ci = culture; ci.LCID != 127; ci = ci.Parent) {
128                                 t = Uni.GetTailoringInfo (ci.LCID);
129                                 if (t != null)
130                                         break;
131                         }
132                         if (t == null) // then use invariant
133                                 t = Uni.GetTailoringInfo (127);
134
135                         frenchSort = t.FrenchSort;
136                         Uni.BuildTailoringTables (culture, t, ref contractions,
137                                 ref level2Maps);
138                         unsafeFlags = new byte [UnsafeFlagLength];
139                         foreach (Contraction c in contractions)
140                                 if (c.Source.Length > 1)
141                                         foreach (char ch in c.Source)
142                                                 unsafeFlags [(int) ch / 8 ]
143                                                         |= (byte) ((int) ch % 8);
144                         if (lcid != 127)
145                                 foreach (Contraction c in invariant.contractions)
146                                         if (c.Source.Length > 1)
147                                                 foreach (char ch in c.Source)
148                                                         unsafeFlags [(int) ch / 8 ] 
149                                                                 |= (byte) ((int) ch % 8);
150
151                         // FIXME: Since tailorings are mostly for latin
152                         // (and in some cases Cyrillic) characters, it would
153                         // be much better for performance to store "start 
154                         // indexes" for > 370 (culture-specific letters).
155
156 /*
157 // dump tailoring table
158 Console.WriteLine ("******** building table for {0} : c - {1} d - {2}",
159 culture.LCID, contractions.Length, level2Maps.Length);
160 foreach (Contraction c in contractions) {
161 foreach (char cc in c.Source)
162 Console.Write ("{0:X4} ", (int) cc);
163 Console.WriteLine (" -> '{0}'", c.Replacement);
164 }
165 */
166                 }
167
168                 unsafe private void SetCJKTable (
169                         CultureInfo culture, ref CodePointIndexer cjkIndexer,
170                         ref byte* catTable, ref byte* lv1Table,
171                         ref CodePointIndexer lv2Indexer, ref byte* lv2Table)
172                 {
173                         string name = GetNeutralCulture (culture).Name;
174
175                         Uni.FillCJK (name, ref cjkIndexer, ref catTable,
176                                 ref lv1Table, ref lv2Indexer, ref lv2Table);
177                 }
178
179                 static CultureInfo GetNeutralCulture (CultureInfo info)
180                 {
181                         CultureInfo ret = info;
182                         while (ret.Parent != null && ret.Parent.LCID != 127)
183                                 ret = ret.Parent;
184                         return ret;
185                 }
186
187                 #endregion
188
189                 unsafe byte Category (int cp)
190                 {
191                         if (cp < 0x3000 || cjkCatTable == null)
192                                 return Uni.Category (cp);
193                         int idx = cjkIndexer.ToIndex (cp);
194                         return idx < 0 ? Uni.Category (cp) : cjkCatTable [idx];
195                 }
196
197                 unsafe byte Level1 (int cp)
198                 {
199                         if (cp < 0x3000 || cjkLv1Table == null)
200                                 return Uni.Level1 (cp);
201                         int idx = cjkIndexer.ToIndex (cp);
202                         return idx < 0 ? Uni.Level1 (cp) : cjkLv1Table [idx];
203                 }
204
205                 unsafe byte Level2 (int cp, ExtenderType ext)
206                 {
207                         if (ext == ExtenderType.Buggy)
208                                 return 5;
209                         else if (ext == ExtenderType.Conditional)
210                                 return 0;
211
212                         if (cp < 0x3000 || cjkLv2Table == null)
213                                 return Uni.Level2 (cp);
214                         int idx = cjkLv2Indexer.ToIndex (cp);
215                         byte ret = idx < 0 ? (byte) 0 : cjkLv2Table [idx];
216                         if (ret != 0)
217                                 return ret;
218                         ret = Uni.Level2 (cp);
219                         if (level2Maps.Length == 0)
220                                 return ret;
221                         for (int i = 0; i < level2Maps.Length; i++) {
222                                 if (level2Maps [i].Source == ret)
223                                         return level2Maps [i].Replace;
224                                 else if (level2Maps [i].Source > ret)
225                                         break;
226                         }
227                         return ret;
228                 }
229
230                 bool IsHalfKana (int cp)
231                 {
232                         return ignoreWidth || Uni.IsHalfWidthKana ((char) cp);
233                 }
234
235                 void SetOptions (CompareOptions options)
236                 {
237                         this.ignoreNonSpace = (options & CompareOptions.IgnoreNonSpace) != 0;
238                         this.ignoreSymbols = (options & CompareOptions.IgnoreSymbols) != 0;
239                         this.ignoreWidth = (options & CompareOptions.IgnoreWidth) != 0;
240                         this.ignoreCase = (options & CompareOptions.IgnoreCase) != 0;
241                         this.ignoreKanaType = (options & CompareOptions.IgnoreKanaType) != 0;
242                         previousChar = previousChar2 = -1;
243                         previousSortKey = previousSortKey2 = null;
244                         escape1.Source = escape2.Source = null;
245                 }
246
247                 Contraction GetContraction (string s, int start, int end)
248                 {
249                         Contraction c = GetContraction (s, start, end, contractions);
250                         if (c != null || lcid == 127)
251                                 return c;
252                         return GetContraction (s, start, end, invariant.contractions);
253                 }
254
255                 Contraction GetContraction (string s, int start, int end, Contraction [] clist)
256                 {
257                         for (int i = 0; i < clist.Length; i++) {
258                                 Contraction ct = clist [i];
259                                 int diff = ct.Source [0] - s [start];
260                                 if (diff > 0)
261                                         return null; // it's already sorted
262                                 else if (diff < 0)
263                                         continue;
264                                 char [] chars = ct.Source;
265                                 if (end - start < chars.Length)
266                                         continue;
267                                 bool match = true;
268                                 for (int n = 0; n < chars.Length; n++)
269                                         if (s [start + n] != chars [n]) {
270                                                 match = false;
271                                                 break;
272                                         }
273                                 if (match)
274                                         return ct;
275                         }
276                         return null;
277                 }
278
279                 Contraction GetTailContraction (string s, int start, int end)
280                 {
281                         Contraction c = GetTailContraction (s, start, end, contractions);
282                         if (c != null || lcid == 127)
283                                 return c;
284                         return GetTailContraction (s, start, end, invariant.contractions);
285                 }
286
287                 Contraction GetTailContraction (string s, int start, int end, Contraction [] clist)
288                 {
289                         for (int i = 0; i < clist.Length; i++) {
290                                 Contraction ct = clist [i];
291                                 int diff = ct.Source [0] - s [end];
292                                 if (diff > 0)
293                                         return null; // it's already sorted
294                                 else if (diff < 0)
295                                         continue;
296                                 char [] chars = ct.Source;
297                                 if (start - end + 1 < chars.Length)
298                                         continue;
299                                 bool match = true;
300                                 int offset = start - chars.Length + 1;
301                                 for (int n = 0; n < chars.Length; n++)
302                                         if (s [offset + n] != chars [n]) {
303                                                 match = false;
304                                                 break;
305                                         }
306                                 if (match)
307                                         return ct;
308                         }
309                         return null;
310                 }
311
312                 Contraction GetContraction (char c)
313                 {
314                         Contraction ct = GetContraction (c, contractions);
315                         if (ct != null || lcid == 127)
316                                 return ct;
317                         return GetContraction (c, invariant.contractions);
318                 }
319
320                 Contraction GetContraction (char c, Contraction [] clist)
321                 {
322                         for (int i = 0; i < clist.Length; i++) {
323                                 Contraction ct = clist [i];
324                                 if (ct.Source [0] > c)
325                                         return null; // it's already sorted
326                                 if (ct.Source [0] == c && ct.Source.Length == 1)
327                                         return ct;
328                         }
329                         return null;
330                 }
331
332                 int FilterOptions (int i)
333                 {
334                         if (ignoreWidth) {
335                                 int x = Uni.ToWidthCompat (i);
336                                 if (x != 0)
337                                         i = x;
338                         }
339                         if (ignoreCase)
340                                 i = textInfo.ToLower ((char) i);
341                         if (ignoreKanaType)
342                                 i = Uni.ToKanaTypeInsensitive (i);
343                         return i;
344                 }
345
346                 int previousChar = -1;
347                 byte [] previousSortKey = null;
348                 int previousChar2 = -1;
349                 byte [] previousSortKey2 = null;
350
351                 enum ExtenderType {
352                         None,
353                         Simple,
354                         Voiced,
355                         Conditional,
356                         Buggy,
357                 }
358
359                 ExtenderType GetExtenderType (int i)
360                 {
361                         // LAMESPEC: Windows expects true for U+3005, but 
362                         // sometimes it does not represent to repeat just
363                         // one character.
364                         // Windows also expects true for U+3031 and U+3032,
365                         // but they should *never* repeat one character.
366
367                         // U+2015 becomes an extender only when it is Japanese
368                         if (i == 0x2015)
369                                 return lcid == 16 ? ExtenderType.Conditional :
370                                         ExtenderType.None;
371
372                         if (i < 0x3005 || i > 0xFF70)
373                                 return ExtenderType.None;
374                         if (i >= 0xFE7C) {
375                                 switch (i) {
376                                 case 0xFE7C:
377                                 case 0xFE7D:
378                                         return ExtenderType.Simple;
379                                 case 0xFF70:
380                                         return ExtenderType.Conditional;
381                                 case 0xFF9E:
382                                 case 0xFF9F:
383                                         return ExtenderType.Voiced;
384                                 }
385                         }
386                         if (i > 0x30FE)
387                                 return ExtenderType.None;
388                         switch (i) {
389                         case 0x3005: // LAMESPEC: see above.
390                                 return ExtenderType.Buggy;
391                         case 0x3031: // LAMESPEC: see above.
392                         case 0x3032: // LAMESPEC: see above.
393                         case 0x309D:
394                         case 0x30FD:
395                                 return ExtenderType.Simple;
396                         case 0x309E:
397                         case 0x30FE:
398                                 return ExtenderType.Voiced;
399                         case 0x30FC:
400                                 return ExtenderType.Conditional;
401                         default:
402                                 return ExtenderType.None;
403                         }
404                 }
405
406                 byte ToDashTypeValue (ExtenderType ext)
407                 {
408                         if (ignoreNonSpace) // LAMESPEC: huh, why?
409                                 return 3;
410                         switch (ext) {
411                         case ExtenderType.None:
412                                 return 3;
413                         case ExtenderType.Conditional:
414                                 return 5;
415                         default:
416                                 return 4;
417                         }
418                 }
419
420                 int FilterExtender (int i, ExtenderType ext)
421                 {
422                         if (ext == ExtenderType.Conditional &&
423                                 Uni.HasSpecialWeight ((char) i)) {
424                                 bool half = IsHalfKana ((char) i);
425                                 bool katakana = !Uni.IsHiragana ((char) i);
426                                 switch (Level1 (i) & 7) {
427                                 case 2:
428                                         return half ? 0xFF71 : katakana ? 0x30A2 : 0x3042;
429                                 case 3:
430                                         return half ? 0xFF72 : katakana ? 0x30A4 : 0x3044;
431                                 case 4:
432                                         return half ? 0xFF73 : katakana ? 0x30A6 : 0x3046;
433                                 case 5:
434                                         return half ? 0xFF74 : katakana ? 0x30A8 : 0x3048;
435                                 case 6:
436                                         return half ? 0xFF75 : katakana ? 0x30AA : 0x304A;
437                                 }
438                         }
439                         return i;
440                 }
441
442                 bool IsIgnorable (int i)
443                 {
444                         return Uni.IsIgnorable (i) ||
445                                 ignoreSymbols && Uni.IsIgnorableSymbol (i) ||
446                                 ignoreNonSpace && Uni.IsIgnorableNonSpacing (i);
447                 }
448
449                 bool IsSafe (int i)
450                 {
451                         return i / 8 >= unsafeFlags.Length ? true : (unsafeFlags [i / 8] & (1 << (i % 8))) == 0;
452                 }
453
454                 #region GetSortKey()
455
456                 public SortKey GetSortKey (string s)
457                 {
458                         return GetSortKey (s, CompareOptions.None);
459                 }
460
461                 public SortKey GetSortKey (string s, CompareOptions options)
462                 {
463                         return GetSortKey (s, 0, s.Length, options);
464                 }
465
466                 public SortKey GetSortKey (string s, int start, int length, CompareOptions options)
467                 {
468                         SetOptions (options);
469
470                         buf.Initialize (options, lcid, s, frenchSort);
471                         int end = start + length;
472                         GetSortKey (s, start, end);
473                         return buf.GetResultAndReset ();
474                 }
475
476                 void GetSortKey (string s, int start, int end)
477                 {
478                         for (int n = start; n < end; n++) {
479                                 int i = s [n];
480
481                                 ExtenderType ext = GetExtenderType (i);
482                                 if (ext != ExtenderType.None) {
483                                         i = FilterExtender (previousChar, ext);
484                                         if (i >= 0)
485                                                 FillSortKeyRaw (i, ext);
486                                         else if (previousSortKey != null) {
487                                                 byte [] b = previousSortKey;
488                                                 buf.AppendNormal (
489                                                         b [0],
490                                                         b [1],
491                                                         b [2] != 1 ? b [2] : Level2 (i, ext),
492                                                         b [3] != 1 ? b [3] : Uni.Level3 (i));
493                                         }
494                                         // otherwise do nothing.
495                                         // (if the extender is the first char
496                                         // in the string, then just ignore.)
497                                         continue;
498                                 }
499
500                                 if (IsIgnorable (i))
501                                         continue;
502                                 i = FilterOptions (i);
503
504                                 Contraction ct = GetContraction (s, n, end);
505                                 if (ct != null) {
506                                         if (ct.Replacement != null) {
507                                                 GetSortKey (ct.Replacement, 0, ct.Replacement.Length);
508                                         } else {
509                                                 byte [] b = ct.SortKey;
510                                                 buf.AppendNormal (
511                                                         b [0],
512                                                         b [1],
513                                                         b [2] != 1 ? b [2] : Level2 (i, ext),
514                                                         b [3] != 1 ? b [3] : Uni.Level3 (i));
515                                                 previousSortKey = b;
516                                                 previousChar = -1;
517                                         }
518                                         n += ct.Source.Length - 1;
519                                 }
520                                 else {
521                                         if (!Uni.IsIgnorableNonSpacing (i))
522                                                 previousChar = i;
523                                         FillSortKeyRaw (i, ExtenderType.None);
524                                 }
525                         }
526                 }
527
528                 void FillSortKeyRaw (int i, ExtenderType ext)
529                 {
530                         if (0x3400 <= i && i <= 0x4DB5) {
531                                 int diff = i - 0x3400;
532                                 buf.AppendCJKExtension (
533                                         (byte) (0x10 + diff / 254),
534                                         (byte) (diff % 254 + 2));
535                                 return;
536                         }
537
538                         UnicodeCategory uc = char.GetUnicodeCategory ((char) i);
539                         switch (uc) {
540                         case UnicodeCategory.PrivateUse:
541                                 int diff = i - 0xE000;
542                                 buf.AppendNormal (
543                                         (byte) (0xE5 + diff / 254),
544                                         (byte) (diff % 254 + 2),
545                                         0,
546                                         0);
547                                 return;
548                         case UnicodeCategory.Surrogate:
549                                 FillSurrogateSortKeyRaw (i);
550                                 return;
551                         }
552
553                         byte level2 = Level2 (i, ext);
554                         if (Uni.HasSpecialWeight ((char) i)) {
555                                 byte level1 = Level1 (i);
556                                 buf.AppendKana (
557                                         Category (i),
558                                         level1,
559                                         level2,
560                                         Uni.Level3 (i),
561                                         Uni.IsJapaneseSmallLetter ((char) i),
562                                         ToDashTypeValue (ext),
563                                         !Uni.IsHiragana ((char) i),
564                                         IsHalfKana ((char) i)
565                                         );
566                                 if (!ignoreNonSpace && ext == ExtenderType.Voiced)
567                                         // Append voice weight
568                                         buf.AppendNormal (1, 1, 1, 0);
569                         }
570                         else
571                                 buf.AppendNormal (
572                                         Category (i),
573                                         Level1 (i),
574                                         level2,
575                                         Uni.Level3 (i));
576                 }
577
578                 void FillSurrogateSortKeyRaw (int i)
579                 {
580                         int diffbase = 0;
581                         int segment = 0;
582                         byte lower = 0;
583
584                         if (i < 0xD840) {
585                                 diffbase = 0xD800;
586                                 segment = 0x41;
587                                 lower = (byte) ((i == 0xD800) ? 0x3E : 0x3F);
588                         } else if (0xD840 <= i && i < 0xD880) {
589                                 diffbase = 0xD840;
590                                 segment = 0xF2;
591                                 lower = 0x3E;
592                         } else if (0xDB80 <= i && i < 0xDC00) {
593                                 diffbase = 0xDB80 - 0x40;
594                                 segment = 0xFE;
595                                 lower = 0x3E;
596                         } else {
597                                 diffbase = 0xDC00 - 0xF8 + 2;
598                                 segment = 0x41;
599                                 lower = 0x3F;
600                         }
601                         int diff = i - diffbase;
602
603                         buf.AppendNormal (
604                                 (byte) (segment + diff / 254),
605                                 (byte) (diff % 254 + 2),
606                                 lower,
607                                 lower);
608                 }
609
610                 #endregion
611
612                 #region Compare()
613
614                 public int Compare (string s1, string s2)
615                 {
616                         return Compare (s1, s2, CompareOptions.None);
617                 }
618
619                 public int Compare (string s1, string s2, CompareOptions options)
620                 {
621                         return Compare (s1, 0, s1.Length, s2, 0, s2.Length, options);
622                 }
623
624                 class Escape
625                 {
626                         public string Source;
627                         public int Index;
628                         public int Start;
629                         public int End;
630                         public int Optional;
631                 }
632
633                 // Those instances are reused not to invoke instantiation
634                 // during Compare().
635                 Escape escape1 = new Escape ();
636                 Escape escape2 = new Escape ();
637
638                 private int CompareOrdinal (string s1, int idx1, int len1,
639                         string s2, int idx2, int len2)
640                 {
641                         int min = len1 < len2 ? len1 : len2;
642                         int end1 = idx1 + min;
643                         int end2 = idx2 + min;
644                         for (int i1 = idx1, i2 = idx2;
645                                 i1 < end1 && i2 < end2; i1++, i2++)
646                                 if (s1 [i1] != s2 [i2])
647                                         return s1 [i1] - s2 [i2];
648                         return len1 == len2 ? 0 :
649                                 len1 == min ? - 1 : 1;
650                 }
651
652                 public int Compare (string s1, int idx1, int len1,
653                         string s2, int idx2, int len2, CompareOptions options)
654                 {
655                         // quick equality check
656                         if (idx1 == idx2 && len1 == len2 &&
657                                 Object.ReferenceEquals (s1, s2))
658                                 return 0;
659                         // FIXME: this should be done inside Compare() at
660                         // any time.
661 //                      int ord = CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
662 //                      if (ord == 0)
663 //                              return 0;
664                         if (options == CompareOptions.Ordinal)
665                                 return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
666
667 #if false // stable easy version, depends on GetSortKey().
668                         SortKey sk1 = GetSortKey (s1, idx1, len1, options);
669                         SortKey sk2 = GetSortKey (s2, idx2, len2, options);
670                         byte [] d1 = sk1.KeyData;
671                         byte [] d2 = sk2.KeyData;
672                         int len = d1.Length > d2.Length ? d2.Length : d1.Length;
673                         for (int i = 0; i < len; i++)
674                                 if (d1 [i] != d2 [i])
675                                         return d1 [i] < d2 [i] ? -1 : 1;
676                         return d1.Length == d2.Length ? 0 : d1.Length < d2.Length ? -1 : 1;
677 #else
678                         SetOptions (options);
679                         bool dummy, dummy2;
680                         int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, (options & CompareOptions.StringSort) != 0, out dummy, out dummy2, true);
681                         return ret == 0 ? 0 : ret < 0 ? -1 : 1;
682 #endif
683                 }
684
685                 int CompareInternal (string s1, int idx1, int len1, string s2,
686                         int idx2, int len2, bool stringSort,
687                         out bool targetConsumed, out bool sourceConsumed,
688                         bool skipHeadingExtenders)
689                 {
690                         int start1 = idx1;
691                         int start2 = idx2;
692                         int end1 = idx1 + len1;
693                         int end2 = idx2 + len2;
694                         targetConsumed = false;
695                         sourceConsumed = false;
696
697                         // It holds final result that comes from the comparison
698                         // at level 2 or lower. Even if Compare() found the
699                         // difference at level 2 or lower, it still has to
700                         // continue level 1 comparison. FinalResult is used
701                         // when there was no further differences.
702                         int finalResult = 0;
703                         // It holds the comparison level to do. It starts from
704                         // 5, and becomes 1 when we do primary-only comparison.
705                         int currentLevel = 5;
706
707                         int lv5At1 = -1;
708                         int lv5At2 = -1;
709                         int lv5Value1 = 0;
710                         int lv5Value2 = 0;
711
712                         // Skip heading extenders
713                         if (skipHeadingExtenders) {
714                                 for (; idx1 < end1; idx1++)
715                                         if (GetExtenderType (s1 [idx1]) == ExtenderType.None)
716                                                 break;
717                                 for (; idx2 < end2; idx2++)
718                                         if (GetExtenderType (s2 [idx2]) == ExtenderType.None)
719                                                 break;
720                         }
721
722                         ExtenderType ext1 = ExtenderType.None;
723                         ExtenderType ext2 = ExtenderType.None;
724
725                         int quickCheckPos1 = idx1;
726                         int quickCheckPos2 = idx2;
727
728                         while (true) {
729                                 for (; idx1 < end1; idx1++)
730                                         if (!IsIgnorable (s1 [idx1]))
731                                                 break;
732                                 for (; idx2 < end2; idx2++)
733                                         if (!IsIgnorable (s2 [idx2]))
734                                                 break;
735
736                                 if (idx1 >= end1) {
737                                         if (escape1.Source == null)
738                                                 break;
739                                         s1 = escape1.Source;
740                                         start1 = escape1.Start;
741                                         idx1 = escape1.Index;
742                                         end1 = escape1.End;
743                                         quickCheckPos1 = escape1.Optional;
744                                         escape1.Source = null;
745                                         continue;
746                                 }
747                                 if (idx2 >= end2) {
748                                         if (escape2.Source == null)
749                                                 break;
750                                         s2 = escape2.Source;
751                                         start2 = escape2.Start;
752                                         idx2 = escape2.Index;
753                                         end2 = escape2.End;
754                                         quickCheckPos2 = escape2.Optional;
755                                         escape2.Source = null;
756                                         continue;
757                                 }
758 #if true
759 // If comparison is unstable, then this part is one of the most doubtful part.
760 // Here we run quick codepoint comparison and run back to "stable" area to
761 // compare characters.
762
763                                 // Strictly to say, even the first character
764                                 // could be compared here, but it messed
765                                 // backward step, so I just avoided mess.
766                                 if (quickCheckPos1 < idx1 && quickCheckPos2 < idx2) {
767                                         while (idx1 < end1 && idx2 < end2 &&
768                                                 s1 [idx1] == s2 [idx2]) {
769                                                 idx1++;
770                                                 idx2++;
771                                         }
772                                         if (idx1 == end1 || idx2 == end2)
773                                                 continue; // check replacement
774
775                                         int backwardEnd1 = quickCheckPos1;
776                                         int backwardEnd2 = quickCheckPos2;
777                                         quickCheckPos1 = idx1;
778                                         quickCheckPos2 = idx2;
779
780                                         idx1--;
781                                         idx2--;
782
783                                         for (;idx1 > backwardEnd1; idx1--)
784                                                 if (Category (s1 [idx1]) != 1)
785                                                         break;
786                                         for (;idx2 > backwardEnd2; idx2--)
787                                                 if (Category (s2 [idx2]) != 1)
788                                                         break;
789                                         for (;idx1 > backwardEnd1; idx1--)
790                                                 if (IsSafe (s1 [idx1]))
791                                                         break;
792                                         for (;idx2 > backwardEnd2; idx2--)
793                                                 if (IsSafe (s2 [idx2]))
794                                                         break;
795                                 }
796 #endif
797
798                                 int cur1 = idx1;
799                                 int cur2 = idx2;
800                                 byte [] sk1 = null;
801                                 byte [] sk2 = null;
802                                 int i1 = FilterOptions (s1 [idx1]);
803                                 int i2 = FilterOptions (s2 [idx2]);
804                                 bool special1 = false;
805                                 bool special2 = false;
806
807                                 // If current character is an expander, then
808                                 // repeat the previous character.
809                                 ext1 = GetExtenderType (i1);
810                                 if (ext1 != ExtenderType.None) {
811                                         if (previousChar < 0) {
812                                                 if (previousSortKey == null) {
813                                                         // nothing to extend
814                                                         idx1++;
815                                                         continue;
816                                                 }
817                                                 sk1 = previousSortKey;
818                                         }
819                                         else
820                                                 i1 = FilterExtender (previousChar, ext1);
821                                 }
822                                 ext2 = GetExtenderType (i2);
823                                 if (ext2 != ExtenderType.None) {
824                                         if (previousChar2 < 0) {
825                                                 if (previousSortKey2 == null) {
826                                                         // nothing to extend
827                                                         idx2++;
828                                                         continue;
829                                                 }
830                                                 sk2 = previousSortKey2;
831                                         }
832                                         else
833                                                 i2 = FilterExtender (previousChar2, ext2);
834                                 }
835
836                                 byte cat1 = Category (i1);
837                                 byte cat2 = Category (i2);
838
839                                 // Handle special weight characters
840                                 if (!stringSort && currentLevel > 4) {
841                                         if (cat1 == 6) {
842                                                 lv5At1 = escape1.Source != null ?
843                                                         escape1.Index - escape1.Start :
844                                                         cur1 - start1;
845                                                 // here Windows has a bug that it does
846                                                 // not consider thirtiary weight.
847                                                 lv5Value1 = Level1 (i1) << 8 + Uni.Level3 (i1);
848                                                 previousChar = i1;
849                                                 idx1++;
850                                         }
851                                         if (cat2 == 6) {
852                                                 lv5At2 = escape2.Source != null ?
853                                                         escape2.Index - escape2.Start :
854                                                         cur2 - start2;
855                                                 // here Windows has a bug that it does
856                                                 // not consider thirtiary weight.
857                                                 lv5Value2 = Level1 (i2) << 8 + Uni.Level3 (i2);
858                                                 previousChar2 = i2;
859                                                 idx2++;
860                                         }
861                                         if (cat1 == 6 || cat2 == 6) {
862                                                 currentLevel = 4;
863                                                 continue;
864                                         }
865                                 }
866
867                                 Contraction ct1 = null;
868                                 if (ext1 == ExtenderType.None)
869                                         ct1 = GetContraction (s1, idx1, end1);
870
871                                 int offset1 = 1;
872                                 if (sk1 != null)
873                                         offset1 = 1;
874                                 else if (ct1 != null) {
875                                         offset1 = ct1.Source.Length;
876                                         if (ct1.SortKey != null) {
877                                                 sk1 = charSortKey;
878                                                 for (int i = 0; i < ct1.SortKey.Length; i++)
879                                                         sk1 [i] = ct1.SortKey [i];
880                                                 previousChar = -1;
881                                                 previousSortKey = sk1;
882                                         }
883                                         else if (escape1.Source == null) {
884                                                 escape1.Source = s1;
885                                                 escape1.Start = start1;
886                                                 escape1.Index = cur1 + ct1.Source.Length;
887                                                 escape1.End = end1;
888                                                 escape1.Optional = quickCheckPos1;
889                                                 s1 = ct1.Replacement;
890                                                 idx1 = 0;
891                                                 start1 = 0;
892                                                 end1 = s1.Length;
893                                                 quickCheckPos1 = 0;
894                                                 continue;
895                                         }
896                                 }
897                                 else {
898                                         sk1 = charSortKey;
899                                         sk1 [0] = cat1;
900                                         sk1 [1] = Level1 (i1);
901                                         if (!ignoreNonSpace && currentLevel > 1)
902                                                 sk1 [2] = Level2 (i1, ext1);
903                                         if (currentLevel > 2)
904                                                 sk1 [3] = Uni.Level3 (i1);
905                                         if (currentLevel > 3)
906                                                 special1 = Uni.HasSpecialWeight ((char) i1);
907                                         if (cat1 > 1)
908                                                 previousChar = i1;
909                                 }
910
911                                 Contraction ct2 = null;
912                                 if (ext2 == ExtenderType.None)
913                                         ct2 = GetContraction (s2, idx2, end2);
914
915                                 if (sk2 != null)
916                                         idx2++;
917                                 else if (ct2 != null) {
918                                         idx2 += ct2.Source.Length;
919                                         if (ct2.SortKey != null) {
920                                                 sk2 = charSortKey2;
921                                                 for (int i = 0; i < ct2.SortKey.Length; i++)
922                                                         sk2 [i] = ct2.SortKey [i];
923                                                 previousChar2 = -1;
924                                                 previousSortKey2 = sk2;
925                                         }
926                                         else if (escape2.Source == null) {
927                                                 escape2.Source = s2;
928                                                 escape2.Start = start2;
929                                                 escape2.Index = cur2 + ct2.Source.Length;
930                                                 escape2.End = end2;
931                                                 escape2.Optional = quickCheckPos2;
932                                                 s2 = ct2.Replacement;
933                                                 idx2 = 0;
934                                                 start2 = 0;
935                                                 end2 = s2.Length;
936                                                 quickCheckPos2 = 0;
937                                                 continue;
938                                         }
939                                 }
940                                 else {
941                                         sk2 = charSortKey2;
942                                         sk2 [0] = cat2;
943                                         sk2 [1] = Level1 (i2);
944                                         if (!ignoreNonSpace && currentLevel > 1)
945                                                 sk2 [2] = Level2 (i2, ext2);
946                                         if (currentLevel > 2)
947                                                 sk2 [3] = Uni.Level3 (i2);
948                                         if (currentLevel > 3)
949                                                 special2 = Uni.HasSpecialWeight ((char) i2);
950                                         if (cat2 > 1)
951                                                 previousChar2 = i2;
952                                         idx2++;
953                                 }
954
955                                 // Add offset here so that it does not skip
956                                 // idx1 while s2 has a replacement.
957                                 idx1 += offset1;
958
959                                 // add diacritical marks in s1 here
960                                 if (!ignoreNonSpace) {
961                                         while (idx1 < end1) {
962                                                 if (Category (s1 [idx1]) != 1)
963                                                         break;
964                                                 if (sk1 [2] == 0)
965                                                         sk1 [2] = 2;
966                                                 sk1 [2] = (byte) (sk1 [2] + 
967                                                         Level2 (s1 [idx1], ExtenderType.None));
968                                                 idx1++;
969                                         }
970
971                                         // add diacritical marks in s2 here
972                                         while (idx2 < end2) {
973                                                 if (Category (s2 [idx2]) != 1)
974                                                         break;
975                                                 if (sk2 [2] == 0)
976                                                         sk2 [2] = 2;
977                                                 sk2 [2] = (byte) (sk2 [2] + 
978                                                         Level2 (s2 [idx2], ExtenderType.None));
979                                                 idx2++;
980                                         }
981                                 }
982
983                                 int ret = sk1 [0] - sk2 [0];
984                                 ret = ret != 0 ? ret : sk1 [1] - sk2 [1];
985                                 if (ret != 0)
986                                         return ret;
987                                 if (currentLevel == 1)
988                                         continue;
989                                 if (!ignoreNonSpace) {
990                                         ret = sk1 [2] - sk2 [2];
991                                         if (ret != 0) {
992                                                 finalResult = ret;
993                                                 currentLevel = frenchSort ? 2 : 1;
994                                                 continue;
995                                         }
996                                 }
997                                 if (currentLevel == 2)
998                                         continue;
999                                 ret = sk1 [3] - sk2 [3];
1000                                 if (ret != 0) {
1001                                         finalResult = ret;
1002                                         currentLevel = 2;
1003                                         continue;
1004                                 }
1005                                 if (currentLevel == 3)
1006                                         continue;
1007                                 if (special1 != special2) {
1008                                         finalResult = special1 ? 1 : -1;
1009                                         currentLevel = 3;
1010                                         continue;
1011                                 }
1012                                 if (special1) {
1013                                         ret = CompareFlagPair (
1014                                                 !Uni.IsJapaneseSmallLetter ((char) i1),
1015                                                 !Uni.IsJapaneseSmallLetter ((char) i2));
1016                                         ret = ret != 0 ? ret :
1017                                                 ToDashTypeValue (ext1) -
1018                                                 ToDashTypeValue (ext2);
1019                                         ret = ret != 0 ? ret : CompareFlagPair (
1020                                                 Uni.IsHiragana ((char) i1),
1021                                                 Uni.IsHiragana ((char) i2));
1022                                         ret = ret != 0 ? ret : CompareFlagPair (
1023                                                 !IsHalfKana ((char) i1),
1024                                                 !IsHalfKana ((char) i2));
1025                                         if (ret != 0) {
1026                                                 finalResult = ret;
1027                                                 currentLevel = 3;
1028                                                 continue;
1029                                         }
1030                                 }
1031                         }
1032
1033                         // If there were only level 3 or lower differences,
1034                         // then we still have to find diacritical differences
1035                         // if any.
1036                         if (!ignoreNonSpace &&
1037                                 finalResult != 0 && currentLevel > 2) {
1038                                 while (idx1 < end1 && idx2 < end2) {
1039                                         if (!Uni.IsIgnorableNonSpacing (s1 [idx1]))
1040                                                 break;
1041                                         if (!Uni.IsIgnorableNonSpacing (s2 [idx2]))
1042                                                 break;
1043                                         finalResult = Level2 (FilterOptions ((s1 [idx1])), ext1) - Level2 (FilterOptions (s2 [idx2]), ext2);
1044                                         if (finalResult != 0)
1045                                                 break;
1046                                         idx1++;
1047                                         idx2++;
1048                                         // they should work only for the first character
1049                                         ext1 = ExtenderType.None;
1050                                         ext2 = ExtenderType.None;
1051                                 }
1052                         }
1053                         if (currentLevel == 1 && finalResult != 0) {
1054                                 while (idx1 < end1)
1055                                         if (Uni.IsIgnorableNonSpacing (s1 [idx1]))
1056                                                 idx1++;
1057                                 while (idx2 < end2)
1058                                         if (Uni.IsIgnorableNonSpacing (s2 [idx2]))
1059                                                 idx2++;
1060                         }
1061                         // we still have to handle level 5
1062                         if (finalResult == 0) {
1063                                 finalResult = lv5At1 - lv5At2;
1064                                 if (finalResult == 0)
1065                                         finalResult = lv5Value1 - lv5Value2;
1066                         }
1067                         if (finalResult == 0) {
1068                                 if (idx2 == end2)
1069                                         targetConsumed = true;
1070                                 if (idx1 == end1)
1071                                         sourceConsumed = true;
1072                         }
1073                         return idx1 != end1 ? 1 : idx2 == end2 ? finalResult : -1;
1074                 }
1075
1076                 int CompareFlagPair (bool b1, bool b2)
1077                 {
1078                         return b1 == b2 ? 0 : b1 ? 1 : -1;
1079                 }
1080
1081                 #endregion
1082
1083                 #region IsPrefix() and IsSuffix()
1084
1085                 public bool IsPrefix (string src, string target, CompareOptions opt)
1086                 {
1087                         return IsPrefix (src, target, 0, src.Length, opt);
1088                 }
1089
1090                 public bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
1091                 {
1092                         SetOptions (opt);
1093                         return IsPrefix (s, target, start, length, 
1094                                 (opt & CompareOptions.StringSort) != 0, true);
1095                 }
1096
1097                 public bool IsPrefix (string s, string target, int start, int length, bool stringSort, bool skipHeadingExtenders)
1098                 {
1099                         bool consumed, dummy;
1100                         int ret = CompareInternal (s, start, length,
1101                                 target, 0, target.Length, stringSort,
1102                                 out consumed, out dummy, skipHeadingExtenders);
1103                         return consumed;
1104                 }
1105
1106                 // IsSuffix()
1107
1108                 public bool IsSuffix (string src, string target, CompareOptions opt)
1109                 {
1110                         return IsSuffix (src, target, src.Length - 1, src.Length, opt);
1111                 }
1112
1113                 public bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
1114                 {
1115                         // quick check : simple codepoint comparison
1116                         if (s.Length >= target.Length) {
1117                                 int si = start;
1118                                 int se = start - length;
1119                                 for (int i = target.Length - 1; si >= se && i >= 0; i--, si--)
1120                                         if (s [si] != target [i])
1121                                                 break;
1122                                 if (si == start + target.Length)
1123                                         return true;
1124                         }
1125
1126                         SetOptions (opt);
1127                         return IsSuffix (s, target, start, length,
1128                                 (opt & CompareOptions.StringSort) != 0);
1129                 }
1130
1131                 bool IsSuffix (string s, string t, int start, int length,
1132                         bool stringSort)
1133                 {
1134                         int tstart = 0;
1135                         for (;tstart < t.Length; tstart++)
1136                                 if (!IsIgnorable (t [tstart]))
1137                                         break;
1138                         if (tstart == t.Length)
1139                                 return true; // as if target is String.Empty.
1140
1141 #if false
1142 // This is still not working. If it is not likely to get working, then just remove it.
1143                         int si = start;
1144                         int send = start - length;
1145                         int ti = t.Length - 1;
1146                         int tend = -1;
1147
1148                         int sStep = start + 1;
1149                         int tStep = t.Length;
1150                         bool sourceConsumed, targetConsumed;
1151                         while (true) {
1152                                 for (; send < si; si--)
1153                                         if (!IsIgnorable (s [si]))
1154                                                 break;
1155                                 for (; tend < ti; ti--)
1156                                         if (!IsIgnorable (t [ti]))
1157                                                 break;
1158                                 if (tend == ti)
1159                                         break;
1160                                 for (; send < si; si--)
1161                                         if (IsSafe (s [si]))
1162                                                 break;
1163                                 for (; tend < ti; ti--)
1164                                         if (IsSafe (t [ti]))
1165                                                 break;
1166 Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, length, t, ti, tstart, sStep - si, tStep - ti);
1167                                 if (CompareInternal (s, si, sStep - si,
1168                                         t, ti, tStep - ti, stringSort,
1169                                         out targetConsumed, out sourceConsumed,
1170                                         true) != 0)
1171                                         return false;
1172                                 if (send == si)
1173                                         return false;
1174                                 sStep = si;
1175                                 tStep = ti;
1176                                 si--;
1177                                 ti--;
1178                         }
1179                         return true;
1180 #else
1181                         // FIXME: it is not efficient for very long target.
1182                         bool sourceConsumed, targetConsumed;
1183                         int mismatchCount = 0;
1184                         for (int i = 0; i < length; i++) {
1185                                 escape1.Source = escape2.Source = null;
1186                                 previousSortKey = previousSortKey2 = null;
1187                                 previousChar = previousChar2 = -1;
1188
1189                                 int ret = CompareInternal (s, start - i, i + 1,
1190                                         t, tstart, t.Length - tstart,
1191                                         stringSort, out targetConsumed,
1192                                         out sourceConsumed, true);
1193                                 if (ret == 0)
1194                                         return true;
1195                                 if (!sourceConsumed && targetConsumed)
1196                                         return false;
1197                                 if (!targetConsumed) {
1198                                         mismatchCount++;
1199                                         if (mismatchCount > Uni.MaxExpansionLength)
1200                                                 // The largest length of an
1201                                                 // expansion is 3, so if the
1202                                                 // target was not consumed more
1203                                                 // than 3 times, then the tail
1204                                                 // character does not match.
1205                                                 return false;
1206                                 }
1207                         }
1208                         return false;
1209 #endif
1210                 }
1211
1212                 #endregion
1213
1214                 #region IndexOf() / LastIndexOf()
1215
1216                 public int IndexOf (string s, string target, CompareOptions opt)
1217                 {
1218                         return IndexOf (s, target, 0, s.Length, opt);
1219                 }
1220
1221                 public int IndexOf (string s, string target, int start, int length, CompareOptions opt)
1222                 {
1223                         SetOptions (opt);
1224                         return IndexOf (s, target, start, length,
1225                                 (opt & CompareOptions.StringSort) != 0);
1226                 }
1227
1228                 public int IndexOf (string s, char target, CompareOptions opt)
1229                 {
1230                         return IndexOf (s, target, 0, s.Length, opt);
1231                 }
1232
1233                 public int IndexOf (string s, char target, int start, int length, CompareOptions opt)
1234                 {
1235                         SetOptions (opt);
1236
1237                         // If target is contraction, then use string search.
1238                         Contraction ct = GetContraction (target);
1239                         if (ct != null) {
1240                                 if (ct.Replacement != null)
1241                                         return IndexOf (s, ct.Replacement, start, length,
1242                                                 (opt & CompareOptions.StringSort) != 0);
1243                                 else
1244                                         return IndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
1245                         } else {
1246                                 int ti = FilterOptions ((int) target);
1247                                 charSortKeyIndexTarget [0] = Category (ti);
1248                                 charSortKeyIndexTarget [1] = Level1 (ti);
1249                                 if (!ignoreNonSpace)
1250                                         charSortKeyIndexTarget [2] =
1251                                                 Level2 (ti, ExtenderType.None);
1252                                 charSortKeyIndexTarget [3] = Uni.Level3 (ti);
1253                                 return IndexOfSortKey (s, start, length,
1254                                         charSortKeyIndexTarget, target, ti,
1255                                         !Uni.HasSpecialWeight ((char) ti));
1256                         }
1257                 }
1258
1259                 // Searches target byte[] keydata
1260                 int IndexOfSortKey (string s, int start, int length, byte [] sortkey, char target, int ti, bool noLv4)
1261                 {
1262                         int end = start + length;
1263                         int idx = start;
1264                         while (idx < end) {
1265                                 int cur = idx;
1266                                 if (MatchesForward (s, ref idx, end, ti, sortkey, noLv4))
1267                                         return cur;
1268                         }
1269                         return -1;
1270                 }
1271
1272                 // Searches string. Search head character (or keydata when
1273                 // the head is contraction sortkey) and try IsPrefix().
1274                 int IndexOf (string s, string target, int start, int length, bool stringSort)
1275                 {
1276                         int tidx = 0;
1277                         for (; tidx < target.Length; tidx++)
1278                                 if (!IsIgnorable (target [tidx]))
1279                                         break;
1280                         if (tidx == target.Length)
1281                                 return start;
1282                         Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1283                         string replace = ct != null ? ct.Replacement : null;
1284                         byte [] sk = replace == null ? charSortKeyIndexTarget : null;
1285                         bool noLv4 = true;
1286                         char tc = char.MinValue;
1287                         int ti = -1;
1288                         if (ct != null && sk != null) {
1289                                 for (int i = 0; i < ct.SortKey.Length; i++)
1290                                         sk [i] = ct.SortKey [i];
1291                         } else if (sk != null) {
1292                                 tc = target [tidx];
1293                                 ti = FilterOptions (target [tidx]);
1294                                 sk [0] = Category (ti);
1295                                 sk [1] = Level1 (ti);
1296                                 if (!ignoreNonSpace)
1297                                         sk [2] = Level2 (ti, ExtenderType.None);
1298                                 sk [3] = Uni.Level3 (ti);
1299                                 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1300                         }
1301                         if (sk != null) {
1302                                 for (tidx++; tidx < target.Length; tidx++) {
1303                                         if (Category (target [tidx]) != 1)
1304                                                 break;
1305                                         if (sk [2] == 0)
1306                                                 sk [2] = 2;
1307                                         sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1308                                 }
1309                         }
1310
1311                         do {
1312                                 int idx = 0;
1313                                 if (replace != null)
1314                                         idx = IndexOf (s, replace, start, length, stringSort);
1315                                 else
1316                                         idx = IndexOfSortKey (s, start, length, sk, tc, ti, noLv4);
1317                                 if (idx < 0)
1318                                         return -1;
1319                                 length -= idx - start;
1320                                 start = idx;
1321                                 if (IsPrefix (s, target, start, length, stringSort, false))
1322                                         return idx;
1323                                 Contraction cts = GetContraction (s, start, length);
1324                                 if (cts != null) {
1325                                         start += cts.Source.Length;
1326                                         length -= cts.Source.Length;
1327                                 } else {
1328                                         start++;
1329                                         length--;
1330                                 }
1331                         } while (length > 0);
1332                         return -1;
1333                 }
1334
1335                 //
1336                 // There are the same number of IndexOf() related methods,
1337                 // with the same functionalities for each.
1338                 //
1339
1340                 public int LastIndexOf (string s, string target, CompareOptions opt)
1341                 {
1342                         return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1343                 }
1344
1345                 public int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
1346                 {
1347                         SetOptions (opt);
1348                         return LastIndexOf (s, target, start, length,
1349                                 (opt & CompareOptions.StringSort) != 0);
1350                 }
1351
1352                 public int LastIndexOf (string s, char target, CompareOptions opt)
1353                 {
1354                         return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1355                 }
1356
1357                 public int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
1358                 {
1359                         SetOptions (opt);
1360
1361                         // If target is contraction, then use string search.
1362                         Contraction ct = GetContraction (target);
1363                         if (ct != null) {
1364                                 if (ct.Replacement != null)
1365                                         return LastIndexOf (s, ct.Replacement, start, length, (opt & CompareOptions.StringSort) != 0);
1366                                 else
1367                                         return LastIndexOfSortKey (s, start, start, length, ct.SortKey, char.MinValue, -1, true);
1368                         }
1369                         else {
1370                                 int ti = FilterOptions ((int) target);
1371                                 charSortKeyIndexTarget [0] = Category (ti);
1372                                 charSortKeyIndexTarget [1] = Level1 (ti);
1373                                 if (!ignoreNonSpace)
1374                                         charSortKeyIndexTarget [2] = Level2 (ti, ExtenderType.None);
1375                                 charSortKeyIndexTarget [3] = Uni.Level3 (ti);
1376                                 return LastIndexOfSortKey (s, start, start, length, charSortKeyIndexTarget, target, ti, !Uni.HasSpecialWeight ((char) ti));
1377                         }
1378                 }
1379
1380                 // Searches target byte[] keydata
1381                 int LastIndexOfSortKey (string s, int start, int orgStart, int length, byte [] sortkey, char target, int ti, bool noLv4)
1382                 {
1383                         int end = start - length;
1384                         int idx = start;
1385                         while (idx > end) {
1386                                 int cur = idx;
1387                                 if (MatchesBackward (s, ref idx, end, orgStart, ti, sortkey, noLv4))
1388                                         return cur;
1389                         }
1390                         return -1;
1391                 }
1392
1393                 // Searches string. Search head character (or keydata when
1394                 // the head is contraction sortkey) and try IsPrefix().
1395                 int LastIndexOf (string s, string target, int start, int length, bool stringSort)
1396                 {
1397                         int orgStart = start;
1398                         int tidx = 0;
1399                         for (; tidx < target.Length; tidx++)
1400                                 if (!IsIgnorable (target [tidx]))
1401                                         break;
1402                         if (tidx == target.Length)
1403                                 return start;
1404                         Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1405                         string replace = ct != null ? ct.Replacement : null;
1406                         byte [] sk = replace == null ? charSortKeyIndexTarget : null;
1407
1408                         bool noLv4 = true;
1409                         char tc = char.MinValue;
1410                         int ti = -1;
1411                         if (ct != null && sk != null) {
1412                                 for (int i = 0; i < ct.SortKey.Length; i++)
1413                                         sk [i] = ct.SortKey [i];
1414                         } else if (sk != null) {
1415                                 tc = target [tidx];
1416                                 ti = FilterOptions (target [tidx]);
1417                                 sk [0] = Category (ti);
1418                                 sk [1] = Level1 (ti);
1419                                 if (!ignoreNonSpace)
1420                                         sk [2] = Level2 (ti, ExtenderType.None);
1421                                 sk [3] = Uni.Level3 (ti);
1422                                 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1423                         }
1424                         if (sk != null) {
1425                                 for (tidx++; tidx < target.Length; tidx++) {
1426                                         if (Category (target [tidx]) != 1)
1427                                                 break;
1428                                         if (sk [2] == 0)
1429                                                 sk [2] = 2;
1430                                         sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1431                                 }
1432                         }
1433
1434                         do {
1435                                 int idx = 0;
1436
1437                                 if (replace != null)
1438                                         idx = LastIndexOf (s, replace, start, length, stringSort);
1439                                 else
1440                                         idx = LastIndexOfSortKey (s, start, orgStart, length, sk, tc, ti, noLv4);
1441                                 if (idx < 0)
1442                                         return -1;
1443                                 length -= start - idx;
1444                                 start = idx;
1445
1446                                 if (IsPrefix (s, target, idx, orgStart - idx + 1, stringSort, false)) {
1447                                         for (;idx < orgStart; idx++)
1448                                                 if (!IsIgnorable (s [idx]))
1449                                                         break;
1450                                         return idx;
1451                                 }
1452                                 Contraction cts = GetContraction (s, idx, orgStart - idx + 1);
1453                                 if (cts != null) {
1454                                         start -= cts.Source.Length;
1455                                         length -= cts.Source.Length;
1456                                 } else {
1457                                         start--;
1458                                         length--;
1459                                 }
1460                         } while (length > 0);
1461                         return -1;
1462                 }
1463
1464                 private bool MatchesForward (string s, ref int idx, int end, int ti, byte [] sortkey, bool noLv4)
1465                 {
1466                         int si = -1;
1467                         ExtenderType ext = GetExtenderType (s [idx]);
1468                         Contraction ct = null;
1469                         if (ext == ExtenderType.None)
1470                                 ct = GetContraction (s, idx, end);
1471                         else if (previousChar < 0) {
1472                                 if (previousSortKey == null) {
1473                                         idx++;
1474                                         return false;
1475                                 }
1476                                 charSortKey = previousSortKey;
1477                         }
1478                         else
1479                                 si = FilterExtender (previousChar, ext);
1480                         // if lv4 exists, it never matches contraction
1481                         if (ct != null) {
1482                                 idx += ct.Source.Length;
1483                                 if (!noLv4)
1484                                         return false;
1485                                 if (ct.SortKey != null) {
1486                                         for (int i = 0; i < sortkey.Length; i++)
1487                                                 charSortKey [i] = sortkey [i];
1488                                         previousChar = -1;
1489                                         previousSortKey = charSortKey;
1490                                 } else {
1491                                         // Here is the core of LAMESPEC
1492                                         // described at the top of the source.
1493                                         int dummy = 0;
1494                                         return MatchesForward (ct.Replacement, ref dummy,
1495                                                 ct.Replacement.Length, ti, sortkey, noLv4);
1496                                 }
1497                         } else {
1498                                 if (si < 0)
1499                                         si = FilterOptions (s [idx]);
1500                                 idx++;
1501                                 charSortKey [0] = Category (si);
1502                                 bool noMatch = false;
1503                                 if (sortkey [0] == charSortKey [0])
1504                                         charSortKey [1] = Level1 (si);
1505                                 else
1506                                         noMatch = true;
1507                                 if (!ignoreNonSpace && sortkey [1] == charSortKey [1])
1508                                         charSortKey [2] = Level2 (si, ext);
1509                                 else if (!ignoreNonSpace)
1510                                         noMatch = true;
1511                                 if (noMatch) {
1512                                         for (; idx < end; idx++) {
1513                                                 if (Category (s [idx]) != 1)
1514                                                         break;
1515                                         }
1516                                         return false;
1517                                 }
1518                                 charSortKey [3] = Uni.Level3 (si);
1519                                 if (charSortKey [0] != 1)
1520                                         previousChar = si;
1521                         }
1522                         for (; idx < end; idx++) {
1523                                 if (Category (s [idx]) != 1)
1524                                         break;
1525                                 if (ignoreNonSpace)
1526                                         continue;
1527                                 if (charSortKey [2] == 0)
1528                                                 charSortKey [2] = 2;
1529                                         charSortKey [2] = (byte) (charSortKey [2]
1530                                                 + Level2 (s [idx], ExtenderType.None));
1531                         }
1532
1533                         return MatchesPrimitive (charSortKey, si, ext, sortkey, ti, noLv4);
1534                 }
1535
1536                 private bool MatchesPrimitive (byte [] charSortKey, int si, ExtenderType ext, byte [] sortkey, int ti, bool noLv4)
1537                 {
1538                         if (charSortKey [0] != sortkey [0] ||
1539                                 charSortKey [1] != sortkey [1] ||
1540                                 (!ignoreNonSpace && charSortKey [2] != sortkey [2]) ||
1541                                 charSortKey [3] != sortkey [3])
1542                                 return false;
1543                         if (noLv4 && (si < 0 || !Uni.HasSpecialWeight ((char) si)))
1544                                 return true;
1545                         else if (noLv4)
1546                                 return false;
1547                         // Since target can never be an extender, if the source
1548                         // is an expander and it matters, then they never match.
1549                         if (!ignoreNonSpace && ext == ExtenderType.Conditional)
1550                                 return false;
1551                         if (Uni.IsJapaneseSmallLetter ((char) si) !=
1552                                 Uni.IsJapaneseSmallLetter ((char) ti) ||
1553                                 ToDashTypeValue (ext) !=
1554                                 // FIXME: we will have to specify correct value for target
1555                                 ToDashTypeValue (ExtenderType.None) ||
1556                                 !Uni.IsHiragana ((char) si) !=
1557                                 !Uni.IsHiragana ((char) ti) ||
1558                                 IsHalfKana ((char) si) !=
1559                                 IsHalfKana ((char) ti))
1560                                 return false;
1561                         return true;
1562                 }
1563
1564                 private bool MatchesBackward (string s, ref int idx, int end, int orgStart, int ti, byte [] sortkey, bool noLv4)
1565                 {
1566                         int cur = idx;
1567                         int si = -1;
1568                         ExtenderType ext = GetExtenderType (s [idx]);
1569                         // To handle extenders in source, we need to
1570                         // check next _primary_ character.
1571                         if (ext != ExtenderType.None) {
1572                                 byte diacritical = 0;
1573                                 for (int tmp = 0; ; tmp--) {
1574                                         if (tmp < 0) // heading extender
1575                                                 return false;
1576                                         if (IsIgnorable (s [tmp]))
1577                                                 continue;
1578                                         int tmpi = FilterOptions (s [tmp]);
1579                                         byte category = Category (tmpi);
1580                                         if (category == 1) {
1581                                                 diacritical = Level2 (tmpi, ExtenderType.None);
1582                                                 continue;
1583                                         }
1584                                         si = FilterExtender (tmpi, ext);
1585                                         charSortKey [0] = category;
1586                                         charSortKey [1] = Level1 (si);
1587                                         if (!ignoreNonSpace)
1588                                                 charSortKey [2] = Level2 (si, ext);
1589                                         charSortKey [3] = Uni.Level3 (si);
1590                                         if (ext != ExtenderType.Conditional &&
1591                                                 diacritical != 0)
1592                                                 charSortKey [2] =
1593                                                         (charSortKey [2] == 0) ?
1594                                                         (byte) (diacritical + 2) :
1595                                                         diacritical;
1596                                         break;
1597                                 }
1598                                 idx--;
1599                         }
1600                         Contraction ct = null;
1601                         if (ext == ExtenderType.None)
1602                                 ct = GetContraction (s, idx, end);
1603                         // if lv4 exists, it never matches contraction
1604                         if (ct != null) {
1605                                 idx -= ct.Source.Length;
1606                                 if (!noLv4)
1607                                         return false;
1608                                 if (ct.SortKey != null) {
1609                                         for (int i = 0; i < sortkey.Length; i++)
1610                                                 charSortKey [i] = sortkey [i];
1611                                         previousChar = -1;
1612                                         previousSortKey = charSortKey;
1613                                 } else {
1614                                         // Here is the core of LAMESPEC
1615                                         // described at the top of the source.
1616                                         int dummy = ct.Replacement.Length - 1;
1617                                         return MatchesBackward (ct.Replacement, 
1618                                                 ref dummy, dummy, -1, ti, sortkey, noLv4);
1619                                 }
1620                         } else if (ext == ExtenderType.None) {
1621                                 if (si < 0)
1622                                         si = FilterOptions (s [idx]);
1623                                 idx--;
1624                                 bool noMatch = false;
1625                                 charSortKey [0] = Category (si);
1626                                 if (charSortKey [0] == sortkey [0])
1627                                         charSortKey [1] = Level1 (si);
1628                                 else
1629                                         noMatch = true;
1630                                 if (!ignoreNonSpace && charSortKey [1] == sortkey [1])
1631                                         charSortKey [2] = Level2 (si, ext);
1632                                 else if (!ignoreNonSpace)
1633                                         noMatch = true;
1634                                 if (noMatch)
1635                                         return false;
1636                                 charSortKey [3] = Uni.Level3 (si);
1637                                 if (charSortKey [0] != 1)
1638                                         previousChar = si;
1639                         }
1640                         if (ext == ExtenderType.None) {
1641                                 for (int tmp = cur + 1; tmp < orgStart; tmp++) {
1642                                         if (Category (s [tmp]) != 1)
1643                                                 break;
1644                                         if (ignoreNonSpace)
1645                                                 continue;
1646                                         if (charSortKey [2] == 0)
1647                                                         charSortKey [2] = 2;
1648                                         charSortKey [2] = (byte) (charSortKey [2]
1649                                                 + Level2 (s [tmp], ExtenderType.None));
1650                                 }
1651                         }
1652                         return MatchesPrimitive (charSortKey, si, ext, sortkey, ti, noLv4);
1653                 }
1654                 #endregion
1655         }
1656 }