* Makefile: Build the make-map.exe in Mono.Unix.Native; add /nowarn:0618 to
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / SimpleCollator.cs
1 //
2 // SimpleCollator.cs : the core collator implementation
3 //
4 // Author:
5 //      Atsushi Enomoto  <atsushi@ximian.com>
6 //
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
8 //
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
16 // 
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
19 // 
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28
29 //
30 // Here's a summary for supporting contractions, expansions and diacritical 
31 // remappings.
32 //
33 // Diacritical mapping is a simple tailoring rule that "remaps" diacritical
34 // weight value from one to another. For now it applies to all range of
35 // characters, but at some stage we might need to limit the remapping ranges.
36 //
37 // A Contraction consists of a string (actually char[]) and a sortkey entry
38 // (i.e. byte[]). It indicates that a sequence of characters is interpreted
39 // as a single character that has the mapped sortkey value. There is no
40 // character which goes across "special rules". When the collator encountered
41 // such a sequence of characters, it just returns the sortkey value without
42 // further replacement.
43 //
44 // Since it is still likely to happen that a contraction sequence matches
45 // other character than the identical sequence (depending on CompareOptions
46 // and of course, defined sortkey value itself), comparison cannot be done
47 // at source char[] level.
48 //
49 // In IndexOf(), the first character in the target string or the target char
50 // itself is turned into sortkey bytes. If the character has a contraction and
51 // that is sortkey map, then it is used instead. If the contraction exists and
52 // that is replacement map, then the first character of the replacement string
53 // is searched instead. IndexOf() always searches only for the top character,
54 // and if it returned negative value, then it returns -1. Otherwise, it then
55 // tries IsPrefix() from that location. If it returns true, then it returns
56 // the index.
57 //
58 // LAMESPEC: IndexOf() is lame as a whole API. It never matches in the middle
59 // of expansion and there is no proper way to return such indexes within
60 // a single int return value.
61 //
62 // For example, try below in .NET:
63 //      IndexOf("\u00E6", "a")
64 //      IndexOf("\u00E6", "e")
65 //
66
67
68 using System;
69 using System.Collections;
70 using System.Globalization;
71
72 using Uni = Mono.Globalization.Unicode.MSCompatUnicodeTable;
73 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
74 using COpt = System.Globalization.CompareOptions;
75
76 namespace Mono.Globalization.Unicode
77 {
78         internal class SimpleCollator
79         {
80                 struct PreviousInfo
81                 {
82                         public int Code;
83                         public byte [] SortKey;
84
85                         public PreviousInfo (bool dummy)
86                         {
87                                 Code = -1;
88                                 SortKey = null;
89                         }
90                 }
91
92                 struct Escape
93                 {
94                         public string Source;
95                         public int Index;
96                         public int Start;
97                         public int End;
98                         public int Optional;
99                 }
100
101                 static SimpleCollator invariant =
102                         new SimpleCollator (CultureInfo.InvariantCulture);
103
104                 readonly TextInfo textInfo; // for ToLower().
105                 readonly bool frenchSort;
106                 unsafe readonly byte* cjkCatTable;
107                 unsafe readonly byte* cjkLv1Table;
108                 readonly CodePointIndexer cjkIndexer;
109                 unsafe readonly byte* cjkLv2Table;
110                 readonly CodePointIndexer cjkLv2Indexer;
111                 readonly int lcid;
112                 readonly Contraction [] contractions;
113                 readonly Level2Map [] level2Maps;
114
115                 // This flag marks characters as "unsafe", where the character
116                 // could be used as part of a contraction (whose length > 1).
117                 readonly byte [] unsafeFlags;
118
119                 const int UnsafeFlagLength = 0x300 / 8;
120
121 //              readonly byte [] contractionFlags = new byte [16];
122
123                 // This array is internally used inside IndexOf() to store
124                 // "no need to check" ASCII characters.
125                 //
126                 // Now that it should be thread safe, this array is allocated
127                 // at every time.
128 //              byte [] checkedFlags = new byte [128 / 8];
129
130                 #region .ctor() and split functions
131
132                 public SimpleCollator (CultureInfo culture)
133                 {
134                         lcid = culture.LCID;
135                         textInfo = culture.TextInfo;
136
137                         unsafe {
138                                 SetCJKTable (culture, ref cjkIndexer,
139                                         ref cjkCatTable, ref cjkLv1Table,
140                                         ref cjkLv2Indexer, ref cjkLv2Table);
141                         }
142
143                         // Get tailoring info
144                         TailoringInfo t = null;
145                         for (CultureInfo ci = culture; ci.LCID != 127; ci = ci.Parent) {
146                                 t = Uni.GetTailoringInfo (ci.LCID);
147                                 if (t != null)
148                                         break;
149                         }
150                         if (t == null) // then use invariant
151                                 t = Uni.GetTailoringInfo (127);
152
153                         frenchSort = t.FrenchSort;
154                         Uni.BuildTailoringTables (culture, t, ref contractions,
155                                 ref level2Maps);
156                         unsafeFlags = new byte [UnsafeFlagLength];
157                         foreach (Contraction c in contractions) {
158                                 if (c.Source.Length > 1)
159                                         foreach (char ch in c.Source)
160                                                 unsafeFlags [(int) ch / 8 ]
161                                                         |= (byte) (1 << ((int) ch & 7));
162 //                              for (int i = 0; i < c.Source.Length; i++) {
163 //                                      int ch = c.Source [i];
164 //                                      if (ch > 127)
165 //                                              continue;
166 //                                      contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
167 //                              }
168                         }
169                         if (lcid != 127)
170                                 foreach (Contraction c in invariant.contractions) {
171                                         if (c.Source.Length > 1)
172                                                 foreach (char ch in c.Source)
173                                                         unsafeFlags [(int) ch / 8 ] 
174                                                                 |= (byte) (1 << ((int) ch & 7));
175 //                                      for (int i = 0; i < c.Source.Length; i++) {
176 //                                              int ch = c.Source [i];
177 //                                              if (ch > 127)
178 //                                                      continue;
179 //                                              contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
180 //                                      }
181                                 }
182
183                         // FIXME: Since tailorings are mostly for latin
184                         // (and in some cases Cyrillic) characters, it would
185                         // be much better for performance to store "start 
186                         // indexes" for > 370 (culture-specific letters).
187
188 /*
189 // dump tailoring table
190 Console.WriteLine ("******** building table for {0} : c - {1} d - {2}",
191 culture.LCID, contractions.Length, level2Maps.Length);
192 foreach (Contraction c in contractions) {
193 foreach (char cc in c.Source)
194 Console.Write ("{0:X4} ", (int) cc);
195 Console.WriteLine (" -> '{0}'", c.Replacement);
196 }
197 */
198                 }
199
200                 unsafe private void SetCJKTable (
201                         CultureInfo culture, ref CodePointIndexer cjkIndexer,
202                         ref byte* catTable, ref byte* lv1Table,
203                         ref CodePointIndexer lv2Indexer, ref byte* lv2Table)
204                 {
205                         string name = GetNeutralCulture (culture).Name;
206
207                         Uni.FillCJK (name, ref cjkIndexer, ref catTable,
208                                 ref lv1Table, ref lv2Indexer, ref lv2Table);
209                 }
210
211                 static CultureInfo GetNeutralCulture (CultureInfo info)
212                 {
213                         CultureInfo ret = info;
214                         while (ret.Parent != null && ret.Parent.LCID != 127)
215                                 ret = ret.Parent;
216                         return ret;
217                 }
218
219                 #endregion
220
221                 unsafe byte Category (int cp)
222                 {
223                         if (cp < 0x3000 || cjkCatTable == null)
224                                 return Uni.Category (cp);
225                         int idx = cjkIndexer.ToIndex (cp);
226                         return idx < 0 ? Uni.Category (cp) : cjkCatTable [idx];
227                 }
228
229                 unsafe byte Level1 (int cp)
230                 {
231                         if (cp < 0x3000 || cjkLv1Table == null)
232                                 return Uni.Level1 (cp);
233                         int idx = cjkIndexer.ToIndex (cp);
234                         return idx < 0 ? Uni.Level1 (cp) : cjkLv1Table [idx];
235                 }
236
237                 unsafe byte Level2 (int cp, ExtenderType ext)
238                 {
239                         if (ext == ExtenderType.Buggy)
240                                 return 5;
241                         else if (ext == ExtenderType.Conditional)
242                                 return 0;
243
244                         if (cp < 0x3000 || cjkLv2Table == null)
245                                 return Uni.Level2 (cp);
246                         int idx = cjkLv2Indexer.ToIndex (cp);
247                         byte ret = idx < 0 ? (byte) 0 : cjkLv2Table [idx];
248                         if (ret != 0)
249                                 return ret;
250                         ret = Uni.Level2 (cp);
251                         if (level2Maps.Length == 0)
252                                 return ret;
253                         for (int i = 0; i < level2Maps.Length; i++) {
254                                 if (level2Maps [i].Source == ret)
255                                         return level2Maps [i].Replace;
256                                 else if (level2Maps [i].Source > ret)
257                                         break;
258                         }
259                         return ret;
260                 }
261
262                 static bool IsHalfKana (int cp, COpt opt)
263                 {
264                         return (opt & COpt.IgnoreWidth) != 0 ||
265                                 Uni.IsHalfWidthKana ((char) cp);
266                 }
267
268                 Contraction GetContraction (string s, int start, int end)
269                 {
270 //                      int si = s [start];
271 //                      if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
272 //                              return null;
273                         Contraction c = GetContraction (s, start, end, contractions);
274                         if (c != null || lcid == 127)
275                                 return c;
276                         return GetContraction (s, start, end, invariant.contractions);
277                 }
278
279                 Contraction GetContraction (string s, int start, int end, Contraction [] clist)
280                 {
281                         for (int i = 0; i < clist.Length; i++) {
282                                 Contraction ct = clist [i];
283                                 int diff = ct.Source [0] - s [start];
284                                 if (diff > 0)
285                                         return null; // it's already sorted
286                                 else if (diff < 0)
287                                         continue;
288                                 char [] chars = ct.Source;
289                                 if (end - start < chars.Length)
290                                         continue;
291                                 bool match = true;
292                                 for (int n = 0; n < chars.Length; n++)
293                                         if (s [start + n] != chars [n]) {
294                                                 match = false;
295                                                 break;
296                                         }
297                                 if (match)
298                                         return ct;
299                         }
300                         return null;
301                 }
302
303                 Contraction GetTailContraction (string s, int start, int end)
304                 {
305 //                      int si = s [end - 1];
306 //                      if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
307 //                              return null;
308                         Contraction c = GetTailContraction (s, start, end, contractions);
309                         if (c != null || lcid == 127)
310                                 return c;
311                         return GetTailContraction (s, start, end, invariant.contractions);
312                 }
313
314                 Contraction GetTailContraction (string s, int start, int end, Contraction [] clist)
315                 {
316                         for (int i = 0; i < clist.Length; i++) {
317                                 Contraction ct = clist [i];
318                                 int diff = ct.Source [0] - s [end];
319                                 if (diff > 0)
320                                         return null; // it's already sorted
321                                 else if (diff < 0)
322                                         continue;
323                                 char [] chars = ct.Source;
324                                 if (start - end + 1 < chars.Length)
325                                         continue;
326                                 bool match = true;
327                                 int offset = start - chars.Length + 1;
328                                 for (int n = 0; n < chars.Length; n++)
329                                         if (s [offset + n] != chars [n]) {
330                                                 match = false;
331                                                 break;
332                                         }
333                                 if (match)
334                                         return ct;
335                         }
336                         return null;
337                 }
338
339                 Contraction GetContraction (char c)
340                 {
341                         Contraction ct = GetContraction (c, contractions);
342                         if (ct != null || lcid == 127)
343                                 return ct;
344                         return GetContraction (c, invariant.contractions);
345                 }
346
347                 Contraction GetContraction (char c, Contraction [] clist)
348                 {
349                         for (int i = 0; i < clist.Length; i++) {
350                                 Contraction ct = clist [i];
351                                 if (ct.Source [0] > c)
352                                         return null; // it's already sorted
353                                 if (ct.Source [0] == c && ct.Source.Length == 1)
354                                         return ct;
355                         }
356                         return null;
357                 }
358
359                 int FilterOptions (int i, COpt opt)
360                 {
361                         if ((opt & COpt.IgnoreWidth) != 0) {
362                                 int x = Uni.ToWidthCompat (i);
363                                 if (x != 0)
364                                         i = x;
365                         }
366                         if ((opt & COpt.IgnoreCase) != 0)
367                                 i = textInfo.ToLower ((char) i);
368                         if ((opt & COpt.IgnoreKanaType) != 0)
369                                 i = Uni.ToKanaTypeInsensitive (i);
370                         return i;
371                 }
372
373                 enum ExtenderType {
374                         None,
375                         Simple,
376                         Voiced,
377                         Conditional,
378                         Buggy,
379                 }
380
381                 ExtenderType GetExtenderType (int i)
382                 {
383                         // LAMESPEC: Windows expects true for U+3005, but 
384                         // sometimes it does not represent to repeat just
385                         // one character.
386                         // Windows also expects true for U+3031 and U+3032,
387                         // but they should *never* repeat one character.
388
389                         // U+2015 becomes an extender only when it is Japanese
390                         if (i == 0x2015)
391                                 return lcid == 16 ? ExtenderType.Conditional :
392                                         ExtenderType.None;
393
394                         if (i < 0x3005 || i > 0xFF70)
395                                 return ExtenderType.None;
396                         if (i >= 0xFE7C) {
397                                 switch (i) {
398                                 case 0xFE7C:
399                                 case 0xFE7D:
400                                         return ExtenderType.Simple;
401                                 case 0xFF70:
402                                         return ExtenderType.Conditional;
403                                 case 0xFF9E:
404                                 case 0xFF9F:
405                                         return ExtenderType.Voiced;
406                                 }
407                         }
408                         if (i > 0x30FE)
409                                 return ExtenderType.None;
410                         switch (i) {
411                         case 0x3005: // LAMESPEC: see above.
412                                 return ExtenderType.Buggy;
413                         case 0x3031: // LAMESPEC: see above.
414                         case 0x3032: // LAMESPEC: see above.
415                         case 0x309D:
416                         case 0x30FD:
417                                 return ExtenderType.Simple;
418                         case 0x309E:
419                         case 0x30FE:
420                                 return ExtenderType.Voiced;
421                         case 0x30FC:
422                                 return ExtenderType.Conditional;
423                         default:
424                                 return ExtenderType.None;
425                         }
426                 }
427
428                 static byte ToDashTypeValue (ExtenderType ext, COpt opt)
429                 {
430                         if ((opt & COpt.IgnoreNonSpace) != 0) // LAMESPEC: huh, why?
431                                 return 3;
432                         switch (ext) {
433                         case ExtenderType.None:
434                                 return 3;
435                         case ExtenderType.Conditional:
436                                 return 5;
437                         default:
438                                 return 4;
439                         }
440                 }
441
442                 int FilterExtender (int i, ExtenderType ext, COpt opt)
443                 {
444                         if (ext == ExtenderType.Conditional &&
445                                 Uni.HasSpecialWeight ((char) i)) {
446                                 bool half = IsHalfKana ((char) i, opt);
447                                 bool katakana = !Uni.IsHiragana ((char) i);
448                                 switch (Level1 (i) & 7) {
449                                 case 2:
450                                         return half ? 0xFF71 : katakana ? 0x30A2 : 0x3042;
451                                 case 3:
452                                         return half ? 0xFF72 : katakana ? 0x30A4 : 0x3044;
453                                 case 4:
454                                         return half ? 0xFF73 : katakana ? 0x30A6 : 0x3046;
455                                 case 5:
456                                         return half ? 0xFF74 : katakana ? 0x30A8 : 0x3048;
457                                 case 6:
458                                         return half ? 0xFF75 : katakana ? 0x30AA : 0x304A;
459                                 }
460                         }
461                         return i;
462                 }
463
464                 static bool IsIgnorable (int i, COpt opt)
465                 {
466                         return Uni.IsIgnorable (i, (byte) (1 +
467                                 ((opt & COpt.IgnoreSymbols) != 0 ? 2 : 0) +
468                                 ((opt & COpt.IgnoreNonSpace) != 0 ? 4 : 0)));
469                         
470                 }
471
472                 bool IsSafe (int i)
473                 {
474                         return i / 8 >= unsafeFlags.Length ? true : (unsafeFlags [i / 8] & (1 << (i % 8))) == 0;
475                 }
476
477                 #region GetSortKey()
478
479                 public SortKey GetSortKey (string s)
480                 {
481                         return GetSortKey (s, CompareOptions.None);
482                 }
483
484                 public SortKey GetSortKey (string s, CompareOptions options)
485                 {
486                         return GetSortKey (s, 0, s.Length, options);
487                 }
488
489                 public SortKey GetSortKey (string s, int start, int length, CompareOptions options)
490                 {
491                         SortKeyBuffer buf = new SortKeyBuffer (lcid);
492                         buf.Initialize (options, lcid, s, frenchSort);
493                         int end = start + length;
494                         GetSortKey (s, start, end, buf, options);
495                         return buf.GetResultAndReset ();
496                 }
497
498                 void GetSortKey (string s, int start, int end,
499                         SortKeyBuffer buf, CompareOptions opt)
500                 {
501                         PreviousInfo prev = new PreviousInfo (false);
502
503                         for (int n = start; n < end; n++) {
504                                 int i = s [n];
505
506                                 ExtenderType ext = GetExtenderType (i);
507                                 if (ext != ExtenderType.None) {
508                                         i = FilterExtender (prev.Code, ext, opt);
509                                         if (i >= 0)
510                                                 FillSortKeyRaw (i, ext, buf, opt);
511                                         else if (prev.SortKey != null) {
512                                                 byte [] b = prev.SortKey;
513                                                 buf.AppendNormal (
514                                                         b [0],
515                                                         b [1],
516                                                         b [2] != 1 ? b [2] : Level2 (i, ext),
517                                                         b [3] != 1 ? b [3] : Uni.Level3 (i));
518                                         }
519                                         // otherwise do nothing.
520                                         // (if the extender is the first char
521                                         // in the string, then just ignore.)
522                                         continue;
523                                 }
524
525                                 if (IsIgnorable (i, opt))
526                                         continue;
527                                 i = FilterOptions (i, opt);
528
529                                 Contraction ct = GetContraction (s, n, end);
530                                 if (ct != null) {
531                                         if (ct.Replacement != null) {
532                                                 GetSortKey (ct.Replacement, 0, ct.Replacement.Length, buf, opt);
533                                         } else {
534                                                 byte [] b = ct.SortKey;
535                                                 buf.AppendNormal (
536                                                         b [0],
537                                                         b [1],
538                                                         b [2] != 1 ? b [2] : Level2 (i, ext),
539                                                         b [3] != 1 ? b [3] : Uni.Level3 (i));
540                                                 prev.SortKey = b;
541                                                 prev.Code = -1;
542                                         }
543                                         n += ct.Source.Length - 1;
544                                 }
545                                 else {
546                                         if (!Uni.IsIgnorableNonSpacing (i))
547                                                 prev.Code = i;
548                                         FillSortKeyRaw (i, ExtenderType.None, buf, opt);
549                                 }
550                         }
551                 }
552
553                 void FillSortKeyRaw (int i, ExtenderType ext,
554                         SortKeyBuffer buf, CompareOptions opt)
555                 {
556                         if (0x3400 <= i && i <= 0x4DB5) {
557                                 int diff = i - 0x3400;
558                                 buf.AppendCJKExtension (
559                                         (byte) (0x10 + diff / 254),
560                                         (byte) (diff % 254 + 2));
561                                 return;
562                         }
563
564                         UnicodeCategory uc = char.GetUnicodeCategory ((char) i);
565                         switch (uc) {
566                         case UnicodeCategory.PrivateUse:
567                                 int diff = i - 0xE000;
568                                 buf.AppendNormal (
569                                         (byte) (0xE5 + diff / 254),
570                                         (byte) (diff % 254 + 2),
571                                         0,
572                                         0);
573                                 return;
574                         case UnicodeCategory.Surrogate:
575                                 FillSurrogateSortKeyRaw (i, buf);
576                                 return;
577                         }
578
579                         byte level2 = Level2 (i, ext);
580                         if (Uni.HasSpecialWeight ((char) i)) {
581                                 byte level1 = Level1 (i);
582                                 buf.AppendKana (
583                                         Category (i),
584                                         level1,
585                                         level2,
586                                         Uni.Level3 (i),
587                                         Uni.IsJapaneseSmallLetter ((char) i),
588                                         ToDashTypeValue (ext, opt),
589                                         !Uni.IsHiragana ((char) i),
590                                         IsHalfKana ((char) i, opt)
591                                         );
592                                 if ((opt & COpt.IgnoreNonSpace) == 0 && ext == ExtenderType.Voiced)
593                                         // Append voice weight
594                                         buf.AppendNormal (1, 1, 1, 0);
595                         }
596                         else
597                                 buf.AppendNormal (
598                                         Category (i),
599                                         Level1 (i),
600                                         level2,
601                                         Uni.Level3 (i));
602                 }
603
604                 void FillSurrogateSortKeyRaw (int i, SortKeyBuffer buf)
605                 {
606                         int diffbase = 0;
607                         int segment = 0;
608                         byte lower = 0;
609
610                         if (i < 0xD840) {
611                                 diffbase = 0xD800;
612                                 segment = 0x41;
613                                 lower = (byte) ((i == 0xD800) ? 0x3E : 0x3F);
614                         } else if (0xD840 <= i && i < 0xD880) {
615                                 diffbase = 0xD840;
616                                 segment = 0xF2;
617                                 lower = 0x3E;
618                         } else if (0xDB80 <= i && i < 0xDC00) {
619                                 diffbase = 0xDB80 - 0x40;
620                                 segment = 0xFE;
621                                 lower = 0x3E;
622                         } else {
623                                 diffbase = 0xDC00 - 0xF8 + 2;
624                                 segment = 0x41;
625                                 lower = 0x3F;
626                         }
627                         int diff = i - diffbase;
628
629                         buf.AppendNormal (
630                                 (byte) (segment + diff / 254),
631                                 (byte) (diff % 254 + 2),
632                                 lower,
633                                 lower);
634                 }
635
636                 #endregion
637
638                 #region Compare()
639
640                 public int Compare (string s1, string s2)
641                 {
642                         return Compare (s1, s2, CompareOptions.None);
643                 }
644
645                 public int Compare (string s1, string s2, CompareOptions options)
646                 {
647                         return Compare (s1, 0, s1.Length, s2, 0, s2.Length, options);
648                 }
649
650                 private int CompareOrdinal (string s1, int idx1, int len1,
651                         string s2, int idx2, int len2)
652                 {
653                         int min = len1 < len2 ? len1 : len2;
654                         int end1 = idx1 + min;
655                         int end2 = idx2 + min;
656                         if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
657                                 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
658                         for (int i1 = idx1, i2 = idx2;
659                                 i1 < end1 && i2 < end2; i1++, i2++)
660                                 if (s1 [i1] != s2 [i2])
661                                         return s1 [i1] - s2 [i2];
662                         return len1 == len2 ? 0 :
663                                 len1 == min ? - 1 : 1;
664                 }
665
666                 public int Compare (string s1, int idx1, int len1,
667                         string s2, int idx2, int len2, CompareOptions options)
668                 {
669                         // quick equality check
670                         if (idx1 == idx2 && len1 == len2 &&
671                                 Object.ReferenceEquals (s1, s2))
672                                 return 0;
673                         // FIXME: this should be done inside Compare() at
674                         // any time.
675 //                      int ord = CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
676 //                      if (ord == 0)
677 //                              return 0;
678                         if (options == CompareOptions.Ordinal)
679                                 return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
680
681 #if false // stable easy version, depends on GetSortKey().
682                         SortKey sk1 = GetSortKey (s1, idx1, len1, options);
683                         SortKey sk2 = GetSortKey (s2, idx2, len2, options);
684                         byte [] d1 = sk1.KeyData;
685                         byte [] d2 = sk2.KeyData;
686                         int len = d1.Length > d2.Length ? d2.Length : d1.Length;
687                         for (int i = 0; i < len; i++)
688                                 if (d1 [i] != d2 [i])
689                                         return d1 [i] < d2 [i] ? -1 : 1;
690                         return d1.Length == d2.Length ? 0 : d1.Length < d2.Length ? -1 : 1;
691 #else
692                         PreviousInfo prev1 = new PreviousInfo (false);
693                         byte [] sk1 = new byte [4];
694                         byte [] sk2 = new byte [4];
695                         bool dummy, dummy2;
696                         int ret = CompareInternal (options, s1, idx1, len1, s2, idx2, len2, out dummy, out dummy2, true, ref prev1, sk1, sk2);
697                         return ret == 0 ? 0 : ret < 0 ? -1 : 1;
698 #endif
699                 }
700
701                 int CompareInternal (COpt opt, string s1, int idx1, int len1, string s2,
702                         int idx2, int len2,
703                         out bool targetConsumed, out bool sourceConsumed,
704                         bool skipHeadingExtenders, ref PreviousInfo prev1,
705                         byte [] charSortKey, byte [] charSortKey2)
706                 {
707                         int start1 = idx1;
708                         int start2 = idx2;
709                         int end1 = idx1 + len1;
710                         int end2 = idx2 + len2;
711                         targetConsumed = false;
712                         sourceConsumed = false;
713                         PreviousInfo prev2 = new PreviousInfo (false);
714
715                         // It holds final result that comes from the comparison
716                         // at level 2 or lower. Even if Compare() found the
717                         // difference at level 2 or lower, it still has to
718                         // continue level 1 comparison. FinalResult is used
719                         // when there was no further differences.
720                         int finalResult = 0;
721                         // It holds the comparison level to do. It starts from
722                         // 5, and becomes 1 when we do primary-only comparison.
723                         int currentLevel = 5;
724
725                         int lv5At1 = -1;
726                         int lv5At2 = -1;
727                         int lv5Value1 = 0;
728                         int lv5Value2 = 0;
729
730                         // Skip heading extenders
731                         if (skipHeadingExtenders) {
732                                 for (; idx1 < end1; idx1++)
733                                         if (GetExtenderType (s1 [idx1]) == ExtenderType.None)
734                                                 break;
735                                 for (; idx2 < end2; idx2++)
736                                         if (GetExtenderType (s2 [idx2]) == ExtenderType.None)
737                                                 break;
738                         }
739
740                         ExtenderType ext1 = ExtenderType.None;
741                         ExtenderType ext2 = ExtenderType.None;
742
743                         int quickCheckPos1 = idx1;
744                         int quickCheckPos2 = idx2;
745                         bool stringSort = (opt & COpt.StringSort) != 0;
746                         bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
747                         Escape escape1 = new Escape ();
748                         Escape escape2 = new Escape ();
749
750                         while (true) {
751                                 for (; idx1 < end1; idx1++)
752                                         if (!IsIgnorable (s1 [idx1], opt))
753                                                 break;
754                                 for (; idx2 < end2; idx2++)
755                                         if (!IsIgnorable (s2 [idx2], opt))
756                                                 break;
757
758                                 if (idx1 >= end1) {
759                                         if (escape1.Source == null)
760                                                 break;
761                                         s1 = escape1.Source;
762                                         start1 = escape1.Start;
763                                         idx1 = escape1.Index;
764                                         end1 = escape1.End;
765                                         quickCheckPos1 = escape1.Optional;
766                                         escape1.Source = null;
767                                         continue;
768                                 }
769                                 if (idx2 >= end2) {
770                                         if (escape2.Source == null)
771                                                 break;
772                                         s2 = escape2.Source;
773                                         start2 = escape2.Start;
774                                         idx2 = escape2.Index;
775                                         end2 = escape2.End;
776                                         quickCheckPos2 = escape2.Optional;
777                                         escape2.Source = null;
778                                         continue;
779                                 }
780 #if true
781 // If comparison is unstable, then this part is one of the most doubtful part.
782 // Here we run quick codepoint comparison and run back to "stable" area to
783 // compare characters.
784
785                                 // Strictly to say, even the first character
786                                 // could be compared here, but it messed
787                                 // backward step, so I just avoided mess.
788                                 if (quickCheckPos1 < idx1 && quickCheckPos2 < idx2) {
789                                         while (idx1 < end1 && idx2 < end2 &&
790                                                 s1 [idx1] == s2 [idx2]) {
791                                                 idx1++;
792                                                 idx2++;
793                                         }
794                                         if (idx1 == end1 || idx2 == end2)
795                                                 continue; // check replacement
796
797                                         int backwardEnd1 = quickCheckPos1;
798                                         int backwardEnd2 = quickCheckPos2;
799                                         quickCheckPos1 = idx1;
800                                         quickCheckPos2 = idx2;
801
802                                         idx1--;
803                                         idx2--;
804
805                                         for (;idx1 > backwardEnd1; idx1--)
806                                                 if (Category (s1 [idx1]) != 1)
807                                                         break;
808                                         for (;idx2 > backwardEnd2; idx2--)
809                                                 if (Category (s2 [idx2]) != 1)
810                                                         break;
811                                         for (;idx1 > backwardEnd1; idx1--)
812                                                 if (IsSafe (s1 [idx1]))
813                                                         break;
814                                         for (;idx2 > backwardEnd2; idx2--)
815                                                 if (IsSafe (s2 [idx2]))
816                                                         break;
817                                 }
818 #endif
819
820                                 int cur1 = idx1;
821                                 int cur2 = idx2;
822                                 byte [] sk1 = null;
823                                 byte [] sk2 = null;
824                                 int i1 = FilterOptions (s1 [idx1], opt);
825                                 int i2 = FilterOptions (s2 [idx2], opt);
826                                 bool special1 = false;
827                                 bool special2 = false;
828
829                                 // If current character is an expander, then
830                                 // repeat the previous character.
831                                 ext1 = GetExtenderType (i1);
832                                 if (ext1 != ExtenderType.None) {
833                                         if (prev1.Code < 0) {
834                                                 if (prev1.SortKey == null) {
835                                                         // nothing to extend
836                                                         idx1++;
837                                                         continue;
838                                                 }
839                                                 sk1 = prev1.SortKey;
840                                         }
841                                         else
842                                                 i1 = FilterExtender (prev1.Code, ext1, opt);
843                                 }
844                                 ext2 = GetExtenderType (i2);
845                                 if (ext2 != ExtenderType.None) {
846                                         if (prev2.Code < 0) {
847                                                 if (prev2.SortKey == null) {
848                                                         // nothing to extend
849                                                         idx2++;
850                                                         continue;
851                                                 }
852                                                 sk2 = prev2.SortKey;
853                                         }
854                                         else
855                                                 i2 = FilterExtender (prev2.Code, ext2, opt);
856                                 }
857
858                                 byte cat1 = Category (i1);
859                                 byte cat2 = Category (i2);
860
861                                 // Handle special weight characters
862                                 if (!stringSort && currentLevel > 4) {
863                                         if (cat1 == 6) {
864                                                 lv5At1 = escape1.Source != null ?
865                                                         escape1.Index - escape1.Start :
866                                                         cur1 - start1;
867                                                 // here Windows has a bug that it does
868                                                 // not consider thirtiary weight.
869                                                 lv5Value1 = Level1 (i1) << 8 + Uni.Level3 (i1);
870                                                 prev1.Code = i1;
871                                                 idx1++;
872                                         }
873                                         if (cat2 == 6) {
874                                                 lv5At2 = escape2.Source != null ?
875                                                         escape2.Index - escape2.Start :
876                                                         cur2 - start2;
877                                                 // here Windows has a bug that it does
878                                                 // not consider thirtiary weight.
879                                                 lv5Value2 = Level1 (i2) << 8 + Uni.Level3 (i2);
880                                                 prev2.Code = i2;
881                                                 idx2++;
882                                         }
883                                         if (cat1 == 6 || cat2 == 6) {
884                                                 currentLevel = 4;
885                                                 continue;
886                                         }
887                                 }
888
889                                 Contraction ct1 = null;
890                                 if (ext1 == ExtenderType.None)
891                                         ct1 = GetContraction (s1, idx1, end1);
892
893                                 int offset1 = 1;
894                                 if (sk1 != null)
895                                         offset1 = 1;
896                                 else if (ct1 != null) {
897                                         offset1 = ct1.Source.Length;
898                                         if (ct1.SortKey != null) {
899                                                 sk1 = charSortKey;
900                                                 for (int i = 0; i < ct1.SortKey.Length; i++)
901                                                         sk1 [i] = ct1.SortKey [i];
902                                                 prev1.Code = -1;
903                                                 prev1.SortKey = sk1;
904                                         }
905                                         else if (escape1.Source == null) {
906                                                 escape1.Source = s1;
907                                                 escape1.Start = start1;
908                                                 escape1.Index = cur1 + ct1.Source.Length;
909                                                 escape1.End = end1;
910                                                 escape1.Optional = quickCheckPos1;
911                                                 s1 = ct1.Replacement;
912                                                 idx1 = 0;
913                                                 start1 = 0;
914                                                 end1 = s1.Length;
915                                                 quickCheckPos1 = 0;
916                                                 continue;
917                                         }
918                                 }
919                                 else {
920                                         sk1 = charSortKey;
921                                         sk1 [0] = cat1;
922                                         sk1 [1] = Level1 (i1);
923                                         if (!ignoreNonSpace && currentLevel > 1)
924                                                 sk1 [2] = Level2 (i1, ext1);
925                                         if (currentLevel > 2)
926                                                 sk1 [3] = Uni.Level3 (i1);
927                                         if (currentLevel > 3)
928                                                 special1 = Uni.HasSpecialWeight ((char) i1);
929                                         if (cat1 > 1)
930                                                 prev1.Code = i1;
931                                 }
932
933                                 Contraction ct2 = null;
934                                 if (ext2 == ExtenderType.None)
935                                         ct2 = GetContraction (s2, idx2, end2);
936
937                                 if (sk2 != null)
938                                         idx2++;
939                                 else if (ct2 != null) {
940                                         idx2 += ct2.Source.Length;
941                                         if (ct2.SortKey != null) {
942                                                 sk2 = charSortKey2;
943                                                 for (int i = 0; i < ct2.SortKey.Length; i++)
944                                                         sk2 [i] = ct2.SortKey [i];
945                                                 prev2.Code = -1;
946                                                 prev2.SortKey = sk2;
947                                         }
948                                         else if (escape2.Source == null) {
949                                                 escape2.Source = s2;
950                                                 escape2.Start = start2;
951                                                 escape2.Index = cur2 + ct2.Source.Length;
952                                                 escape2.End = end2;
953                                                 escape2.Optional = quickCheckPos2;
954                                                 s2 = ct2.Replacement;
955                                                 idx2 = 0;
956                                                 start2 = 0;
957                                                 end2 = s2.Length;
958                                                 quickCheckPos2 = 0;
959                                                 continue;
960                                         }
961                                 }
962                                 else {
963                                         sk2 = charSortKey2;
964                                         sk2 [0] = cat2;
965                                         sk2 [1] = Level1 (i2);
966                                         if (!ignoreNonSpace && currentLevel > 1)
967                                                 sk2 [2] = Level2 (i2, ext2);
968                                         if (currentLevel > 2)
969                                                 sk2 [3] = Uni.Level3 (i2);
970                                         if (currentLevel > 3)
971                                                 special2 = Uni.HasSpecialWeight ((char) i2);
972                                         if (cat2 > 1)
973                                                 prev2.Code = i2;
974                                         idx2++;
975                                 }
976
977                                 // Add offset here so that it does not skip
978                                 // idx1 while s2 has a replacement.
979                                 idx1 += offset1;
980
981                                 // add diacritical marks in s1 here
982                                 if (!ignoreNonSpace) {
983                                         while (idx1 < end1) {
984                                                 if (Category (s1 [idx1]) != 1)
985                                                         break;
986                                                 if (sk1 [2] == 0)
987                                                         sk1 [2] = 2;
988                                                 sk1 [2] = (byte) (sk1 [2] + 
989                                                         Level2 (s1 [idx1], ExtenderType.None));
990                                                 idx1++;
991                                         }
992
993                                         // add diacritical marks in s2 here
994                                         while (idx2 < end2) {
995                                                 if (Category (s2 [idx2]) != 1)
996                                                         break;
997                                                 if (sk2 [2] == 0)
998                                                         sk2 [2] = 2;
999                                                 sk2 [2] = (byte) (sk2 [2] + 
1000                                                         Level2 (s2 [idx2], ExtenderType.None));
1001                                                 idx2++;
1002                                         }
1003                                 }
1004
1005                                 int ret = sk1 [0] - sk2 [0];
1006                                 ret = ret != 0 ? ret : sk1 [1] - sk2 [1];
1007                                 if (ret != 0)
1008                                         return ret;
1009                                 if (currentLevel == 1)
1010                                         continue;
1011                                 if (!ignoreNonSpace) {
1012                                         ret = sk1 [2] - sk2 [2];
1013                                         if (ret != 0) {
1014                                                 finalResult = ret;
1015                                                 currentLevel = frenchSort ? 2 : 1;
1016                                                 continue;
1017                                         }
1018                                 }
1019                                 if (currentLevel == 2)
1020                                         continue;
1021                                 ret = sk1 [3] - sk2 [3];
1022                                 if (ret != 0) {
1023                                         finalResult = ret;
1024                                         currentLevel = 2;
1025                                         continue;
1026                                 }
1027                                 if (currentLevel == 3)
1028                                         continue;
1029                                 if (special1 != special2) {
1030                                         finalResult = special1 ? 1 : -1;
1031                                         currentLevel = 3;
1032                                         continue;
1033                                 }
1034                                 if (special1) {
1035                                         ret = CompareFlagPair (
1036                                                 !Uni.IsJapaneseSmallLetter ((char) i1),
1037                                                 !Uni.IsJapaneseSmallLetter ((char) i2));
1038                                         ret = ret != 0 ? ret :
1039                                                 ToDashTypeValue (ext1, opt) -
1040                                                 ToDashTypeValue (ext2, opt);
1041                                         ret = ret != 0 ? ret : CompareFlagPair (
1042                                                 Uni.IsHiragana ((char) i1),
1043                                                 Uni.IsHiragana ((char) i2));
1044                                         ret = ret != 0 ? ret : CompareFlagPair (
1045                                                 !IsHalfKana ((char) i1, opt),
1046                                                 !IsHalfKana ((char) i2, opt));
1047                                         if (ret != 0) {
1048                                                 finalResult = ret;
1049                                                 currentLevel = 3;
1050                                                 continue;
1051                                         }
1052                                 }
1053                         }
1054
1055                         // If there were only level 3 or lower differences,
1056                         // then we still have to find diacritical differences
1057                         // if any.
1058                         if (!ignoreNonSpace &&
1059                                 finalResult != 0 && currentLevel > 2) {
1060                                 while (idx1 < end1 && idx2 < end2) {
1061                                         if (!Uni.IsIgnorableNonSpacing (s1 [idx1]))
1062                                                 break;
1063                                         if (!Uni.IsIgnorableNonSpacing (s2 [idx2]))
1064                                                 break;
1065                                         finalResult = Level2 (FilterOptions (s1 [idx1], opt), ext1) - Level2 (FilterOptions (s2 [idx2], opt), ext2);
1066                                         if (finalResult != 0)
1067                                                 break;
1068                                         idx1++;
1069                                         idx2++;
1070                                         // they should work only for the first character
1071                                         ext1 = ExtenderType.None;
1072                                         ext2 = ExtenderType.None;
1073                                 }
1074                         }
1075                         if (currentLevel == 1 && finalResult != 0) {
1076                                 while (idx1 < end1)
1077                                         if (Uni.IsIgnorableNonSpacing (s1 [idx1]))
1078                                                 idx1++;
1079                                 while (idx2 < end2)
1080                                         if (Uni.IsIgnorableNonSpacing (s2 [idx2]))
1081                                                 idx2++;
1082                         }
1083                         // we still have to handle level 5
1084                         if (finalResult == 0) {
1085                                 finalResult = lv5At1 - lv5At2;
1086                                 if (finalResult == 0)
1087                                         finalResult = lv5Value1 - lv5Value2;
1088                         }
1089                         if (finalResult == 0) {
1090                                 if (idx2 == end2)
1091                                         targetConsumed = true;
1092                                 if (idx1 == end1)
1093                                         sourceConsumed = true;
1094                         }
1095                         return idx1 != end1 ? 1 : idx2 == end2 ? finalResult : -1;
1096                 }
1097
1098                 int CompareFlagPair (bool b1, bool b2)
1099                 {
1100                         return b1 == b2 ? 0 : b1 ? 1 : -1;
1101                 }
1102
1103                 #endregion
1104
1105                 #region IsPrefix() and IsSuffix()
1106
1107                 public bool IsPrefix (string src, string target, CompareOptions opt)
1108                 {
1109                         return IsPrefix (src, target, 0, src.Length, opt);
1110                 }
1111
1112                 public bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
1113                 {
1114                         if (target.Length == 0)
1115                                 return true;
1116                         PreviousInfo prev = new PreviousInfo (false);
1117                         byte [] sk1 = new byte [4];
1118                         byte [] sk2 = new byte [4];
1119                         return IsPrefix (opt, s, target, start, length, true, ref prev, sk1, sk2);
1120                 }
1121
1122                 bool IsPrefix (COpt opt, string s, string target, int start, int length, bool skipHeadingExtenders, ref PreviousInfo prev, byte [] sk1, byte [] sk2)
1123                 {
1124                         bool consumed, dummy;
1125                         CompareInternal (opt, s, start, length,
1126                                 target, 0, target.Length,
1127                                 out consumed, out dummy, skipHeadingExtenders,
1128                                 ref prev, sk1, sk2);
1129                         return consumed;
1130                 }
1131
1132                 // IsSuffix()
1133
1134                 public bool IsSuffix (string src, string target, CompareOptions opt)
1135                 {
1136                         return IsSuffix (src, target, src.Length - 1, src.Length, opt);
1137                 }
1138
1139                 public bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
1140                 {
1141                         if (target.Length == 0)
1142                                 return true;
1143                         int last = LastIndexOf (s, target, start, length, opt);
1144                         return last >= 0 && Compare (s, last, s.Length - last, target, 0, target.Length, opt) == 0;
1145 /*
1146                         // quick check : simple codepoint comparison
1147                         if (s.Length >= target.Length) {
1148                                 int si = start;
1149                                 int se = start - length;
1150                                 for (int i = target.Length - 1; si >= se && i >= 0; i--, si--)
1151                                         if (s [si] != target [i])
1152                                                 break;
1153                                 if (si == start + target.Length)
1154                                         return true;
1155                         }
1156
1157                         PreviousInfo prev = new PreviousInfo (false);
1158                         byte [] sk1 = new byte [4];
1159                         byte [] sk2 = new byte [4];
1160                         return IsSuffix (opt, s, target, start, length, ref prev, sk1, sk2);
1161 */
1162                 }
1163
1164                 bool IsSuffix (COpt opt, string s, string t, int start, int length, ref PreviousInfo prev, byte [] sk1, byte [] sk2)
1165                 {
1166                         int tstart = 0;
1167                         for (;tstart < t.Length; tstart++)
1168                                 if (!IsIgnorable (t [tstart], opt))
1169                                         break;
1170                         if (tstart == t.Length)
1171                                 return true; // as if target is String.Empty.
1172
1173 #if false
1174 // This is still not working. If it is not likely to get working, then just remove it.
1175                         int si = start;
1176                         int send = start - length;
1177                         int ti = t.Length - 1;
1178                         int tend = -1;
1179
1180                         int sStep = start + 1;
1181                         int tStep = t.Length;
1182                         bool sourceConsumed, targetConsumed;
1183                         while (true) {
1184                                 for (; send < si; si--)
1185                                         if (!IsIgnorable (s [si]))
1186                                                 break;
1187                                 for (; tend < ti; ti--)
1188                                         if (!IsIgnorable (t [ti]))
1189                                                 break;
1190                                 if (tend == ti)
1191                                         break;
1192                                 for (; send < si; si--)
1193                                         if (IsSafe (s [si]))
1194                                                 break;
1195                                 for (; tend < ti; ti--)
1196                                         if (IsSafe (t [ti]))
1197                                                 break;
1198 Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, length, t, ti, tstart, sStep - si, tStep - ti);
1199                                 if (CompareInternal (opt, s, si, sStep - si,
1200                                         t, ti, tStep - ti,
1201                                         out targetConsumed, out sourceConsumed,
1202                                         true) != 0)
1203                                         return false;
1204                                 if (send == si)
1205                                         return false;
1206                                 sStep = si;
1207                                 tStep = ti;
1208                                 si--;
1209                                 ti--;
1210                         }
1211                         return true;
1212 #else
1213                         // FIXME: it is not efficient for very long target.
1214                         bool sourceConsumed, targetConsumed;
1215                         int mismatchCount = 0;
1216                         for (int i = 0; i < length; i++) {
1217                                 prev = new PreviousInfo (false); // prev.Reset();
1218
1219                                 int ret = CompareInternal (opt, s, start - i, i + 1,
1220                                         t, tstart, t.Length - tstart,
1221                                         out targetConsumed,
1222                                         out sourceConsumed, true, ref prev,
1223                                         sk1, sk2);
1224                                 if (ret == 0)
1225                                         return true;
1226                                 if (!sourceConsumed && targetConsumed)
1227                                         return false;
1228                                 if (!targetConsumed) {
1229                                         mismatchCount++;
1230                                         if (mismatchCount > Uni.MaxExpansionLength)
1231                                                 // The largest length of an
1232                                                 // expansion is 3, so if the
1233                                                 // target was not consumed more
1234                                                 // than 3 times, then the tail
1235                                                 // character does not match.
1236                                                 return false;
1237                                 }
1238                         }
1239                         return false;
1240 #endif
1241                 }
1242
1243                 #endregion
1244
1245                 #region IndexOf() / LastIndexOf()
1246
1247                 public int IndexOf (string s, string target, CompareOptions opt)
1248                 {
1249                         return IndexOf (s, target, 0, s.Length, opt);
1250                 }
1251
1252                 public int IndexOf (string s, string target, int start, int length, CompareOptions opt)
1253                 {
1254                         PreviousInfo prev = new PreviousInfo (false);
1255                         byte [] checkedFlags = s.Length > 50 ? new byte [16] : null;
1256                         byte [] targetSortKey = new byte [4];
1257                         byte [] sk1 = new byte [4];
1258                         byte [] sk2 = new byte [4];
1259
1260                         return IndexOf (opt, s, target, start, length,
1261                                 checkedFlags, targetSortKey, ref prev, sk1, sk2);
1262                 }
1263
1264                 public int IndexOf (string s, char target, CompareOptions opt)
1265                 {
1266                         return IndexOf (s, target, 0, s.Length, opt);
1267                 }
1268
1269                 public int IndexOf (string s, char target, int start, int length, CompareOptions opt)
1270                 {
1271                         PreviousInfo prev = new PreviousInfo (false);
1272                         byte [] sk1 = new byte [4];
1273                         byte [] sk2 = new byte [4];
1274                         byte [] checkedFlags = s.Length > 50 ? new byte [16] : null;
1275                         byte [] targetSortKey = new byte [4];
1276
1277                         // If target is contraction, then use string search.
1278                         Contraction ct = GetContraction (target);
1279                         if (ct != null) {
1280                                 if (ct.Replacement != null)
1281                                         return IndexOf (opt, s, ct.Replacement,
1282                                                 start, length, checkedFlags, targetSortKey, ref prev, sk1, sk2);
1283                                 else
1284                                         return IndexOfSortKey (opt, s, start, length, ct.SortKey, char.MinValue, -1, true, checkedFlags, ref prev, sk1);
1285                         } else {
1286                                 int ti = FilterOptions ((int) target, opt);
1287                                 targetSortKey [0] = Category (ti);
1288                                 targetSortKey [1] = Level1 (ti);
1289                                 if ((opt & COpt.IgnoreNonSpace) == 0)
1290                                         targetSortKey [2] =
1291                                                 Level2 (ti, ExtenderType.None);
1292                                 targetSortKey [3] = Uni.Level3 (ti);
1293                                 return IndexOfSortKey (opt, s, start, length,
1294                                         targetSortKey, target, ti,
1295                                         !Uni.HasSpecialWeight ((char) ti), checkedFlags, ref prev, sk1);
1296                         }
1297                 }
1298
1299                 // Searches target byte[] keydata
1300                 int IndexOfSortKey (COpt opt, string s, int start, int length, byte [] sortkey, char target, int ti, bool noLv4, byte [] checkedFlags, ref PreviousInfo prev, byte [] sk)
1301                 {
1302                         int end = start + length;
1303                         int idx = start;
1304
1305                         while (idx < end) {
1306                                 int cur = idx;
1307                                 if (MatchesForward (opt, s, ref idx, end, ti, sortkey, noLv4, checkedFlags, ref prev, sk))
1308                                         return cur;
1309                         }
1310                         return -1;
1311                 }
1312
1313                 // Searches string. Search head character (or keydata when
1314                 // the head is contraction sortkey) and try IsPrefix().
1315                 int IndexOf (COpt opt, string s, string target, int start, int length, byte [] checkedFlags, byte [] targetSortKey, ref PreviousInfo prev, byte [] sk1, byte [] sk2)
1316                 {
1317                         int tidx = 0;
1318                         for (; tidx < target.Length; tidx++)
1319                                 if (!IsIgnorable (target [tidx], opt))
1320                                         break;
1321                         if (tidx == target.Length)
1322                                 return start;
1323                         Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1324                         string replace = ct != null ? ct.Replacement : null;
1325                         byte [] sk = replace == null ? targetSortKey : null;
1326                         bool noLv4 = true;
1327                         char tc = char.MinValue;
1328                         int ti = -1;
1329                         if (ct != null && sk != null) {
1330                                 for (int i = 0; i < ct.SortKey.Length; i++)
1331                                         sk [i] = ct.SortKey [i];
1332                         } else if (sk != null) {
1333                                 tc = target [tidx];
1334                                 ti = FilterOptions (target [tidx], opt);
1335                                 sk [0] = Category (ti);
1336                                 sk [1] = Level1 (ti);
1337                                 if ((opt & COpt.IgnoreNonSpace) == 0)
1338                                         sk [2] = Level2 (ti, ExtenderType.None);
1339                                 sk [3] = Uni.Level3 (ti);
1340                                 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1341                         }
1342                         if (sk != null) {
1343                                 for (tidx++; tidx < target.Length; tidx++) {
1344                                         if (Category (target [tidx]) != 1)
1345                                                 break;
1346                                         if (sk [2] == 0)
1347                                                 sk [2] = 2;
1348                                         sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1349                                 }
1350                         }
1351
1352                         do {
1353                                 int idx = 0;
1354                                 if (replace != null)
1355                                         idx = IndexOf (opt, s, replace, start, length, checkedFlags, targetSortKey, ref prev, sk1, sk2);
1356                                 else
1357                                         idx = IndexOfSortKey (opt, s, start, length, sk, tc, ti, noLv4, checkedFlags, ref prev, sk1);
1358                                 if (idx < 0)
1359                                         return -1;
1360                                 length -= idx - start;
1361                                 start = idx;
1362                                 if (IsPrefix (opt, s, target, start, length, false, ref prev, sk1, sk2))
1363                                         return idx;
1364                                 Contraction cts = GetContraction (s, start, length);
1365                                 if (cts != null) {
1366                                         start += cts.Source.Length;
1367                                         length -= cts.Source.Length;
1368                                 } else {
1369                                         start++;
1370                                         length--;
1371                                 }
1372                         } while (length > 0);
1373                         return -1;
1374                 }
1375
1376                 //
1377                 // There are the same number of IndexOf() related methods,
1378                 // with the same functionalities for each.
1379                 //
1380
1381                 public int LastIndexOf (string s, string target, CompareOptions opt)
1382                 {
1383                         return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1384                 }
1385
1386                 public int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
1387                 {
1388                         PreviousInfo prev = new PreviousInfo (false);
1389                         byte [] sk1 = new byte [4];
1390                         byte [] sk2 = new byte [4];
1391                         byte [] checkedFlags = s.Length > 50 ? new byte [16] : null;
1392                         byte [] targetSortKey = new byte [4];
1393                         return LastIndexOf (opt, s, target, start, length,
1394                                 checkedFlags, targetSortKey, ref prev, sk1, sk2);
1395                 }
1396
1397                 public int LastIndexOf (string s, char target, CompareOptions opt)
1398                 {
1399                         return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1400                 }
1401
1402                 public int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
1403                 {
1404                         PreviousInfo prev = new PreviousInfo (false);
1405                         byte [] sk1 = new byte [4];
1406                         byte [] sk2 = new byte [4];
1407                         byte [] checkedFlags = s.Length > 50 ? new byte [16] : null;
1408                         byte [] targetSortKey = new byte [4];
1409
1410                         // If target is contraction, then use string search.
1411                         Contraction ct = GetContraction (target);
1412                         if (ct != null) {
1413                                 if (ct.Replacement != null)
1414                                         return LastIndexOf (opt, s,
1415                                                 ct.Replacement, start, length,
1416                                                 checkedFlags, targetSortKey, ref prev, sk1, sk2);
1417                                 else
1418                                         return LastIndexOfSortKey (opt, s, start,
1419                                                 start, length, ct.SortKey,
1420                                                 char.MinValue, -1, true,
1421                                                 checkedFlags, ref prev, sk1);
1422                         }
1423                         else {
1424                                 int ti = FilterOptions ((int) target, opt);
1425                                 targetSortKey [0] = Category (ti);
1426                                 targetSortKey [1] = Level1 (ti);
1427                                 if ((opt & COpt.IgnoreNonSpace) == 0)
1428                                         targetSortKey [2] = Level2 (ti, ExtenderType.None);
1429                                 targetSortKey [3] = Uni.Level3 (ti);
1430                                 return LastIndexOfSortKey (opt, s, start, start,
1431                                         length, targetSortKey, target,
1432                                         ti, !Uni.HasSpecialWeight ((char) ti),
1433                                         checkedFlags, ref prev, sk1);
1434                         }
1435                 }
1436
1437                 // Searches target byte[] keydata
1438                 int LastIndexOfSortKey (COpt opt, string s, int start, int orgStart, int length, byte [] sortkey, char target, int ti, bool noLv4, byte [] checkedFlags, ref PreviousInfo prev, byte [] sk)
1439                 {
1440                         int end = start - length;
1441                         int idx = start;
1442                         while (idx > end) {
1443                                 int cur = idx;
1444                                 if (MatchesBackward (opt, s, ref idx, end, orgStart,
1445                                         ti, sortkey, noLv4, checkedFlags, ref prev, sk))
1446                                         return cur;
1447                         }
1448                         return -1;
1449                 }
1450
1451                 // Searches string. Search head character (or keydata when
1452                 // the head is contraction sortkey) and try IsPrefix().
1453                 int LastIndexOf (COpt opt, string s, string target, int start, int length, byte [] checkedFlags, byte [] targetSortKey, ref PreviousInfo prev, byte [] sk1, byte [] sk2)
1454                 {
1455                         int orgStart = start;
1456                         int tidx = 0;
1457                         for (; tidx < target.Length; tidx++)
1458                                 if (!IsIgnorable (target [tidx], opt))
1459                                         break;
1460                         if (tidx == target.Length)
1461                                 return start;
1462                         Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1463                         string replace = ct != null ? ct.Replacement : null;
1464                         byte [] sk = replace == null ? targetSortKey : null;
1465
1466                         bool noLv4 = true;
1467                         char tc = char.MinValue;
1468                         int ti = -1;
1469                         if (ct != null && sk != null) {
1470                                 for (int i = 0; i < ct.SortKey.Length; i++)
1471                                         sk [i] = ct.SortKey [i];
1472                         } else if (sk != null) {
1473                                 tc = target [tidx];
1474                                 ti = FilterOptions (target [tidx], opt);
1475                                 sk [0] = Category (ti);
1476                                 sk [1] = Level1 (ti);
1477                                 if ((opt & COpt.IgnoreNonSpace) == 0)
1478                                         sk [2] = Level2 (ti, ExtenderType.None);
1479                                 sk [3] = Uni.Level3 (ti);
1480                                 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1481                         }
1482                         if (sk != null) {
1483                                 for (tidx++; tidx < target.Length; tidx++) {
1484                                         if (Category (target [tidx]) != 1)
1485                                                 break;
1486                                         if (sk [2] == 0)
1487                                                 sk [2] = 2;
1488                                         sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1489                                 }
1490                         }
1491
1492                         do {
1493                                 int idx = 0;
1494
1495                                 if (replace != null)
1496                                         idx = LastIndexOf (opt, s, replace,
1497                                                 start, length, checkedFlags,
1498                                                 targetSortKey, ref prev, sk1, sk2);
1499                                 else
1500                                         idx = LastIndexOfSortKey (opt, s, start, orgStart, length, sk, tc, ti, noLv4, checkedFlags, ref prev, sk1);
1501                                 if (idx < 0)
1502                                         return -1;
1503                                 length -= start - idx;
1504                                 start = idx;
1505
1506                                 if (IsPrefix (opt, s, target, idx, orgStart - idx + 1, false, ref prev, sk1, sk2)) {
1507                                         for (;idx < orgStart; idx++)
1508                                                 if (!IsIgnorable (s [idx], opt))
1509                                                         break;
1510                                         return idx;
1511                                 }
1512                                 Contraction cts = GetContraction (s, idx, orgStart - idx + 1);
1513                                 if (cts != null) {
1514                                         start -= cts.Source.Length;
1515                                         length -= cts.Source.Length;
1516                                 } else {
1517                                         start--;
1518                                         length--;
1519                                 }
1520                         } while (length > 0);
1521                         return -1;
1522                 }
1523
1524                 private bool MatchesForward (COpt opt, string s, ref int idx, int end, int ti, byte [] sortkey, bool noLv4, byte [] checkedFlags, ref PreviousInfo prev, byte [] sk)
1525                 {
1526                         int si = s [idx];
1527                         if (checkedFlags != null && si < 128 && (checkedFlags [si / 8] & (1 << (si % 8))) != 0) {
1528                                 idx++;
1529                                 return false;
1530                         }
1531                         ExtenderType ext = GetExtenderType (s [idx]);
1532                         Contraction ct = null;
1533                         if (MatchesForwardCore (opt, s, ref idx, end, ti, sortkey, noLv4, ext, ref ct, checkedFlags, ref prev, sk))
1534                                 return true;
1535                         if (checkedFlags != null && ct == null && ext == ExtenderType.None && si < 128) {
1536                                 checkedFlags [si / 8] |= (byte) (1 << (si % 8));
1537                         }
1538                         return false;
1539                 }
1540
1541                 private bool MatchesForwardCore (COpt opt, string s, ref int idx, int end, int ti, byte [] sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, byte [] checkedFlags, ref PreviousInfo prev, byte [] charSortKey)
1542                 {
1543                         bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1544                         int si = -1;
1545                         if (ext == ExtenderType.None)
1546                                 ct = GetContraction (s, idx, end);
1547                         else if (prev.Code < 0) {
1548                                 if (prev.SortKey == null) {
1549                                         idx++;
1550                                         return false;
1551                                 }
1552                                 charSortKey = prev.SortKey;
1553                         }
1554                         else
1555                                 si = FilterExtender (prev.Code, ext, opt);
1556                         // if lv4 exists, it never matches contraction
1557                         if (ct != null) {
1558                                 idx += ct.Source.Length;
1559                                 if (!noLv4)
1560                                         return false;
1561                                 if (ct.SortKey != null) {
1562                                         for (int i = 0; i < sortkey.Length; i++)
1563                                                 charSortKey [i] = sortkey [i];
1564                                         prev.Code = -1;
1565                                         prev.SortKey = charSortKey;
1566                                 } else {
1567                                         // Here is the core of LAMESPEC
1568                                         // described at the top of the source.
1569                                         int dummy = 0;
1570                                         return MatchesForward (opt, ct.Replacement, ref dummy,
1571                                                 ct.Replacement.Length, ti, sortkey, noLv4, checkedFlags, ref prev, charSortKey);
1572                                 }
1573                         } else {
1574                                 if (si < 0)
1575                                         si = FilterOptions (s [idx], opt);
1576                                 idx++;
1577                                 charSortKey [0] = Category (si);
1578                                 bool noMatch = false;
1579                                 if (sortkey [0] == charSortKey [0])
1580                                         charSortKey [1] = Level1 (si);
1581                                 else
1582                                         noMatch = true;
1583                                 if (!ignoreNonSpace && sortkey [1] == charSortKey [1])
1584                                         charSortKey [2] = Level2 (si, ext);
1585                                 else if (!ignoreNonSpace)
1586                                         noMatch = true;
1587                                 if (noMatch) {
1588                                         for (; idx < end; idx++) {
1589                                                 if (Category (s [idx]) != 1)
1590                                                         break;
1591                                         }
1592                                         return false;
1593                                 }
1594                                 charSortKey [3] = Uni.Level3 (si);
1595                                 if (charSortKey [0] != 1)
1596                                         prev.Code = si;
1597                         }
1598                         for (; idx < end; idx++) {
1599                                 if (Category (s [idx]) != 1)
1600                                         break;
1601                                 if (ignoreNonSpace)
1602                                         continue;
1603                                 if (charSortKey [2] == 0)
1604                                                 charSortKey [2] = 2;
1605                                         charSortKey [2] = (byte) (charSortKey [2]
1606                                                 + Level2 (s [idx], ExtenderType.None));
1607                         }
1608
1609                         return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);
1610                 }
1611
1612                 private bool MatchesPrimitive (COpt opt, byte [] source, int si, ExtenderType ext, byte [] target, int ti, bool noLv4)
1613                 {
1614                         bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1615                         if (source [0] != target [0] ||
1616                                 source [1] != target [1] ||
1617                                 (!ignoreNonSpace && source [2] != target [2]) ||
1618                                 source [3] != target [3])
1619                                 return false;
1620                         if (noLv4 && (si < 0 || !Uni.HasSpecialWeight ((char) si)))
1621                                 return true;
1622                         else if (noLv4)
1623                                 return false;
1624                         // Since target can never be an extender, if the source
1625                         // is an expander and it matters, then they never match.
1626                         if (!ignoreNonSpace && ext == ExtenderType.Conditional)
1627                                 return false;
1628                         if (Uni.IsJapaneseSmallLetter ((char) si) !=
1629                                 Uni.IsJapaneseSmallLetter ((char) ti) ||
1630                                 ToDashTypeValue (ext, opt) !=
1631                                 // FIXME: we will have to specify correct value for target
1632                                 ToDashTypeValue (ExtenderType.None, opt) ||
1633                                 !Uni.IsHiragana ((char) si) !=
1634                                 !Uni.IsHiragana ((char) ti) ||
1635                                 IsHalfKana ((char) si, opt) !=
1636                                 IsHalfKana ((char) ti, opt))
1637                                 return false;
1638                         return true;
1639                 }
1640
1641                 private bool MatchesBackward (COpt opt, string s, ref int idx, int end, int orgStart, int ti, byte [] sortkey, bool noLv4, byte [] checkedFlags, ref PreviousInfo prev, byte [] sk)
1642                 {
1643                         int si = s [idx];
1644                         if (checkedFlags != null && si < 128 && (checkedFlags [si / 8] & (1 << (si % 8))) != 0) {
1645                                 idx--;
1646                                 return false;
1647                         }
1648                         ExtenderType ext = GetExtenderType (s [idx]);
1649                         Contraction ct = null;
1650                         if (MatchesBackwardCore (opt, s, ref idx, end, orgStart, ti, sortkey, noLv4, ext, ref ct, checkedFlags, ref prev, sk))
1651                                 return true;
1652                         if (checkedFlags != null && ct == null && ext == ExtenderType.None && si < 128) {
1653                                 checkedFlags [si / 8] |= (byte) (1 << (si % 8));
1654                         }
1655                         return false;
1656                 }
1657
1658                 private bool MatchesBackwardCore (COpt opt, string s, ref int idx, int end, int orgStart, int ti, byte [] sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, byte [] checkedFlags, ref PreviousInfo prev, byte [] charSortKey)
1659                 {
1660                         bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1661                         int cur = idx;
1662                         int si = -1;
1663                         // To handle extenders in source, we need to
1664                         // check next _primary_ character.
1665                         if (ext != ExtenderType.None) {
1666                                 byte diacritical = 0;
1667                                 for (int tmp = 0; ; tmp--) {
1668                                         if (tmp < 0) // heading extender
1669                                                 return false;
1670                                         if (IsIgnorable (s [tmp], opt))
1671                                                 continue;
1672                                         int tmpi = FilterOptions (s [tmp], opt);
1673                                         byte category = Category (tmpi);
1674                                         if (category == 1) {
1675                                                 diacritical = Level2 (tmpi, ExtenderType.None);
1676                                                 continue;
1677                                         }
1678                                         si = FilterExtender (tmpi, ext, opt);
1679                                         charSortKey [0] = category;
1680                                         charSortKey [1] = Level1 (si);
1681                                         if (!ignoreNonSpace)
1682                                                 charSortKey [2] = Level2 (si, ext);
1683                                         charSortKey [3] = Uni.Level3 (si);
1684                                         if (ext != ExtenderType.Conditional &&
1685                                                 diacritical != 0)
1686                                                 charSortKey [2] =
1687                                                         (charSortKey [2] == 0) ?
1688                                                         (byte) (diacritical + 2) :
1689                                                         diacritical;
1690                                         break;
1691                                 }
1692                                 idx--;
1693                         }
1694                         if (ext == ExtenderType.None)
1695                                 ct = GetContraction (s, idx, end);
1696                         // if lv4 exists, it never matches contraction
1697                         if (ct != null) {
1698                                 idx -= ct.Source.Length;
1699                                 if (!noLv4)
1700                                         return false;
1701                                 if (ct.SortKey != null) {
1702                                         for (int i = 0; i < sortkey.Length; i++)
1703                                                 charSortKey [i] = sortkey [i];
1704                                         prev.Code = -1;
1705                                         prev.SortKey = charSortKey;
1706                                 } else {
1707                                         // Here is the core of LAMESPEC
1708                                         // described at the top of the source.
1709                                         int dummy = ct.Replacement.Length - 1;
1710                                         return MatchesBackward (opt, 
1711                                                 ct.Replacement, ref dummy,
1712                                                 dummy, -1, ti, sortkey, noLv4,
1713                                                 checkedFlags, ref prev, charSortKey);
1714                                 }
1715                         } else if (ext == ExtenderType.None) {
1716                                 if (si < 0)
1717                                         si = FilterOptions (s [idx], opt);
1718                                 idx--;
1719                                 bool noMatch = false;
1720                                 charSortKey [0] = Category (si);
1721                                 if (charSortKey [0] == sortkey [0])
1722                                         charSortKey [1] = Level1 (si);
1723                                 else
1724                                         noMatch = true;
1725                                 if (!ignoreNonSpace && charSortKey [1] == sortkey [1])
1726                                         charSortKey [2] = Level2 (si, ext);
1727                                 else if (!ignoreNonSpace)
1728                                         noMatch = true;
1729                                 if (noMatch)
1730                                         return false;
1731                                 charSortKey [3] = Uni.Level3 (si);
1732                                 if (charSortKey [0] != 1)
1733                                         prev.Code = si;
1734                         }
1735                         if (ext == ExtenderType.None) {
1736                                 for (int tmp = cur + 1; tmp < orgStart; tmp++) {
1737                                         if (Category (s [tmp]) != 1)
1738                                                 break;
1739                                         if (ignoreNonSpace)
1740                                                 continue;
1741                                         if (charSortKey [2] == 0)
1742                                                         charSortKey [2] = 2;
1743                                         charSortKey [2] = (byte) (charSortKey [2]
1744                                                 + Level2 (s [tmp], ExtenderType.None));
1745                                 }
1746                         }
1747                         return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);
1748                 }
1749                 #endregion
1750         }
1751 }