Merge pull request #93 from konrad-kruczynski/dispatcher_timer_fix
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / SimpleCollator.cs
1 //
2 // SimpleCollator.cs : the core collator implementation
3 //
4 // Author:
5 //      Atsushi Enomoto  <atsushi@ximian.com>
6 //
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
8 //
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
16 // 
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
19 // 
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28
29 //
30 // Here's a summary for supporting contractions, expansions and diacritical 
31 // remappings.
32 //
33 // Diacritical mapping is a simple tailoring rule that "remaps" diacritical
34 // weight value from one to another. For now it applies to all range of
35 // characters, but at some stage we might need to limit the remapping ranges.
36 //
37 // A Contraction consists of a string (actually char[]) and a sortkey entry
38 // (i.e. byte[]). It indicates that a sequence of characters is interpreted
39 // as a single character that has the mapped sortkey value. There is no
40 // character which goes across "special rules". When the collator encountered
41 // such a sequence of characters, it just returns the sortkey value without
42 // further replacement.
43 //
44 // Since it is still likely to happen that a contraction sequence matches
45 // other character than the identical sequence (depending on CompareOptions
46 // and of course, defined sortkey value itself), comparison cannot be done
47 // at source char[] level.
48 //
49 // In IndexOf(), the first character in the target string or the target char
50 // itself is turned into sortkey bytes. If the character has a contraction and
51 // that is sortkey map, then it is used instead. If the contraction exists and
52 // that is replacement map, then the first character of the replacement string
53 // is searched instead. IndexOf() always searches only for the top character,
54 // and if it returned negative value, then it returns -1. Otherwise, it then
55 // tries IsPrefix() from that location. If it returns true, then it returns
56 // the index.
57 //
58 // LAMESPEC: IndexOf() is lame as a whole API. It never matches in the middle
59 // of expansion and there is no proper way to return such indexes within
60 // a single int return value.
61 //
62 // For example, try below in .NET:
63 //      IndexOf("\u00E6", "a")
64 //      IndexOf("\u00E6", "e")
65 //
66
67
68 using System;
69 using System.Collections;
70 using System.Globalization;
71
72 using Uni = Mono.Globalization.Unicode.MSCompatUnicodeTable;
73 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
74 using COpt = System.Globalization.CompareOptions;
75
76 namespace Mono.Globalization.Unicode
77 {
78         internal class SimpleCollator
79         {
80                 // this environment variable is for debugging quick check.
81 #pragma warning disable 169, 414
82                 static bool QuickCheckDisabled =
83                         Environment.internalGetEnvironmentVariable (
84                         "MONO_COLLATION_QUICK_CHECK_DISABLED") == "yes";
85 #pragma warning restore 169, 414                
86
87                 unsafe internal struct Context
88                 {
89                         public Context (CompareOptions opt, byte* alwaysMatchFlags, byte* neverMatchFlags, byte* buffer1, byte* buffer2, byte* prev1, bool quickCheckPossible)
90                         {
91                                 Option = opt;
92                                 AlwaysMatchFlags = alwaysMatchFlags;
93                                 NeverMatchFlags = neverMatchFlags;
94                                 Buffer1 = buffer1;
95                                 Buffer2 = buffer2;
96                                 PrevSortKey = prev1;
97                                 PrevCode = -1;
98                                 QuickCheckPossible = quickCheckPossible;
99                         }
100
101                         public readonly CompareOptions Option;
102                         public readonly byte* NeverMatchFlags;
103                         public readonly byte* AlwaysMatchFlags;
104                         public byte* Buffer1;
105                         public byte* Buffer2;
106                         public int PrevCode;
107                         public byte* PrevSortKey;
108                         public readonly bool QuickCheckPossible;
109
110                         public void ClearPrevInfo ()
111                         {
112                                 PrevCode = -1;
113                                 PrevSortKey = null;
114                         }
115                 }
116
117                 unsafe struct PreviousInfo
118                 {
119                         public int Code;
120                         public byte* SortKey;
121
122                         public PreviousInfo (bool dummy)
123                         {
124                                 Code = -1;
125                                 SortKey = null;
126                         }
127                 }
128
129                 struct Escape
130                 {
131                         public string Source;
132                         public int Index;
133                         public int Start;
134                         public int End;
135                         public int Optional;
136                 }
137
138                 static SimpleCollator invariant =
139                         new SimpleCollator (CultureInfo.InvariantCulture);
140
141                 readonly TextInfo textInfo; // for ToLower().
142                 readonly CodePointIndexer cjkIndexer;
143                 readonly Contraction [] contractions;
144                 readonly Level2Map [] level2Maps;
145                 // This flag marks characters as "unsafe", where the character
146                 // could be used as part of a contraction (whose length > 1).
147                 readonly byte [] unsafeFlags;
148
149                 unsafe readonly byte* cjkCatTable;
150                 unsafe readonly byte* cjkLv1Table;
151                 unsafe readonly byte* cjkLv2Table;
152                 readonly CodePointIndexer cjkLv2Indexer;
153                 readonly int lcid;
154                 readonly bool frenchSort;
155
156
157                 const int UnsafeFlagLength = 0x300 / 8;
158
159 //              readonly byte [] contractionFlags = new byte [16];
160
161                 // This array is internally used inside IndexOf() to store
162                 // "no need to check" ASCII characters.
163                 //
164                 // Now that it should be thread safe, this array is allocated
165                 // at every time.
166 //              byte [] neverMatchFlags = new byte [128 / 8];
167
168                 #region .ctor() and split functions
169
170                 public SimpleCollator (CultureInfo culture)
171                 {
172                         lcid = culture.LCID;
173                         textInfo = culture.TextInfo;
174
175                         unsafe {
176                                 SetCJKTable (culture, ref cjkIndexer,
177                                         ref cjkCatTable, ref cjkLv1Table,
178                                         ref cjkLv2Indexer, ref cjkLv2Table);
179                         }
180
181                         // Get tailoring info
182                         TailoringInfo t = null;
183                         for (CultureInfo ci = culture; ci.LCID != 127; ci = ci.Parent) {
184                                 t = Uni.GetTailoringInfo (ci.LCID);
185                                 if (t != null)
186                                         break;
187                         }
188                         if (t == null) // then use invariant
189                                 t = Uni.GetTailoringInfo (127);
190
191                         frenchSort = t.FrenchSort;
192                         Uni.BuildTailoringTables (culture, t, ref contractions,
193                                 ref level2Maps);
194                         unsafeFlags = new byte [UnsafeFlagLength];
195                         foreach (Contraction c in contractions) {
196                                 if (c.Source.Length > 1)
197                                         foreach (char ch in c.Source)
198                                                 unsafeFlags [(int) ch / 8 ]
199                                                         |= (byte) (1 << ((int) ch & 7));
200 //                              for (int i = 0; i < c.Source.Length; i++) {
201 //                                      int ch = c.Source [i];
202 //                                      if (ch > 127)
203 //                                              continue;
204 //                                      contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
205 //                              }
206                         }
207                         if (lcid != 127)
208                                 foreach (Contraction c in invariant.contractions) {
209                                         if (c.Source.Length > 1)
210                                                 foreach (char ch in c.Source)
211                                                         unsafeFlags [(int) ch / 8 ] 
212                                                                 |= (byte) (1 << ((int) ch & 7));
213 //                                      for (int i = 0; i < c.Source.Length; i++) {
214 //                                              int ch = c.Source [i];
215 //                                              if (ch > 127)
216 //                                                      continue;
217 //                                              contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
218 //                                      }
219                                 }
220
221                         // FIXME: Since tailorings are mostly for latin
222                         // (and in some cases Cyrillic) characters, it would
223                         // be much better for performance to store "start 
224                         // indexes" for > 370 (culture-specific letters).
225
226 /*
227 // dump tailoring table
228 Console.WriteLine ("******** building table for {0} : contractions - {1} diacritical - {2}",
229 culture.LCID, contractions.Length, level2Maps.Length);
230 foreach (Contraction c in contractions) {
231 foreach (char cc in c.Source)
232 Console.Write ("{0:X4} ", (int) cc);
233 Console.WriteLine (" -> '{0}'", c.Replacement);
234 }
235 */
236                 }
237
238                 unsafe private void SetCJKTable (
239                         CultureInfo culture, ref CodePointIndexer cjkIndexer,
240                         ref byte* catTable, ref byte* lv1Table,
241                         ref CodePointIndexer lv2Indexer, ref byte* lv2Table)
242                 {
243                         string name = GetNeutralCulture (culture).Name;
244
245                         Uni.FillCJK (name, ref cjkIndexer, ref catTable,
246                                 ref lv1Table, ref lv2Indexer, ref lv2Table);
247                 }
248
249                 static CultureInfo GetNeutralCulture (CultureInfo info)
250                 {
251                         CultureInfo ret = info;
252                         while (ret.Parent != null && ret.Parent.LCID != 127)
253                                 ret = ret.Parent;
254                         return ret;
255                 }
256
257                 #endregion
258
259                 unsafe byte Category (int cp)
260                 {
261                         if (cp < 0x3000 || cjkCatTable == null)
262                                 return Uni.Category (cp);
263                         int idx = cjkIndexer.ToIndex (cp);
264                         return idx < 0 ? Uni.Category (cp) : cjkCatTable [idx];
265                 }
266
267                 unsafe byte Level1 (int cp)
268                 {
269                         if (cp < 0x3000 || cjkLv1Table == null)
270                                 return Uni.Level1 (cp);
271                         int idx = cjkIndexer.ToIndex (cp);
272                         return idx < 0 ? Uni.Level1 (cp) : cjkLv1Table [idx];
273                 }
274
275                 unsafe byte Level2 (int cp, ExtenderType ext)
276                 {
277                         if (ext == ExtenderType.Buggy)
278                                 return 5;
279                         else if (ext == ExtenderType.Conditional)
280                                 return 0;
281
282                         if (cp < 0x3000 || cjkLv2Table == null)
283                                 return Uni.Level2 (cp);
284                         int idx = cjkLv2Indexer.ToIndex (cp);
285                         byte ret = idx < 0 ? (byte) 0 : cjkLv2Table [idx];
286                         if (ret != 0)
287                                 return ret;
288                         ret = Uni.Level2 (cp);
289                         if (level2Maps.Length == 0)
290                                 return ret;
291                         for (int i = 0; i < level2Maps.Length; i++) {
292                                 if (level2Maps [i].Source == ret)
293                                         return level2Maps [i].Replace;
294                                 else if (level2Maps [i].Source > ret)
295                                         break;
296                         }
297                         return ret;
298                 }
299
300                 static bool IsHalfKana (int cp, COpt opt)
301                 {
302                         return (opt & COpt.IgnoreWidth) != 0 ||
303                                 Uni.IsHalfWidthKana ((char) cp);
304                 }
305
306                 Contraction GetContraction (string s, int start, int end)
307                 {
308 //                      int si = s [start];
309 //                      if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
310 //                              return null;
311                         Contraction c = GetContraction (s, start, end, contractions);
312                         if (c != null || lcid == 127)
313                                 return c;
314                         return GetContraction (s, start, end, invariant.contractions);
315                 }
316
317                 Contraction GetContraction (string s, int start, int end, Contraction [] clist)
318                 {
319                         for (int i = 0; i < clist.Length; i++) {
320                                 Contraction ct = clist [i];
321                                 int diff = ct.Source [0] - s [start];
322                                 if (diff > 0)
323                                         return null; // it's already sorted
324                                 else if (diff < 0)
325                                         continue;
326                                 char [] chars = ct.Source;
327                                 if (end - start < chars.Length)
328                                         continue;
329                                 bool match = true;
330                                 for (int n = 0; n < chars.Length; n++)
331                                         if (s [start + n] != chars [n]) {
332                                                 match = false;
333                                                 break;
334                                         }
335                                 if (match)
336                                         return ct;
337                         }
338                         return null;
339                 }
340
341                 Contraction GetTailContraction (string s, int start, int end)
342                 {
343 //                      int si = s [end - 1];
344 //                      if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
345 //                              return null;
346                         Contraction c = GetTailContraction (s, start, end, contractions);
347                         if (c != null || lcid == 127)
348                                 return c;
349                         return GetTailContraction (s, start, end, invariant.contractions);
350                 }
351
352                 Contraction GetTailContraction (string s, int start, int end, Contraction [] clist)
353                 {
354                         if (start == end || end < -1 || start >= s.Length || s.Length <= end + 1)
355                                 throw new SystemException (String.Format ("MONO internal error. Failed to get TailContraction. start = {0} end = {1} string = '{2}'", start, end, s));
356                         for (int i = 0; i < clist.Length; i++) {
357                                 Contraction ct = clist [i];
358                                 char [] chars = ct.Source;
359                                 if (chars.Length > start - end)
360                                         continue;
361                                 if (chars [chars.Length - 1] != s [start])
362                                         continue;
363
364                                 bool match = true;
365                                 for (int n = 0, spos = start - chars.Length + 1;
366                                      n < chars.Length;
367                                      n++, spos++) {
368                                         if (s [spos] != chars [n]) {
369                                                 match = false;
370                                                 break;
371                                         }
372                                 }
373                                 if (match)
374                                         return ct;
375                         }
376                         return null;
377                 }
378
379                 Contraction GetContraction (char c)
380                 {
381                         Contraction ct = GetContraction (c, contractions);
382                         if (ct != null || lcid == 127)
383                                 return ct;
384                         return GetContraction (c, invariant.contractions);
385                 }
386
387                 Contraction GetContraction (char c, Contraction [] clist)
388                 {
389                         for (int i = 0; i < clist.Length; i++) {
390                                 Contraction ct = clist [i];
391                                 if (ct.Source [0] > c)
392                                         return null; // it's already sorted
393                                 if (ct.Source [0] == c && ct.Source.Length == 1)
394                                         return ct;
395                         }
396                         return null;
397                 }
398
399                 int FilterOptions (int i, COpt opt)
400                 {
401                         if ((opt & COpt.IgnoreWidth) != 0) {
402                                 int x = Uni.ToWidthCompat (i);
403                                 if (x != 0)
404                                         i = x;
405                         }
406                         if ((opt & COpt.OrdinalIgnoreCase) != 0)
407                                 i = textInfo.ToLower ((char) i);
408                         if ((opt & COpt.IgnoreCase) != 0)
409                                 i = textInfo.ToLower ((char) i);
410                         if ((opt & COpt.IgnoreKanaType) != 0)
411                                 i = Uni.ToKanaTypeInsensitive (i);
412                         return i;
413                 }
414
415                 enum ExtenderType {
416                         None,
417                         Simple,
418                         Voiced,
419                         Conditional,
420                         Buggy,
421                 }
422
423                 ExtenderType GetExtenderType (int i)
424                 {
425                         // LAMESPEC: Windows expects true for U+3005, but 
426                         // sometimes it does not represent to repeat just
427                         // one character.
428                         // Windows also expects true for U+3031 and U+3032,
429                         // but they should *never* repeat one character.
430
431                         // U+2015 becomes an extender only when it is Japanese
432                         if (i == 0x2015)
433                                 return lcid == 16 ? ExtenderType.Conditional :
434                                         ExtenderType.None;
435
436                         if (i < 0x3005 || i > 0xFF70)
437                                 return ExtenderType.None;
438                         if (i >= 0xFE7C) {
439                                 switch (i) {
440                                 case 0xFE7C:
441                                 case 0xFE7D:
442                                         return ExtenderType.Simple;
443                                 case 0xFF70:
444                                         return ExtenderType.Conditional;
445                                 case 0xFF9E:
446                                 case 0xFF9F:
447                                         return ExtenderType.Voiced;
448                                 }
449                         }
450                         if (i > 0x30FE)
451                                 return ExtenderType.None;
452                         switch (i) {
453                         case 0x3005: // LAMESPEC: see above.
454                                 return ExtenderType.Buggy;
455                         case 0x3031: // LAMESPEC: see above.
456                         case 0x3032: // LAMESPEC: see above.
457                         case 0x309D:
458                         case 0x30FD:
459                                 return ExtenderType.Simple;
460                         case 0x309E:
461                         case 0x30FE:
462                                 return ExtenderType.Voiced;
463                         case 0x30FC:
464                                 return ExtenderType.Conditional;
465                         default:
466                                 return ExtenderType.None;
467                         }
468                 }
469
470                 static byte ToDashTypeValue (ExtenderType ext, COpt opt)
471                 {
472                         if ((opt & COpt.IgnoreNonSpace) != 0) // LAMESPEC: huh, why?
473                                 return 3;
474                         switch (ext) {
475                         case ExtenderType.None:
476                                 return 3;
477                         case ExtenderType.Conditional:
478                                 return 5;
479                         default:
480                                 return 4;
481                         }
482                 }
483
484                 int FilterExtender (int i, ExtenderType ext, COpt opt)
485                 {
486                         if (ext == ExtenderType.Conditional &&
487                                 Uni.HasSpecialWeight ((char) i)) {
488                                 bool half = IsHalfKana ((char) i, opt);
489                                 bool katakana = !Uni.IsHiragana ((char) i);
490                                 switch (Level1 (i) & 7) {
491                                 case 2:
492                                         return half ? 0xFF71 : katakana ? 0x30A2 : 0x3042;
493                                 case 3:
494                                         return half ? 0xFF72 : katakana ? 0x30A4 : 0x3044;
495                                 case 4:
496                                         return half ? 0xFF73 : katakana ? 0x30A6 : 0x3046;
497                                 case 5:
498                                         return half ? 0xFF74 : katakana ? 0x30A8 : 0x3048;
499                                 case 6:
500                                         return half ? 0xFF75 : katakana ? 0x30AA : 0x304A;
501                                 }
502                         }
503                         return i;
504                 }
505
506                 static bool IsIgnorable (int i, COpt opt)
507                 {
508                         return Uni.IsIgnorable (i, (byte) (1 +
509                                 ((opt & COpt.IgnoreSymbols) != 0 ? 2 : 0) +
510                                 ((opt & COpt.IgnoreNonSpace) != 0 ? 4 : 0)));
511                         
512                 }
513
514                 bool IsSafe (int i)
515                 {
516                         return i / 8 >= unsafeFlags.Length ? true : (unsafeFlags [i / 8] & (1 << (i % 8))) == 0;
517                 }
518
519                 #region GetSortKey()
520
521                 public SortKey GetSortKey (string s)
522                 {
523                         return GetSortKey (s, CompareOptions.None);
524                 }
525
526                 public SortKey GetSortKey (string s, CompareOptions options)
527                 {
528                         return GetSortKey (s, 0, s.Length, options);
529                 }
530
531                 public SortKey GetSortKey (string s, int start, int length, CompareOptions options)
532                 {
533                         SortKeyBuffer buf = new SortKeyBuffer (lcid);
534                         buf.Initialize (options, lcid, s, frenchSort);
535                         int end = start + length;
536                         GetSortKey (s, start, end, buf, options);
537                         return buf.GetResultAndReset ();
538                 }
539
540                 unsafe void GetSortKey (string s, int start, int end,
541                         SortKeyBuffer buf, CompareOptions opt)
542                 {
543                         byte* prevbuf = stackalloc byte [4];
544                         ClearBuffer (prevbuf, 4);
545                         Context ctx = new Context (opt, null, null, null, null, prevbuf, false);
546
547                         for (int n = start; n < end; n++) {
548                                 int i = s [n];
549
550                                 ExtenderType ext = GetExtenderType (i);
551                                 if (ext != ExtenderType.None) {
552                                         i = FilterExtender (ctx.PrevCode, ext, opt);
553                                         if (i >= 0)
554                                                 FillSortKeyRaw (i, ext, buf, opt);
555                                         else if (ctx.PrevSortKey != null) {
556                                                 byte* b = ctx.PrevSortKey;
557                                                 buf.AppendNormal (
558                                                         b [0],
559                                                         b [1],
560                                                         b [2] != 1 ? b [2] : Level2 (i, ext),
561                                                         b [3] != 1 ? b [3] : Uni.Level3 (i));
562                                         }
563                                         // otherwise do nothing.
564                                         // (if the extender is the first char
565                                         // in the string, then just ignore.)
566                                         continue;
567                                 }
568
569                                 if (IsIgnorable (i, opt))
570                                         continue;
571                                 i = FilterOptions (i, opt);
572
573                                 Contraction ct = GetContraction (s, n, end);
574                                 if (ct != null) {
575                                         if (ct.Replacement != null) {
576                                                 GetSortKey (ct.Replacement, 0, ct.Replacement.Length, buf, opt);
577                                         } else {
578                                                 byte* b = ctx.PrevSortKey;
579                                                 for (int bi = 0; bi < ct.SortKey.Length; bi++)
580                                                         b [bi] = ct.SortKey [bi];
581                                                 buf.AppendNormal (
582                                                         b [0],
583                                                         b [1],
584                                                         b [2] != 1 ? b [2] : Level2 (i, ext),
585                                                         b [3] != 1 ? b [3] : Uni.Level3 (i));
586                                                 ctx.PrevCode = -1;
587                                         }
588                                         n += ct.Source.Length - 1;
589                                 }
590                                 else {
591                                         if (!Uni.IsIgnorableNonSpacing (i))
592                                                 ctx.PrevCode = i;
593                                         FillSortKeyRaw (i, ExtenderType.None, buf, opt);
594                                 }
595                         }
596                 }
597
598                 void FillSortKeyRaw (int i, ExtenderType ext,
599                         SortKeyBuffer buf, CompareOptions opt)
600                 {
601                         if (0x3400 <= i && i <= 0x4DB5) {
602                                 int diff = i - 0x3400;
603                                 buf.AppendCJKExtension (
604                                         (byte) (0x10 + diff / 254),
605                                         (byte) (diff % 254 + 2));
606                                 return;
607                         }
608
609                         UnicodeCategory uc = char.GetUnicodeCategory ((char) i);
610                         switch (uc) {
611                         case UnicodeCategory.PrivateUse:
612                                 int diff = i - 0xE000;
613                                 buf.AppendNormal (
614                                         (byte) (0xE5 + diff / 254),
615                                         (byte) (diff % 254 + 2),
616                                         0,
617                                         0);
618                                 return;
619                         case UnicodeCategory.Surrogate:
620                                 FillSurrogateSortKeyRaw (i, buf);
621                                 return;
622                         }
623
624                         byte level2 = Level2 (i, ext);
625                         if (Uni.HasSpecialWeight ((char) i)) {
626                                 byte level1 = Level1 (i);
627                                 buf.AppendKana (
628                                         Category (i),
629                                         level1,
630                                         level2,
631                                         Uni.Level3 (i),
632                                         Uni.IsJapaneseSmallLetter ((char) i),
633                                         ToDashTypeValue (ext, opt),
634                                         !Uni.IsHiragana ((char) i),
635                                         IsHalfKana ((char) i, opt)
636                                         );
637                                 if ((opt & COpt.IgnoreNonSpace) == 0 && ext == ExtenderType.Voiced)
638                                         // Append voice weight
639                                         buf.AppendNormal (1, 1, 1, 0);
640                         }
641                         else
642                                 buf.AppendNormal (
643                                         Category (i),
644                                         Level1 (i),
645                                         level2,
646                                         Uni.Level3 (i));
647                 }
648
649                 void FillSurrogateSortKeyRaw (int i, SortKeyBuffer buf)
650                 {
651                         int diffbase = 0;
652                         int segment = 0;
653                         byte lower = 0;
654
655                         if (i < 0xD840) {
656                                 diffbase = 0xD800;
657                                 segment = 0x41;
658                                 lower = (byte) ((i == 0xD800) ? 0x3E : 0x3F);
659                         } else if (0xD840 <= i && i < 0xD880) {
660                                 diffbase = 0xD840;
661                                 segment = 0xF2;
662                                 lower = 0x3E;
663                         } else if (0xDB80 <= i && i < 0xDC00) {
664                                 diffbase = 0xDB80 - 0x40;
665                                 segment = 0xFE;
666                                 lower = 0x3E;
667                         } else {
668                                 diffbase = 0xDC00 - 0xF8 + 2;
669                                 segment = 0x41;
670                                 lower = 0x3F;
671                         }
672                         int diff = i - diffbase;
673
674                         buf.AppendNormal (
675                                 (byte) (segment + diff / 254),
676                                 (byte) (diff % 254 + 2),
677                                 lower,
678                                 lower);
679                 }
680
681                 #endregion
682
683                 #region Compare()
684
685                 public int Compare (string s1, string s2)
686                 {
687                         return Compare (s1, s2, CompareOptions.None);
688                 }
689
690                 public int Compare (string s1, string s2, CompareOptions options)
691                 {
692                         return Compare (s1, 0, s1.Length, s2, 0, s2.Length, options);
693                 }
694
695                 private int CompareOrdinal (string s1, int idx1, int len1,
696                         string s2, int idx2, int len2)
697                 {
698                         int min = len1 < len2 ? len1 : len2;
699                         int end1 = idx1 + min;
700                         int end2 = idx2 + min;
701                         if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
702                                 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
703                         for (int i1 = idx1, i2 = idx2;
704                                 i1 < end1 && i2 < end2; i1++, i2++)
705                                 if (s1 [i1] != s2 [i2])
706                                         return s1 [i1] - s2 [i2];
707                         return len1 == len2 ? 0 :
708                                 len1 == min ? - 1 : 1;
709                 }
710
711                 // mostly equivalent to CompareOrdinal, but the return value is
712                 // not based on codepoints.
713                 private int CompareQuick (string s1, int idx1, int len1,
714                         string s2, int idx2, int len2, out bool sourceConsumed,
715                         out bool targetConsumed, bool immediateBreakup)
716                 {
717                         sourceConsumed = false;
718                         targetConsumed = false;
719                         int min = len1 < len2 ? len1 : len2;
720                         int end1 = idx1 + min;
721                         int end2 = idx2 + min;
722                         if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
723                                 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
724                         for (int i1 = idx1, i2 = idx2;
725                                 i1 < end1 && i2 < end2; i1++, i2++)
726                                 if (s1 [i1] != s2 [i2]) {
727                                         if (immediateBreakup)
728                                                 return -1;
729                                         int ret = Category (s1 [i1]) - Category (s2 [i2]);
730                                         if (ret == 0)
731                                                 ret = Level1 (s1 [i1]) - Level1 (s2 [i2]);
732                                         // no level2 and 4
733                                         if (ret == 0)
734                                                 ret = Uni.Level3 (s1 [i1]) - Uni.Level3 (s2 [i2]);
735                                         if (ret == 0)
736                                                 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. '{0}' {2} {3} '{1}' {4} {5}", s1, s2, idx1, end1, idx2, end2));
737                                         return ret;
738                                 }
739                         sourceConsumed = len1 <= len2;
740                         targetConsumed = len1 >= len2;
741                         return len1 == len2 ? 0 :
742                                 len1 == min ? - 1 : 1;
743                 }
744
745                 private int CompareOrdinalIgnoreCase (string s1, int idx1, int len1,
746                         string s2, int idx2, int len2)
747                 {
748                         int min = len1 < len2 ? len1 : len2;
749                         int end1 = idx1 + min;
750                         int end2 = idx2 + min;
751                         if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
752                                 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
753                         TextInfo ti = invariant.textInfo;
754                         for (int i1 = idx1, i2 = idx2;
755                                 i1 < end1 && i2 < end2; i1++, i2++)
756                                 if (ti.ToLower (s1 [i1]) != ti.ToLower (s2 [i2]))
757                                         return ti.ToLower (s1 [i1]) - ti.ToLower (s2 [i2]);
758                         return len1 == len2 ? 0 :
759                                 len1 == min ? - 1 : 1;
760                 }
761
762                 public unsafe int Compare (string s1, int idx1, int len1,
763                         string s2, int idx2, int len2, CompareOptions options)
764                 {
765                         // quick equality check
766                         if (idx1 == idx2 && len1 == len2 &&
767                                 Object.ReferenceEquals (s1, s2))
768                                 return 0;
769                         if (options == CompareOptions.Ordinal)
770                                 return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
771                         if (options == CompareOptions.OrdinalIgnoreCase)
772                                 return CompareOrdinalIgnoreCase (s1, idx1, len1, s2, idx2, len2);
773
774 #if false // stable easy version, depends on GetSortKey().
775                         SortKey sk1 = GetSortKey (s1, idx1, len1, options);
776                         SortKey sk2 = GetSortKey (s2, idx2, len2, options);
777                         byte [] d1 = sk1.KeyData;
778                         byte [] d2 = sk2.KeyData;
779                         int len = d1.Length > d2.Length ? d2.Length : d1.Length;
780                         for (int i = 0; i < len; i++)
781                                 if (d1 [i] != d2 [i])
782                                         return d1 [i] < d2 [i] ? -1 : 1;
783                         return d1.Length == d2.Length ? 0 : d1.Length < d2.Length ? -1 : 1;
784 #else
785                         byte* sk1 = stackalloc byte [4];
786                         byte* sk2 = stackalloc byte [4];
787                         ClearBuffer (sk1, 4);
788                         ClearBuffer (sk2, 4);
789                         Context ctx = new Context (options, null, null, sk1, sk2, null,
790                                 QuickCheckPossible (s1, idx1, idx1 + len1, s2, idx2, idx2 + len2));
791
792                         bool dummy, dummy2;
793                         int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, out dummy, out dummy2, true, false, ref ctx);
794                         return ret == 0 ? 0 : ret < 0 ? -1 : 1;
795 #endif
796                 }
797
798                 unsafe void ClearBuffer (byte* buffer, int size)
799                 {
800                         for (int i = 0; i < size; i++)
801                                 buffer [i] = 0;
802                 }
803
804                 bool QuickCheckPossible (string s1, int idx1, int end1,
805                         string s2, int idx2, int end2)
806                 {
807 #if true
808                         // looks like it is buggy and inefficient, so just disable it.
809                         return false;
810 #else
811                         if (QuickCheckDisabled)
812                                 return false;
813 //                      if (s1.Length > 100 || s2.Length > 100)
814 //                              return false;
815                         for (int i = idx1; i < end1; i++)
816                                 if (s1 [i] < 0x20 && (s1 [i] < '\x9' || s1 [i] > '\xD') || s1 [i] >= 0x80 || s1 [i] == '-' || s1 [i] == '\'')
817                                         return false;
818                         for (int i = idx2; i < end2; i++)
819                                 if (s2 [i] < 0x20 && (s2 [i] < '\x9' || s2 [i] > '\xD') || s2 [i] >= 0x80 || s2 [i] == '-' || s2 [i] == '\'')
820                                         return false;
821                         return true;
822 #endif
823                 }
824
825                 unsafe int CompareInternal (string s1, int idx1, int len1, string s2,
826                         int idx2, int len2,
827                         out bool targetConsumed, out bool sourceConsumed,
828                         bool skipHeadingExtenders, bool immediateBreakup,
829                         ref Context ctx)
830                 {
831                         COpt opt = ctx.Option;
832                         int start1 = idx1;
833                         int start2 = idx2;
834                         int end1 = idx1 + len1;
835                         int end2 = idx2 + len2;
836                         targetConsumed = false;
837                         sourceConsumed = false;
838                         PreviousInfo prev2 = new PreviousInfo (false);
839
840                         if (opt == CompareOptions.None && ctx.QuickCheckPossible)
841                                 return CompareQuick (s1, idx1, len1, s2, idx2, len2, out sourceConsumed, out targetConsumed, immediateBreakup);
842
843                         // It holds final result that comes from the comparison
844                         // at level 2 or lower. Even if Compare() found the
845                         // difference at level 2 or lower, it still has to
846                         // continue level 1 comparison. FinalResult is used
847                         // when there was no further differences.
848                         int finalResult = 0;
849                         // It holds the comparison level to do. It starts from
850                         // 5, and becomes 1 when we do primary-only comparison.
851                         int currentLevel = 5;
852
853                         int lv5At1 = -1;
854                         int lv5At2 = -1;
855                         int lv5Value1 = 0;
856                         int lv5Value2 = 0;
857
858                         // Skip heading extenders
859                         if (skipHeadingExtenders) {
860                                 for (; idx1 < end1; idx1++)
861                                         if (GetExtenderType (s1 [idx1]) == ExtenderType.None)
862                                                 break;
863                                 for (; idx2 < end2; idx2++)
864                                         if (GetExtenderType (s2 [idx2]) == ExtenderType.None)
865                                                 break;
866                         }
867
868                         ExtenderType ext1 = ExtenderType.None;
869                         ExtenderType ext2 = ExtenderType.None;
870
871                         int quickCheckPos1 = idx1;
872                         int quickCheckPos2 = idx2;
873                         bool stringSort = (opt & COpt.StringSort) != 0;
874                         bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
875                         Escape escape1 = new Escape ();
876                         Escape escape2 = new Escape ();
877
878                         while (true) {
879                                 for (; idx1 < end1; idx1++)
880                                         if (!IsIgnorable (s1 [idx1], opt))
881                                                 break;
882                                 for (; idx2 < end2; idx2++)
883                                         if (!IsIgnorable (s2 [idx2], opt))
884                                                 break;
885
886                                 if (idx1 >= end1) {
887                                         if (escape1.Source == null)
888                                                 break;
889                                         s1 = escape1.Source;
890                                         start1 = escape1.Start;
891                                         idx1 = escape1.Index;
892                                         end1 = escape1.End;
893                                         quickCheckPos1 = escape1.Optional;
894                                         escape1.Source = null;
895                                         continue;
896                                 }
897                                 if (idx2 >= end2) {
898                                         if (escape2.Source == null)
899                                                 break;
900                                         s2 = escape2.Source;
901                                         start2 = escape2.Start;
902                                         idx2 = escape2.Index;
903                                         end2 = escape2.End;
904                                         quickCheckPos2 = escape2.Optional;
905                                         escape2.Source = null;
906                                         continue;
907                                 }
908 #if true
909 // If comparison is unstable, then this part is one of the most doubtful part.
910 // Here we run quick codepoint comparison and run back to "stable" area to
911 // compare characters.
912
913                                 // Strictly to say, even the first character
914                                 // could be compared here, but it messed
915                                 // backward step, so I just avoided mess.
916                                 if (quickCheckPos1 < idx1 && quickCheckPos2 < idx2) {
917                                         while (idx1 < end1 && idx2 < end2 &&
918                                                 s1 [idx1] == s2 [idx2]) {
919                                                 idx1++;
920                                                 idx2++;
921                                         }
922                                         if (idx1 == end1 || idx2 == end2)
923                                                 continue; // check replacement
924
925                                         int backwardEnd1 = quickCheckPos1;
926                                         int backwardEnd2 = quickCheckPos2;
927                                         quickCheckPos1 = idx1;
928                                         quickCheckPos2 = idx2;
929
930                                         idx1--;
931                                         idx2--;
932
933                                         for (;idx1 > backwardEnd1; idx1--)
934                                                 if (Category (s1 [idx1]) != 1)
935                                                         break;
936                                         for (;idx2 > backwardEnd2; idx2--)
937                                                 if (Category (s2 [idx2]) != 1)
938                                                         break;
939                                         for (;idx1 > backwardEnd1; idx1--)
940                                                 if (IsSafe (s1 [idx1]))
941                                                         break;
942                                         for (;idx2 > backwardEnd2; idx2--)
943                                                 if (IsSafe (s2 [idx2]))
944                                                         break;
945                                 }
946 #endif
947
948                                 int cur1 = idx1;
949                                 int cur2 = idx2;
950                                 byte* sk1 = null;
951                                 byte* sk2 = null;
952                                 int i1 = FilterOptions (s1 [idx1], opt);
953                                 int i2 = FilterOptions (s2 [idx2], opt);
954                                 bool special1 = false;
955                                 bool special2 = false;
956
957                                 // If current character is an expander, then
958                                 // repeat the previous character.
959                                 ext1 = GetExtenderType (i1);
960                                 if (ext1 != ExtenderType.None) {
961                                         if (ctx.PrevCode < 0) {
962                                                 if (ctx.PrevSortKey == null) {
963                                                         // nothing to extend
964                                                         idx1++;
965                                                         continue;
966                                                 }
967                                                 sk1 = ctx.PrevSortKey;
968                                         }
969                                         else
970                                                 i1 = FilterExtender (ctx.PrevCode, ext1, opt);
971                                 }
972                                 ext2 = GetExtenderType (i2);
973                                 if (ext2 != ExtenderType.None) {
974                                         if (prev2.Code < 0) {
975                                                 if (prev2.SortKey == null) {
976                                                         // nothing to extend
977                                                         idx2++;
978                                                         continue;
979                                                 }
980                                                 sk2 = prev2.SortKey;
981                                         }
982                                         else
983                                                 i2 = FilterExtender (prev2.Code, ext2, opt);
984                                 }
985
986                                 byte cat1 = Category (i1);
987                                 byte cat2 = Category (i2);
988
989                                 // Handle special weight characters
990                                 if (cat1 == 6) {
991                                         if (!stringSort && currentLevel == 5) {
992                                                 lv5At1 = escape1.Source != null ?
993                                                         escape1.Index - escape1.Start :
994                                                         cur1 - start1;
995                                                 // here Windows has a bug that it does
996                                                 // not consider thirtiary weight.
997                                                 lv5Value1 = Level1 (i1) << 8 + Uni.Level3 (i1);
998                                         }
999                                         ctx.PrevCode = i1;
1000                                         idx1++;
1001                                 }
1002                                 if (cat2 == 6) {
1003                                         if (!stringSort && currentLevel == 5) {
1004                                                 lv5At2 = escape2.Source != null ?
1005                                                         escape2.Index - escape2.Start :
1006                                                         cur2 - start2;
1007                                                 // here Windows has a bug that it does
1008                                                 // not consider thirtiary weight.
1009                                                 lv5Value2 = Level1 (i2) << 8 + Uni.Level3 (i2);
1010                                         }
1011                                         prev2.Code = i2;
1012                                         idx2++;
1013                                 }
1014                                 if (cat1 == 6 || cat2 == 6) {
1015                                         if (currentLevel == 5) {
1016                                                 if (lv5Value1 == lv5Value2) {
1017                                                         // There is not really difference here.
1018                                                         lv5At1 = lv5At2 = -1;
1019                                                         lv5Value1 = lv5Value2 = 0;
1020                                                 }
1021                                                 else
1022                                                         currentLevel = 4;
1023                                         }
1024                                         continue;
1025                                 }
1026
1027                                 Contraction ct1 = null;
1028                                 if (ext1 == ExtenderType.None)
1029                                         ct1 = GetContraction (s1, idx1, end1);
1030
1031                                 int offset1 = 1;
1032                                 if (sk1 != null)
1033                                         offset1 = 1;
1034                                 else if (ct1 != null) {
1035                                         offset1 = ct1.Source.Length;
1036                                         if (ct1.SortKey != null) {
1037                                                 sk1 = ctx.Buffer1;
1038                                                 for (int i = 0; i < ct1.SortKey.Length; i++)
1039                                                         sk1 [i] = ct1.SortKey [i];
1040                                                 ctx.PrevCode = -1;
1041                                                 ctx.PrevSortKey = sk1;
1042                                         }
1043                                         else if (escape1.Source == null) {
1044                                                 escape1.Source = s1;
1045                                                 escape1.Start = start1;
1046                                                 escape1.Index = cur1 + ct1.Source.Length;
1047                                                 escape1.End = end1;
1048                                                 escape1.Optional = quickCheckPos1;
1049                                                 s1 = ct1.Replacement;
1050                                                 idx1 = 0;
1051                                                 start1 = 0;
1052                                                 end1 = s1.Length;
1053                                                 quickCheckPos1 = 0;
1054                                                 continue;
1055                                         }
1056                                 }
1057                                 else {
1058                                         sk1 = ctx.Buffer1;
1059                                         sk1 [0] = cat1;
1060                                         sk1 [1] = Level1 (i1);
1061                                         if (!ignoreNonSpace && currentLevel > 1)
1062                                                 sk1 [2] = Level2 (i1, ext1);
1063                                         if (currentLevel > 2)
1064                                                 sk1 [3] = Uni.Level3 (i1);
1065                                         if (currentLevel > 3)
1066                                                 special1 = Uni.HasSpecialWeight ((char) i1);
1067                                         if (cat1 > 1)
1068                                                 ctx.PrevCode = i1;
1069                                 }
1070
1071                                 Contraction ct2 = null;
1072                                 if (ext2 == ExtenderType.None)
1073                                         ct2 = GetContraction (s2, idx2, end2);
1074
1075                                 if (sk2 != null)
1076                                         idx2++;
1077                                 else if (ct2 != null) {
1078                                         idx2 += ct2.Source.Length;
1079                                         if (ct2.SortKey != null) {
1080                                                 sk2 = ctx.Buffer2;
1081                                                 for (int i = 0; i < ct2.SortKey.Length; i++)
1082                                                         sk2 [i] = ct2.SortKey [i];
1083                                                 prev2.Code = -1;
1084                                                 prev2.SortKey = sk2;
1085                                         }
1086                                         else if (escape2.Source == null) {
1087                                                 escape2.Source = s2;
1088                                                 escape2.Start = start2;
1089                                                 escape2.Index = cur2 + ct2.Source.Length;
1090                                                 escape2.End = end2;
1091                                                 escape2.Optional = quickCheckPos2;
1092                                                 s2 = ct2.Replacement;
1093                                                 idx2 = 0;
1094                                                 start2 = 0;
1095                                                 end2 = s2.Length;
1096                                                 quickCheckPos2 = 0;
1097                                                 continue;
1098                                         }
1099                                 }
1100                                 else {
1101                                         sk2 = ctx.Buffer2;
1102                                         sk2 [0] = cat2;
1103                                         sk2 [1] = Level1 (i2);
1104                                         if (!ignoreNonSpace && currentLevel > 1)
1105                                                 sk2 [2] = Level2 (i2, ext2);
1106                                         if (currentLevel > 2)
1107                                                 sk2 [3] = Uni.Level3 (i2);
1108                                         if (currentLevel > 3)
1109                                                 special2 = Uni.HasSpecialWeight ((char) i2);
1110                                         if (cat2 > 1)
1111                                                 prev2.Code = i2;
1112                                         idx2++;
1113                                 }
1114
1115                                 // Add offset here so that it does not skip
1116                                 // idx1 while s2 has a replacement.
1117                                 idx1 += offset1;
1118
1119                                 if (!ignoreNonSpace) {
1120                                         // add diacritical marks in s1 here
1121                                         while (idx1 < end1) {
1122                                                 if (Category (s1 [idx1]) != 1)
1123                                                         break;
1124                                                 if (sk1 [2] == 0)
1125                                                         sk1 [2] = 2;
1126                                                 sk1 [2] = (byte) (sk1 [2] + 
1127                                                         Level2 (s1 [idx1], ExtenderType.None));
1128                                                 idx1++;
1129                                         }
1130
1131                                         // add diacritical marks in s2 here
1132                                         while (idx2 < end2) {
1133                                                 if (Category (s2 [idx2]) != 1)
1134                                                         break;
1135                                                 if (sk2 [2] == 0)
1136                                                         sk2 [2] = 2;
1137                                                 sk2 [2] = (byte) (sk2 [2] + 
1138                                                         Level2 (s2 [idx2], ExtenderType.None));
1139                                                 idx2++;
1140                                         }
1141                                 }
1142
1143                                 int ret = sk1 [0] - sk2 [0];
1144                                 ret = ret != 0 ? ret : sk1 [1] - sk2 [1];
1145                                 if (ret != 0)
1146                                         return ret;
1147                                 if (currentLevel == 1)
1148                                         continue;
1149                                 if (!ignoreNonSpace) {
1150                                         ret = sk1 [2] - sk2 [2];
1151                                         if (ret != 0) {
1152                                                 finalResult = ret;
1153                                                 if (immediateBreakup)
1154                                                         return -1; // different
1155                                                 currentLevel = frenchSort ? 2 : 1;
1156                                                 continue;
1157                                         }
1158                                 }
1159                                 if (currentLevel == 2)
1160                                         continue;
1161                                 ret = sk1 [3] - sk2 [3];
1162                                 if (ret != 0) {
1163                                         finalResult = ret;
1164                                         if (immediateBreakup)
1165                                                 return -1; // different
1166                                         currentLevel = 2;
1167                                         continue;
1168                                 }
1169                                 if (currentLevel == 3)
1170                                         continue;
1171                                 if (special1 != special2) {
1172                                         if (immediateBreakup)
1173                                                 return -1; // different
1174                                         finalResult = special1 ? 1 : -1;
1175                                         currentLevel = 3;
1176                                         continue;
1177                                 }
1178                                 if (special1) {
1179                                         ret = CompareFlagPair (
1180                                                 !Uni.IsJapaneseSmallLetter ((char) i1),
1181                                                 !Uni.IsJapaneseSmallLetter ((char) i2));
1182                                         ret = ret != 0 ? ret :
1183                                                 ToDashTypeValue (ext1, opt) -
1184                                                 ToDashTypeValue (ext2, opt);
1185                                         ret = ret != 0 ? ret : CompareFlagPair (
1186                                                 Uni.IsHiragana ((char) i1),
1187                                                 Uni.IsHiragana ((char) i2));
1188                                         ret = ret != 0 ? ret : CompareFlagPair (
1189                                                 !IsHalfKana ((char) i1, opt),
1190                                                 !IsHalfKana ((char) i2, opt));
1191                                         if (ret != 0) {
1192                                                 if (immediateBreakup)
1193                                                         return -1; // different
1194                                                 finalResult = ret;
1195                                                 currentLevel = 3;
1196                                                 continue;
1197                                         }
1198                                 }
1199                         }
1200
1201                         // If there were only level 3 or lower differences,
1202                         // then we still have to find diacritical differences
1203                         // if any.
1204                         if (!ignoreNonSpace &&
1205                                 finalResult != 0 && currentLevel > 2) {
1206                                 while (idx1 < end1 && idx2 < end2) {
1207                                         if (!Uni.IsIgnorableNonSpacing (s1 [idx1]))
1208                                                 break;
1209                                         if (!Uni.IsIgnorableNonSpacing (s2 [idx2]))
1210                                                 break;
1211                                         finalResult = Level2 (FilterOptions (s1 [idx1], opt), ext1) - Level2 (FilterOptions (s2 [idx2], opt), ext2);
1212                                         if (finalResult != 0)
1213                                                 break;
1214                                         idx1++;
1215                                         idx2++;
1216                                         // they should work only for the first character
1217                                         ext1 = ExtenderType.None;
1218                                         ext2 = ExtenderType.None;
1219                                 }
1220                         }
1221                         if (currentLevel == 1 && finalResult != 0) {
1222                                 while (idx1 < end1) {
1223                                         if (Uni.IsIgnorableNonSpacing (s1 [idx1]))
1224                                                 idx1++;
1225                                         else
1226                                                 break;
1227                                 }
1228                                 while (idx2 < end2) {
1229                                         if (Uni.IsIgnorableNonSpacing (s2 [idx2]))
1230                                                 idx2++;
1231                                         else
1232                                                 break;
1233                                 }
1234                         }
1235                         // we still have to check level 5
1236                         if (finalResult == 0) {
1237                                 if (lv5At1 < 0 && lv5At2 >= 0)
1238                                         finalResult = -1;
1239                                 else if (lv5At2 < 0 && lv5At1 >= 0)
1240                                         finalResult = 1;
1241                                 else {
1242                                         finalResult = lv5At1 - lv5At2;
1243                                         if (finalResult == 0)
1244                                                 finalResult = lv5Value1 - lv5Value2;
1245                                 }
1246                         }
1247                         if (finalResult == 0) {
1248                                 if (idx2 == end2)
1249                                         targetConsumed = true;
1250                                 if (idx1 == end1)
1251                                         sourceConsumed = true;
1252                         }
1253                         return idx1 != end1 ? 1 : idx2 == end2 ? finalResult : -1;
1254                 }
1255
1256                 int CompareFlagPair (bool b1, bool b2)
1257                 {
1258                         return b1 == b2 ? 0 : b1 ? 1 : -1;
1259                 }
1260
1261                 #endregion
1262
1263                 #region IsPrefix() and IsSuffix()
1264
1265                 public bool IsPrefix (string src, string target, CompareOptions opt)
1266                 {
1267                         return IsPrefix (src, target, 0, src.Length, opt);
1268                 }
1269
1270                 public unsafe bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
1271                 {
1272                         if (target.Length == 0)
1273                                 return true;
1274                         byte* sk1 = stackalloc byte [4];
1275                         byte* sk2 = stackalloc byte [4];
1276                         ClearBuffer (sk1, 4);
1277                         ClearBuffer (sk2, 4);
1278                         Context ctx = new Context (opt, null, null, sk1, sk2, null,
1279                                 QuickCheckPossible (s, start, start + length, target, 0, target.Length));
1280                         return IsPrefix (s, target, start, length, true, ref ctx);
1281                 }
1282
1283                 unsafe bool IsPrefix (string s, string target, int start, int length, bool skipHeadingExtenders, ref Context ctx)
1284                 {
1285                         bool consumed, dummy;
1286                         CompareInternal (s, start, length,
1287                                 target, 0, target.Length,
1288                                 out consumed, out dummy, skipHeadingExtenders,
1289                                 true, ref ctx);
1290                         return consumed;
1291                 }
1292
1293                 // IsSuffix()
1294
1295                 public bool IsSuffix (string src, string target, CompareOptions opt)
1296                 {
1297                         return IsSuffix (src, target, src.Length - 1, src.Length, opt);
1298                 }
1299
1300                 public unsafe bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
1301                 {
1302                         if (target.Length == 0)
1303                                 return true;
1304                         int last = LastIndexOf (s, target, start, length, opt);
1305                         return last >= 0 && Compare (s, last, s.Length - last, target, 0, target.Length, opt) == 0;
1306 /*
1307                         // quick check : simple codepoint comparison
1308                         if (s.Length >= target.Length) {
1309                                 int si = start;
1310                                 int se = start - length;
1311                                 for (int i = target.Length - 1; si >= se && i >= 0; i--, si--)
1312                                         if (s [si] != target [i])
1313                                                 break;
1314                                 if (si == start + target.Length)
1315                                         return true;
1316                         }
1317
1318                         PreviousInfo prev = new PreviousInfo (false);
1319                         byte* sk1 = stackalloc byte [4];
1320                         byte* sk2 = stackalloc byte [4];
1321                         ClearBuffer (sk1, 4);
1322                         ClearBuffer (sk2, 4);
1323                         return IsSuffix (opt, s, target, start, length, ref prev, sk1, sk2);
1324 */
1325                 }
1326 /*
1327                 unsafe bool IsSuffix (string s, string t, int start, int length, ref Context ctx)
1328                 {
1329                         int tstart = 0;
1330                         COpt opt = ctx.Option;
1331
1332                         for (;tstart < t.Length; tstart++)
1333                                 if (!IsIgnorable (t [tstart], opt))
1334                                         break;
1335                         if (tstart == t.Length)
1336                                 return true; // as if target is String.Empty.
1337
1338 #if false
1339 // This is still not working. If it is not likely to get working, then just remove it.
1340                         int si = start;
1341                         int send = start - length;
1342                         int ti = t.Length - 1;
1343                         int tend = -1;
1344
1345                         int sStep = start + 1;
1346                         int tStep = t.Length;
1347                         bool sourceConsumed, targetConsumed;
1348                         while (true) {
1349                                 for (; send < si; si--)
1350                                         if (!IsIgnorable (s [si]))
1351                                                 break;
1352                                 for (; tend < ti; ti--)
1353                                         if (!IsIgnorable (t [ti]))
1354                                                 break;
1355                                 if (tend == ti)
1356                                         break;
1357                                 for (; send < si; si--)
1358                                         if (IsSafe (s [si]))
1359                                                 break;
1360                                 for (; tend < ti; ti--)
1361                                         if (IsSafe (t [ti]))
1362                                                 break;
1363 Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, length, t, ti, tstart, sStep - si, tStep - ti);
1364                                 if (CompareInternal (opt, s, si, sStep - si,
1365                                         t, ti, tStep - ti,
1366                                         out targetConsumed, out sourceConsumed,
1367                                         true) != 0)
1368                                         return false;
1369                                 if (send == si)
1370                                         return false;
1371                                 sStep = si;
1372                                 tStep = ti;
1373                                 si--;
1374                                 ti--;
1375                         }
1376                         return true;
1377 #else
1378                         // FIXME: it is not efficient for very long target.
1379                         bool sourceConsumed, targetConsumed;
1380                         int mismatchCount = 0;
1381                         for (int i = 0; i < length; i++) {
1382                                 ctx.ClearPrevInfo ();
1383
1384                                 int ret = CompareInternal (s, start - i, i + 1,
1385                                         t, tstart, t.Length - tstart,
1386                                         out targetConsumed,
1387                                         // FIXME: could immediately breakup
1388                                         out sourceConsumed, true, true, ref ctx);
1389                                 if (ret == 0)
1390                                         return true;
1391                                 if (!sourceConsumed && targetConsumed)
1392                                         return false;
1393                                 if (!targetConsumed) {
1394                                         mismatchCount++;
1395                                         if (mismatchCount > Uni.MaxExpansionLength)
1396                                                 // The largest length of an
1397                                                 // expansion is 3, so if the
1398                                                 // target was not consumed more
1399                                                 // than 3 times, then the tail
1400                                                 // character does not match.
1401                                                 return false;
1402                                 }
1403                         }
1404                         return false;
1405 #endif
1406                 }
1407 */
1408                 #endregion
1409
1410                 #region IndexOf() / LastIndexOf()
1411
1412                 // string
1413
1414                 public int IndexOf (string s, string target, CompareOptions opt)
1415                 {
1416                         return IndexOf (s, target, 0, s.Length, opt);
1417                 }
1418
1419                 int QuickIndexOf (string s, string target, int start, int length, out bool testWasUnable)
1420                 {
1421                         int testedSourcePos = -1, testedTargetPos = -1;
1422
1423                         testWasUnable = true;
1424                         if (target.Length == 0)
1425                                 return 0;
1426                         else if (target.Length > length)
1427                                 return -1;
1428                         testWasUnable = false;
1429
1430                         int end = start + length - target.Length + 1;
1431                         for (int i = start; i < end; i++) {
1432                                 bool no = false;
1433                                 for (int j = 0; j < target.Length; j++) {
1434                                         if (testedTargetPos < j) {
1435                                                 char c = target [j];
1436                                                 if (c == 0 || c >= 0x80) {
1437                                                         testWasUnable = true;
1438                                                         return -1;
1439                                                 }
1440                                                 else
1441                                                         testedTargetPos = j;
1442                                         }
1443                                         if (testedSourcePos < i + j) {
1444                                                 char c = s [i + j];
1445                                                 if (c == 0 || c >= 0x80) {
1446                                                         testWasUnable = true;
1447                                                         return -1;
1448                                                 }
1449                                                 else
1450                                                         testedSourcePos = i + j;
1451                                         }
1452                                         if (s [i + j] != target [j]) {
1453                                                 no = true;
1454                                                 break;
1455                                         }
1456                                 }
1457                                 if (no)
1458                                         continue;
1459                                 return i;
1460                         }
1461                         return -1;
1462                 }
1463
1464                 public unsafe int IndexOf (string s, string target, int start, int length, CompareOptions opt)
1465                 {
1466                         if (opt == CompareOptions.Ordinal)
1467                                 return IndexOfOrdinal (s, target, start, length);
1468                         if (opt == CompareOptions.OrdinalIgnoreCase)
1469                                 return IndexOfOrdinalIgnoreCase (s, target, start, length);
1470                         if (opt == CompareOptions.None) {
1471                                 bool testWasUnable;
1472                                 int ret = QuickIndexOf (s, target, start, length, out testWasUnable);
1473                                 if (!testWasUnable)
1474                                         return ret;
1475                         }
1476
1477                         byte* alwaysMatchFlags = stackalloc byte [16];
1478                         byte* neverMatchFlags = stackalloc byte [16];
1479                         byte* targetSortKey = stackalloc byte [4];
1480                         byte* sk1 = stackalloc byte [4];
1481                         byte* sk2 = stackalloc byte [4];
1482                         ClearBuffer (alwaysMatchFlags, 16);
1483                         ClearBuffer (neverMatchFlags, 16);
1484                         ClearBuffer (targetSortKey, 4);
1485                         ClearBuffer (sk1, 4);
1486                         ClearBuffer (sk2, 4);
1487                         Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1488
1489                         return IndexOf (s, target, start, length,
1490                                 targetSortKey, ref ctx);
1491                 }
1492
1493                 // note that it wouldn't be used since ordinal IndexOf()
1494                 // should be directed to icall.
1495                 int IndexOfOrdinal (string s, string target, int start, int length)
1496                 {
1497                         if (target.Length == 0)
1498                                 return 0;
1499                         else if (target.Length > length)
1500                                 return -1;
1501
1502                         int end = start + length - target.Length + 1;
1503                         for (int i = start; i < end; i++) {
1504                                 bool no = false;
1505                                 for (int j = 0; j < target.Length; j++) {
1506                                         if (s [i + j] != target [j]) {
1507                                                 no = true;
1508                                                 break;
1509                                         }
1510                                 }
1511                                 if (no)
1512                                         continue;
1513                                 return i;
1514                         }
1515                         return -1;
1516                 }
1517
1518                 int IndexOfOrdinalIgnoreCase (string s, string target, int start, int length)
1519                 {
1520                         if (target.Length == 0)
1521                                 return 0;
1522                         else if (target.Length > length)
1523                                 return -1;
1524
1525                         int end = start + length - target.Length + 1;
1526                         for (int i = start; i < end; i++) {
1527                                 bool no = false;
1528                                 for (int j = 0; j < target.Length; j++) {
1529                                         // I think almost all text has more lower letters than upper ones. Thus with this invariant comparison ToLower() should be faster since it costs less operations.
1530                                         if (textInfo.ToLower (s [i + j]) != textInfo.ToLower (target [j])) {
1531                                                 no = true;
1532                                                 break;
1533                                         }
1534                                 }
1535                                 if (no)
1536                                         continue;
1537                                 return i;
1538                         }
1539                         return -1;
1540                 }
1541
1542                 // char
1543
1544                 public int IndexOf (string s, char target, CompareOptions opt)
1545                 {
1546                         return IndexOf (s, target, 0, s.Length, opt);
1547                 }
1548
1549                 public unsafe int IndexOf (string s, char target, int start, int length, CompareOptions opt)
1550                 {
1551                         if (opt == CompareOptions.Ordinal)
1552                                 return IndexOfOrdinal (s, target, start, length);
1553                         if (opt == CompareOptions.OrdinalIgnoreCase)
1554                                 return IndexOfOrdinalIgnoreCase (s, target, start, length);
1555                         byte* alwaysMatchFlags = stackalloc byte [16];
1556                         byte* neverMatchFlags = stackalloc byte [16];
1557                         byte* targetSortKey = stackalloc byte [4];
1558                         byte* sk1 = stackalloc byte [4];
1559                         byte* sk2 = stackalloc byte [4];
1560                         ClearBuffer (alwaysMatchFlags, 16);
1561                         ClearBuffer (neverMatchFlags, 16);
1562                         ClearBuffer (targetSortKey, 4);
1563                         ClearBuffer (sk1, 4);
1564                         ClearBuffer (sk2, 4);
1565                         Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1566
1567                         // If target is contraction, then use string search.
1568                         Contraction ct = GetContraction (target);
1569                         if (ct != null) {
1570                                 if (ct.Replacement != null)
1571                                         return IndexOf (s, ct.Replacement,
1572                                                 start, length, targetSortKey, ref ctx);
1573                                 else {
1574                                         for (int i = 0; i < ct.SortKey.Length; i++)
1575                                                 sk2 [i] = ct.SortKey [i];
1576                                         return IndexOfSortKey (s, start, length, sk2, char.MinValue, -1, true, ref ctx);
1577                                 }
1578                         } else {
1579                                 int ti = FilterOptions ((int) target, opt);
1580                                 targetSortKey [0] = Category (ti);
1581                                 targetSortKey [1] = Level1 (ti);
1582                                 if ((opt & COpt.IgnoreNonSpace) == 0)
1583                                         targetSortKey [2] =
1584                                                 Level2 (ti, ExtenderType.None);
1585                                 targetSortKey [3] = Uni.Level3 (ti);
1586                                 return IndexOfSortKey (s, start, length,
1587                                         targetSortKey, target, ti,
1588                                         !Uni.HasSpecialWeight ((char) ti), ref ctx);
1589                         }
1590                 }
1591
1592                 // note that it wouldn't be used since ordinal IndexOf()
1593                 // should be directed to icall.
1594                 int IndexOfOrdinal (string s, char target, int start, int length)
1595                 {
1596                         int end = start + length;
1597                         for (int i = start; i < end; i++)
1598                                 if (s [i] == target)
1599                                         return i;
1600                         return -1;
1601                 }
1602
1603                 int IndexOfOrdinalIgnoreCase (string s, char target, int start, int length)
1604                 {
1605                         int end = start + length;
1606                         target = textInfo.ToLower (target);
1607                         for (int i = start; i < end; i++)
1608                                 if (textInfo.ToLower (s [i]) == target)
1609                                         return i;
1610                         return -1;
1611                 }
1612
1613                 // Searches target byte[] keydata
1614                 unsafe int IndexOfSortKey (string s, int start, int length, byte* sortkey, char target, int ti, bool noLv4, ref Context ctx)
1615                 {
1616                         int end = start + length;
1617                         int idx = start;
1618
1619                         while (idx < end) {
1620                                 int cur = idx;
1621                                 if (MatchesForward (s, ref idx, end, ti, sortkey, noLv4, ref ctx))
1622                                         return cur;
1623                         }
1624                         return -1;
1625                 }
1626
1627                 // Searches string. Search head character (or keydata when
1628                 // the head is contraction sortkey) and try IsPrefix().
1629                 unsafe int IndexOf (string s, string target, int start, int length, byte* targetSortKey, ref Context ctx)
1630                 {
1631                         COpt opt = ctx.Option;
1632                         int tidx = 0;
1633                         for (; tidx < target.Length; tidx++)
1634                                 if (!IsIgnorable (target [tidx], opt))
1635                                         break;
1636                         if (tidx == target.Length)
1637                                 // FIXME: this is likely a hack. A string that is consists of \0 differs from those of other ignorable characters.
1638                                 return IndexOfOrdinal (target, '\0', 0, target.Length) >= 0 ? IndexOfOrdinal (s, target, start, length) : start;
1639                         Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1640                         string replace = ct != null ? ct.Replacement : null;
1641                         byte* sk = replace == null ? targetSortKey : null;
1642                         bool noLv4 = true;
1643                         char tc = char.MinValue;
1644                         int ti = -1;
1645                         if (ct != null && sk != null) {
1646                                 for (int i = 0; i < ct.SortKey.Length; i++)
1647                                         sk [i] = ct.SortKey [i];
1648                         } else if (sk != null) {
1649                                 tc = target [tidx];
1650                                 ti = FilterOptions (target [tidx], opt);
1651                                 sk [0] = Category (ti);
1652                                 sk [1] = Level1 (ti);
1653                                 if ((opt & COpt.IgnoreNonSpace) == 0)
1654                                         sk [2] = Level2 (ti, ExtenderType.None);
1655                                 sk [3] = Uni.Level3 (ti);
1656                                 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1657                         }
1658                         if (sk != null) {
1659                                 for (tidx++; tidx < target.Length; tidx++) {
1660                                         if (Category (target [tidx]) != 1)
1661                                                 break;
1662                                         if (sk [2] == 0)
1663                                                 sk [2] = 2;
1664                                         sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1665                                 }
1666                         }
1667
1668                         do {
1669                                 int idx = 0;
1670                                 if (replace != null)
1671                                         idx = IndexOf (s, replace, start, length, targetSortKey, ref ctx);
1672                                 else
1673                                         idx = IndexOfSortKey (s, start, length, sk, tc, ti, noLv4, ref ctx);
1674                                 if (idx < 0)
1675                                         return -1;
1676                                 length -= idx - start;
1677                                 start = idx;
1678                                 if (IsPrefix (s, target, start, length, false, ref ctx))
1679                                         return idx;
1680                                 Contraction cts = GetContraction (s, start, length);
1681                                 if (cts != null) {
1682                                         start += cts.Source.Length;
1683                                         length -= cts.Source.Length;
1684                                 } else {
1685                                         start++;
1686                                         length--;
1687                                 }
1688                         } while (length > 0);
1689                         return -1;
1690                 }
1691
1692                 //
1693                 // There are the same number of LastIndexOf() methods as
1694                 // IndexOf() related methods, with the same functionalities
1695                 // for each.
1696                 //
1697
1698                 // string
1699
1700                 public int LastIndexOf (string s, string target, CompareOptions opt)
1701                 {
1702                         return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1703                 }
1704
1705                 public unsafe int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
1706                 {
1707                         if (opt == CompareOptions.Ordinal)
1708                                 return LastIndexOfOrdinal (s, target, start, length);
1709                         if (opt == CompareOptions.OrdinalIgnoreCase)
1710                                 return LastIndexOfOrdinalIgnoreCase (s, target, start, length);
1711                         byte* alwaysMatchFlags = stackalloc byte [16];
1712                         byte* neverMatchFlags = stackalloc byte [16];
1713                         byte* targetSortKey = stackalloc byte [4];
1714                         byte* sk1 = stackalloc byte [4];
1715                         byte* sk2 = stackalloc byte [4];
1716                         ClearBuffer (alwaysMatchFlags, 16);
1717                         ClearBuffer (neverMatchFlags, 16);
1718                         ClearBuffer (targetSortKey, 4);
1719                         ClearBuffer (sk1, 4);
1720                         ClearBuffer (sk2, 4);
1721                         // For some unknown reason CompareQuick() does not work fine w/ LastIndexOf().
1722                         Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1723                         return LastIndexOf (s, target, start, length,
1724                                 targetSortKey, ref ctx);
1725                 }
1726
1727                 int LastIndexOfOrdinal (string s, string target, int start, int length)
1728                 {
1729                         if (target.Length == 0)
1730                                 return start;
1731                         if (s.Length < target.Length || target.Length > length)
1732                                 return -1;
1733                         int end = start - length + target.Length -1;
1734                         char tail = target [target.Length - 1];
1735                         for (int i = start; i > end;) {
1736                                 if (s [i] != tail) {
1737                                         i--;
1738                                         continue;
1739                                 }
1740                                 int x = i - target.Length + 1;
1741                                 i--;
1742                                 bool mismatch = false;
1743                                 for (int j = target.Length - 2; j >= 0; j--)
1744                                         if (s [x + j] != target [j]) {
1745                                                 mismatch = true;
1746                                                 break;
1747                                         }
1748                                 if (mismatch)
1749                                         continue;
1750                                 return x;
1751                         }
1752                         return -1;
1753                 }
1754
1755                 int LastIndexOfOrdinalIgnoreCase (string s, string target, int start, int length)
1756                 {
1757                         if (target.Length == 0)
1758                                 return start;
1759                         if (s.Length < length || target.Length > length)
1760                                 return -1;
1761                         int end = start - length + target.Length - 1;
1762                         char tail = textInfo.ToLower (target [target.Length - 1]);
1763                         for (int i = start; i > end;) {
1764                                 if (textInfo.ToLower (s [i]) != tail) {
1765                                         i--;
1766                                         continue;
1767                                 }
1768                                 int x = i - target.Length + 1;
1769                                 i--;
1770                                 bool mismatch = false;
1771                                 for (int j = target.Length - 2; j >= 0; j--)
1772                                         if (textInfo.ToLower (s [x + j]) != textInfo.ToLower (target [j])) {
1773                                                 mismatch = true;
1774                                                 break;
1775                                         }
1776                                 if (mismatch)
1777                                         continue;
1778                                 return x;
1779                         }
1780                         return -1;
1781                 }
1782
1783                 // char
1784
1785                 public int LastIndexOf (string s, char target, CompareOptions opt)
1786                 {
1787                         return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1788                 }
1789
1790                 public unsafe int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
1791                 {
1792                         if (opt == CompareOptions.Ordinal)
1793                                 return LastIndexOfOrdinal (s, target, start, length);
1794                         if (opt == CompareOptions.OrdinalIgnoreCase)
1795                                 return LastIndexOfOrdinalIgnoreCase (s, target, start, length);
1796                         byte* alwaysMatchFlags = stackalloc byte [16];
1797                         byte* neverMatchFlags = stackalloc byte [16];
1798                         byte* targetSortKey = stackalloc byte [4];
1799                         byte* sk1 = stackalloc byte [4];
1800                         byte* sk2 = stackalloc byte [4];
1801                         ClearBuffer (alwaysMatchFlags, 16);
1802                         ClearBuffer (neverMatchFlags, 16);
1803                         ClearBuffer (targetSortKey, 4);
1804                         ClearBuffer (sk1, 4);
1805                         ClearBuffer (sk2, 4);
1806                         Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1807
1808                         // If target is a replacement contraction, then use 
1809                         // string search.
1810                         Contraction ct = GetContraction (target);
1811                         if (ct != null) {
1812                                 if (ct.Replacement != null)
1813                                         return LastIndexOf (s,
1814                                                 ct.Replacement, start, length,
1815                                                 targetSortKey, ref ctx);
1816                                 else {
1817                                         for (int bi = 0; bi < ct.SortKey.Length; bi++)
1818                                                 sk2 [bi] = ct.SortKey [bi];
1819                                         return LastIndexOfSortKey (s, start,
1820                                                 start, length, sk2,
1821                                                 -1, true, ref ctx);
1822                                 }
1823                         }
1824                         else {
1825                                 int ti = FilterOptions ((int) target, opt);
1826                                 targetSortKey [0] = Category (ti);
1827                                 targetSortKey [1] = Level1 (ti);
1828                                 if ((opt & COpt.IgnoreNonSpace) == 0)
1829                                         targetSortKey [2] = Level2 (ti, ExtenderType.None);
1830                                 targetSortKey [3] = Uni.Level3 (ti);
1831                                 return LastIndexOfSortKey (s, start, start,
1832                                         length, targetSortKey,
1833                                         ti, !Uni.HasSpecialWeight ((char) ti),
1834                                         ref ctx);
1835                         }
1836                 }
1837
1838                 int LastIndexOfOrdinal (string s, char target, int start, int length)
1839                 {
1840                         if (s.Length == 0)
1841                                 return -1;
1842                         int end = start - length;
1843                         for (int i = start; i > end; i--)
1844                                 if (s [i] == target)
1845                                         return i;
1846                         return -1;
1847                 }
1848
1849                 int LastIndexOfOrdinalIgnoreCase (string s, char target, int start, int length)
1850                 {
1851                         if (s.Length == 0)
1852                                 return -1;
1853                         int end = start - length;
1854                         char c = textInfo.ToUpper (target);
1855                         for (int i = start; i > end; i--)
1856                                 if (textInfo.ToUpper (s [i]) == c)
1857                                         return i;
1858                         return -1;
1859                 }
1860
1861                 // Searches target byte[] keydata
1862                 unsafe int LastIndexOfSortKey (string s, int start, int orgStart, int length, byte* sortkey, int ti, bool noLv4, ref Context ctx)
1863                 {
1864                         int end = start - length;
1865                         int idx = start;
1866                         while (idx > end) {
1867                                 int cur = idx;
1868                                 if (MatchesBackward (s, ref idx, end, orgStart,
1869                                         ti, sortkey, noLv4, ref ctx))
1870                                         return cur;
1871                         }
1872                         return -1;
1873                 }
1874
1875                 // Searches string. Search head character (or keydata when
1876                 // the head is contraction sortkey) and try IsPrefix().
1877                 unsafe int LastIndexOf (string s, string target, int start, int length, byte* targetSortKey, ref Context ctx)
1878                 {
1879                         COpt opt = ctx.Option;
1880                         int orgStart = start;
1881                         int tidx = 0;
1882                         for (; tidx < target.Length; tidx++)
1883                                 if (!IsIgnorable (target [tidx], opt))
1884                                         break;
1885                         if (tidx == target.Length)
1886                                 // FIXME: this is likely a hack. A string that is consists of \0 differs from those of other ignorable characters.
1887                                 return IndexOfOrdinal (target, '\0', 0, target.Length) >= 0 ? LastIndexOfOrdinal (s, target, start, length) : start;
1888                         Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1889                         string replace = ct != null ? ct.Replacement : null;
1890                         byte* sk = replace == null ? targetSortKey : null;
1891
1892                         bool noLv4 = true;
1893                         int ti = -1;
1894                         if (ct != null && sk != null) {
1895                                 for (int i = 0; i < ct.SortKey.Length; i++)
1896                                         sk [i] = ct.SortKey [i];
1897                         } else if (sk != null) {
1898                                 ti = FilterOptions (target [tidx], opt);
1899                                 sk [0] = Category (ti);
1900                                 sk [1] = Level1 (ti);
1901                                 if ((opt & COpt.IgnoreNonSpace) == 0)
1902                                         sk [2] = Level2 (ti, ExtenderType.None);
1903                                 sk [3] = Uni.Level3 (ti);
1904                                 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1905                         }
1906                         if (sk != null) {
1907                                 for (tidx++; tidx < target.Length; tidx++) {
1908                                         if (Category (target [tidx]) != 1)
1909                                                 break;
1910                                         if (sk [2] == 0)
1911                                                 sk [2] = 2;
1912                                         sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1913                                 }
1914                         }
1915
1916                         do {
1917                                 int idx = 0;
1918
1919                                 if (replace != null)
1920                                         idx = LastIndexOf (s, replace,
1921                                                 start, length,
1922                                                 targetSortKey, ref ctx);
1923                                 else
1924                                         idx = LastIndexOfSortKey (s, start, orgStart, length, sk, ti, noLv4, ref ctx);
1925                                 if (idx < 0)
1926                                         return -1;
1927                                 length -= start - idx;
1928                                 start = idx;
1929
1930                                 if (IsPrefix (s, target, idx, orgStart - idx + 1, false, ref ctx)) {
1931                                         for (;idx < orgStart; idx++)
1932                                                 if (!IsIgnorable (s [idx], opt))
1933                                                         break;
1934                                         return idx;
1935                                 }
1936                                 Contraction cts = GetContraction (s, idx, orgStart - idx + 1);
1937                                 if (cts != null) {
1938                                         start -= cts.Source.Length;
1939                                         length -= cts.Source.Length;
1940                                 } else {
1941                                         start--;
1942                                         length--;
1943                                 }
1944                         } while (length > 0);
1945                         return -1;
1946                 }
1947
1948                 unsafe bool MatchesForward (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ref Context ctx)
1949                 {
1950                         int si = s [idx];
1951                         if (ctx.AlwaysMatchFlags != null && si < 128 && (ctx.AlwaysMatchFlags [si / 8] & (1 << (si % 8))) != 0)
1952                                 return true;
1953                         if (ctx.NeverMatchFlags != null &&
1954                                         si < 128 &&
1955                                         (ctx.NeverMatchFlags [si / 8] & (1 << (si % 8))) != 0) {
1956                                 idx++;
1957                                 return false;
1958                         }
1959                         ExtenderType ext = GetExtenderType (s [idx]);
1960                         Contraction ct = null;
1961                         if (MatchesForwardCore (s, ref idx, end, ti, sortkey, noLv4, ext, ref ct, ref ctx)) {
1962                                 if (ctx.AlwaysMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1963                                         ctx.AlwaysMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1964                                 return true;
1965                         }
1966                         if (ctx.NeverMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1967                                 ctx.NeverMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1968                         return false;
1969                 }
1970
1971                 unsafe bool MatchesForwardCore (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
1972                 {
1973                         COpt opt = ctx.Option;
1974                         byte* charSortKey = ctx.Buffer1;
1975                         bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1976                         int si = -1;
1977                         if (ext == ExtenderType.None)
1978                                 ct = GetContraction (s, idx, end);
1979                         else if (ctx.PrevCode < 0) {
1980                                 if (ctx.PrevSortKey == null) {
1981                                         idx++;
1982                                         return false;
1983                                 }
1984                                 charSortKey = ctx.PrevSortKey;
1985                         }
1986                         else
1987                                 si = FilterExtender (ctx.PrevCode, ext, opt);
1988                         // if lv4 exists, it never matches contraction
1989                         if (ct != null) {
1990                                 idx += ct.Source.Length;
1991                                 if (!noLv4)
1992                                         return false;
1993                                 if (ct.SortKey != null) {
1994                                         for (int i = 0; i < 4; i++)
1995                                                 charSortKey [i] = sortkey [i];
1996                                         ctx.PrevCode = -1;
1997                                         ctx.PrevSortKey = charSortKey;
1998                                 } else {
1999                                         // Here is the core of LAMESPEC
2000                                         // described at the top of the source.
2001                                         int dummy = 0;
2002                                         return MatchesForward (ct.Replacement, ref dummy,
2003                                                 ct.Replacement.Length, ti, sortkey, noLv4, ref ctx);
2004                                 }
2005                         } else {
2006                                 if (si < 0)
2007                                         si = FilterOptions (s [idx], opt);
2008                                 idx++;
2009                                 charSortKey [0] = Category (si);
2010                                 bool noMatch = false;
2011                                 if (sortkey [0] == charSortKey [0])
2012                                         charSortKey [1] = Level1 (si);
2013                                 else
2014                                         noMatch = true;
2015                                 if (!ignoreNonSpace && sortkey [1] == charSortKey [1])
2016                                         charSortKey [2] = Level2 (si, ext);
2017                                 else if (!ignoreNonSpace)
2018                                         noMatch = true;
2019                                 if (noMatch) {
2020                                         for (; idx < end; idx++) {
2021                                                 if (Category (s [idx]) != 1)
2022                                                         break;
2023                                         }
2024                                         return false;
2025                                 }
2026                                 charSortKey [3] = Uni.Level3 (si);
2027                                 if (charSortKey [0] != 1)
2028                                         ctx.PrevCode = si;
2029                         }
2030                         for (; idx < end; idx++) {
2031                                 if (Category (s [idx]) != 1)
2032                                         break;
2033                                 if (ignoreNonSpace)
2034                                         continue;
2035                                 if (charSortKey [2] == 0)
2036                                                 charSortKey [2] = 2;
2037                                         charSortKey [2] = (byte) (charSortKey [2]
2038                                                 + Level2 (s [idx], ExtenderType.None));
2039                         }
2040
2041                         return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);
2042                 }
2043
2044                 unsafe bool MatchesPrimitive (COpt opt, byte* source, int si, ExtenderType ext, byte* target, int ti, bool noLv4)
2045                 {
2046                         bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
2047                         if (source [0] != target [0] ||
2048                                 source [1] != target [1] ||
2049                                 (!ignoreNonSpace && source [2] != target [2]) ||
2050                                 source [3] != target [3])
2051                                 return false;
2052                         if (noLv4 && (si < 0 || !Uni.HasSpecialWeight ((char) si)))
2053                                 return true;
2054                         else if (noLv4)
2055                                 return false;
2056                         // Since target can never be an extender, if the source
2057                         // is an expander and it matters, then they never match.
2058                         if (!ignoreNonSpace && ext == ExtenderType.Conditional)
2059                                 return false;
2060                         if (Uni.IsJapaneseSmallLetter ((char) si) !=
2061                                 Uni.IsJapaneseSmallLetter ((char) ti) ||
2062                                 ToDashTypeValue (ext, opt) !=
2063                                 // FIXME: we will have to specify correct value for target
2064                                 ToDashTypeValue (ExtenderType.None, opt) ||
2065                                 !Uni.IsHiragana ((char) si) !=
2066                                 !Uni.IsHiragana ((char) ti) ||
2067                                 IsHalfKana ((char) si, opt) !=
2068                                 IsHalfKana ((char) ti, opt))
2069                                 return false;
2070                         return true;
2071                 }
2072
2073                 unsafe bool MatchesBackward (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ref Context ctx)
2074                 {
2075                         int si = s [idx];
2076                         if (ctx.AlwaysMatchFlags != null && si < 128 && (ctx.AlwaysMatchFlags [si / 8] & (1 << (si % 8))) != 0)
2077                                 return true;
2078                         if (ctx.NeverMatchFlags != null && si < 128 && (ctx.NeverMatchFlags [si / 8] & (1 << (si % 8))) != 0) {
2079                                 idx--;
2080                                 return false;
2081                         }
2082                         ExtenderType ext = GetExtenderType (s [idx]);
2083                         Contraction ct = null;
2084                         if (MatchesBackwardCore (s, ref idx, end, orgStart, ti, sortkey, noLv4, ext, ref ct, ref ctx)) {
2085                                 if (ctx.AlwaysMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
2086                                         ctx.AlwaysMatchFlags [si / 8] |= (byte) (1 << (si % 8));
2087                                 return true;
2088                         }
2089                         if (ctx.NeverMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128) {
2090                                 ctx.NeverMatchFlags [si / 8] |= (byte) (1 << (si % 8));
2091                         }
2092                         return false;
2093                 }
2094
2095                 unsafe bool MatchesBackwardCore (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
2096                 {
2097                         COpt opt = ctx.Option;
2098                         byte* charSortKey = ctx.Buffer1;
2099                         bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
2100                         int cur = idx;
2101                         int si = -1;
2102                         // To handle extenders in source, we need to
2103                         // check next _primary_ character.
2104                         if (ext != ExtenderType.None) {
2105                                 byte diacritical = 0;
2106                                 for (int tmp = idx; ; tmp--) {
2107                                         if (tmp < 0) // heading extender
2108                                                 return false;
2109                                         if (IsIgnorable (s [tmp], opt))
2110                                                 continue;
2111                                         int tmpi = FilterOptions (s [tmp], opt);
2112                                         byte category = Category (tmpi);
2113                                         if (category == 1) {
2114                                                 diacritical = Level2 (tmpi, ExtenderType.None);
2115                                                 continue;
2116                                         }
2117                                         si = FilterExtender (tmpi, ext, opt);
2118                                         charSortKey [0] = category;
2119                                         charSortKey [1] = Level1 (si);
2120                                         if (!ignoreNonSpace)
2121                                                 charSortKey [2] = Level2 (si, ext);
2122                                         charSortKey [3] = Uni.Level3 (si);
2123                                         if (ext != ExtenderType.Conditional &&
2124                                                 diacritical != 0)
2125                                                 charSortKey [2] =
2126                                                         (charSortKey [2] == 0) ?
2127                                                         (byte) (diacritical + 2) :
2128                                                         diacritical;
2129                                         break;
2130                                 }
2131                                 idx--;
2132                         }
2133                         if (ext == ExtenderType.None)
2134                                 ct = GetTailContraction (s, idx, end);
2135                         // if lv4 exists, it never matches contraction
2136                         if (ct != null) {
2137                                 idx -= ct.Source.Length;
2138                                 if (!noLv4)
2139                                         return false;
2140                                 if (ct.SortKey != null) {
2141                                         for (int i = 0; i < 4; i++)
2142                                                 charSortKey [i] = sortkey [i];
2143                                         ctx.PrevCode = -1;
2144                                         ctx.PrevSortKey = charSortKey;
2145                                 } else {
2146                                         // Here is the core of LAMESPEC
2147                                         // described at the top of the source.
2148                                         int dummy = ct.Replacement.Length - 1;
2149                                         return 0 <= LastIndexOfSortKey (
2150                                                 ct.Replacement, dummy, dummy,
2151                                                 ct.Replacement.Length, sortkey,
2152                                                 ti, noLv4, ref ctx);
2153                                 }
2154                         } else if (ext == ExtenderType.None) {
2155                                 if (si < 0)
2156                                         si = FilterOptions (s [idx], opt);
2157                                 idx--;
2158                                 bool noMatch = false;
2159                                 charSortKey [0] = Category (si);
2160                                 if (charSortKey [0] == sortkey [0])
2161                                         charSortKey [1] = Level1 (si);
2162                                 else
2163                                         noMatch = true;
2164                                 if (!ignoreNonSpace && charSortKey [1] == sortkey [1])
2165                                         charSortKey [2] = Level2 (si, ext);
2166                                 else if (!ignoreNonSpace)
2167                                         noMatch = true;
2168                                 if (noMatch)
2169                                         return false;
2170                                 charSortKey [3] = Uni.Level3 (si);
2171                                 if (charSortKey [0] != 1)
2172                                         ctx.PrevCode = si;
2173                         }
2174                         if (ext == ExtenderType.None) {
2175                                 for (int tmp = cur + 1; tmp < orgStart; tmp++) {
2176                                         if (Category (s [tmp]) != 1)
2177                                                 break;
2178                                         if (ignoreNonSpace)
2179                                                 continue;
2180                                         if (charSortKey [2] == 0)
2181                                                         charSortKey [2] = 2;
2182                                         charSortKey [2] = (byte) (charSortKey [2]
2183                                                 + Level2 (s [tmp], ExtenderType.None));
2184                                 }
2185                         }
2186                         return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);
2187                 }
2188                 #endregion
2189         }
2190 }