Merge pull request #585 from mk8/master
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / SimpleCollator.cs
1 //
2 // SimpleCollator.cs : the core collator implementation
3 //
4 // Author:
5 //      Atsushi Enomoto  <atsushi@ximian.com>
6 //
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
8 //
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
16 // 
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
19 // 
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28
29 //
30 // Here's a summary for supporting contractions, expansions and diacritical 
31 // remappings.
32 //
33 // Diacritical mapping is a simple tailoring rule that "remaps" diacritical
34 // weight value from one to another. For now it applies to all range of
35 // characters, but at some stage we might need to limit the remapping ranges.
36 //
37 // A Contraction consists of a string (actually char[]) and a sortkey entry
38 // (i.e. byte[]). It indicates that a sequence of characters is interpreted
39 // as a single character that has the mapped sortkey value. There is no
40 // character which goes across "special rules". When the collator encountered
41 // such a sequence of characters, it just returns the sortkey value without
42 // further replacement.
43 //
44 // Since it is still likely to happen that a contraction sequence matches
45 // other character than the identical sequence (depending on CompareOptions
46 // and of course, defined sortkey value itself), comparison cannot be done
47 // at source char[] level.
48 //
49 // In IndexOf(), the first character in the target string or the target char
50 // itself is turned into sortkey bytes. If the character has a contraction and
51 // that is sortkey map, then it is used instead. If the contraction exists and
52 // that is replacement map, then the first character of the replacement string
53 // is searched instead. IndexOf() always searches only for the top character,
54 // and if it returned negative value, then it returns -1. Otherwise, it then
55 // tries IsPrefix() from that location. If it returns true, then it returns
56 // the index.
57 //
58 // LAMESPEC: IndexOf() is lame as a whole API. It never matches in the middle
59 // of expansion and there is no proper way to return such indexes within
60 // a single int return value.
61 //
62 // For example, try below in .NET:
63 //      IndexOf("\u00E6", "a")
64 //      IndexOf("\u00E6", "e")
65 //
66
67
68 using System;
69 using System.Collections;
70 using System.Globalization;
71
72 using Uni = Mono.Globalization.Unicode.MSCompatUnicodeTable;
73 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
74 using COpt = System.Globalization.CompareOptions;
75
76 namespace Mono.Globalization.Unicode
77 {
78         internal class SimpleCollator
79         {
80                 // this environment variable is for debugging quick check.
81 #pragma warning disable 169, 414
82                 static bool QuickCheckDisabled =
83                         Environment.internalGetEnvironmentVariable (
84                         "MONO_COLLATION_QUICK_CHECK_DISABLED") == "yes";
85 #pragma warning restore 169, 414                
86
87                 unsafe internal struct Context
88                 {
89                         public Context (CompareOptions opt, byte* alwaysMatchFlags, byte* neverMatchFlags, byte* buffer1, byte* buffer2, byte* prev1/*, bool quickCheckPossible*/)
90                         {
91                                 Option = opt;
92                                 AlwaysMatchFlags = alwaysMatchFlags;
93                                 NeverMatchFlags = neverMatchFlags;
94                                 Buffer1 = buffer1;
95                                 Buffer2 = buffer2;
96                                 PrevSortKey = prev1;
97                                 PrevCode = -1;
98 //                              QuickCheckPossible = quickCheckPossible;
99                         }
100
101                         public readonly CompareOptions Option;
102                         public readonly byte* NeverMatchFlags;
103                         public readonly byte* AlwaysMatchFlags;
104                         public byte* Buffer1;
105                         public byte* Buffer2;
106                         public int PrevCode;
107                         public byte* PrevSortKey;
108 //                      public readonly bool QuickCheckPossible;
109
110                         public void ClearPrevInfo ()
111                         {
112                                 PrevCode = -1;
113                                 PrevSortKey = null;
114                         }
115                 }
116
117                 unsafe struct PreviousInfo
118                 {
119                         public int Code;
120                         public byte* SortKey;
121
122                         public PreviousInfo (bool dummy)
123                         {
124                                 Code = -1;
125                                 SortKey = null;
126                         }
127                 }
128
129                 struct Escape
130                 {
131                         public string Source;
132                         public int Index;
133                         public int Start;
134                         public int End;
135                         public int Optional;
136                 }
137
138                 static SimpleCollator invariant =
139                         new SimpleCollator (CultureInfo.InvariantCulture);
140
141                 readonly TextInfo textInfo; // for ToLower().
142                 readonly CodePointIndexer cjkIndexer;
143                 readonly Contraction [] contractions;
144                 readonly Level2Map [] level2Maps;
145                 // This flag marks characters as "unsafe", where the character
146                 // could be used as part of a contraction (whose length > 1).
147                 readonly byte [] unsafeFlags;
148
149                 unsafe readonly byte* cjkCatTable;
150                 unsafe readonly byte* cjkLv1Table;
151                 unsafe readonly byte* cjkLv2Table;
152                 readonly CodePointIndexer cjkLv2Indexer;
153                 readonly int lcid;
154                 readonly bool frenchSort;
155
156
157                 const int UnsafeFlagLength = 0x300 / 8;
158
159 //              readonly byte [] contractionFlags = new byte [16];
160
161                 // This array is internally used inside IndexOf() to store
162                 // "no need to check" ASCII characters.
163                 //
164                 // Now that it should be thread safe, this array is allocated
165                 // at every time.
166 //              byte [] neverMatchFlags = new byte [128 / 8];
167
168                 #region .ctor() and split functions
169
170                 public SimpleCollator (CultureInfo culture)
171                 {
172                         lcid = culture.LCID;
173                         textInfo = culture.TextInfo;
174
175                         unsafe {
176                                 SetCJKTable (culture, ref cjkIndexer,
177                                         ref cjkCatTable, ref cjkLv1Table,
178                                         ref cjkLv2Indexer, ref cjkLv2Table);
179                         }
180
181                         // Get tailoring info
182                         TailoringInfo t = null;
183                         for (CultureInfo ci = culture; ci.LCID != 127; ci = ci.Parent) {
184                                 t = Uni.GetTailoringInfo (ci.LCID);
185                                 if (t != null)
186                                         break;
187                         }
188                         if (t == null) // then use invariant
189                                 t = Uni.GetTailoringInfo (127);
190
191                         frenchSort = t.FrenchSort;
192                         Uni.BuildTailoringTables (culture, t, ref contractions,
193                                 ref level2Maps);
194                         unsafeFlags = new byte [UnsafeFlagLength];
195                         foreach (Contraction c in contractions) {
196                                 if (c.Source.Length > 1)
197                                         foreach (char ch in c.Source)
198                                                 unsafeFlags [(int) ch / 8 ]
199                                                         |= (byte) (1 << ((int) ch & 7));
200 //                              for (int i = 0; i < c.Source.Length; i++) {
201 //                                      int ch = c.Source [i];
202 //                                      if (ch > 127)
203 //                                              continue;
204 //                                      contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
205 //                              }
206                         }
207                         if (lcid != 127)
208                                 foreach (Contraction c in invariant.contractions) {
209                                         if (c.Source.Length > 1)
210                                                 foreach (char ch in c.Source)
211                                                         unsafeFlags [(int) ch / 8 ] 
212                                                                 |= (byte) (1 << ((int) ch & 7));
213 //                                      for (int i = 0; i < c.Source.Length; i++) {
214 //                                              int ch = c.Source [i];
215 //                                              if (ch > 127)
216 //                                                      continue;
217 //                                              contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
218 //                                      }
219                                 }
220
221                         // FIXME: Since tailorings are mostly for latin
222                         // (and in some cases Cyrillic) characters, it would
223                         // be much better for performance to store "start 
224                         // indexes" for > 370 (culture-specific letters).
225
226 /*
227 // dump tailoring table
228 Console.WriteLine ("******** building table for {0} : contractions - {1} diacritical - {2}",
229 culture.LCID, contractions.Length, level2Maps.Length);
230 foreach (Contraction c in contractions) {
231 foreach (char cc in c.Source)
232 Console.Write ("{0:X4} ", (int) cc);
233 Console.WriteLine (" -> '{0}'", c.Replacement);
234 }
235 */
236                 }
237
238                 unsafe private void SetCJKTable (
239                         CultureInfo culture, ref CodePointIndexer cjkIndexer,
240                         ref byte* catTable, ref byte* lv1Table,
241                         ref CodePointIndexer lv2Indexer, ref byte* lv2Table)
242                 {
243                         string name = GetNeutralCulture (culture).Name;
244
245                         Uni.FillCJK (name, ref cjkIndexer, ref catTable,
246                                 ref lv1Table, ref lv2Indexer, ref lv2Table);
247                 }
248
249                 static CultureInfo GetNeutralCulture (CultureInfo info)
250                 {
251                         CultureInfo ret = info;
252                         while (ret.Parent != null && ret.Parent.LCID != 127)
253                                 ret = ret.Parent;
254                         return ret;
255                 }
256
257                 #endregion
258
259                 unsafe byte Category (int cp)
260                 {
261                         if (cp < 0x3000 || cjkCatTable == null)
262                                 return Uni.Category (cp);
263                         int idx = cjkIndexer.ToIndex (cp);
264                         return idx < 0 ? Uni.Category (cp) : cjkCatTable [idx];
265                 }
266
267                 unsafe byte Level1 (int cp)
268                 {
269                         if (cp < 0x3000 || cjkLv1Table == null)
270                                 return Uni.Level1 (cp);
271                         int idx = cjkIndexer.ToIndex (cp);
272                         return idx < 0 ? Uni.Level1 (cp) : cjkLv1Table [idx];
273                 }
274
275                 unsafe byte Level2 (int cp, ExtenderType ext)
276                 {
277                         if (ext == ExtenderType.Buggy)
278                                 return 5;
279                         else if (ext == ExtenderType.Conditional)
280                                 return 0;
281
282                         if (cp < 0x3000 || cjkLv2Table == null)
283                                 return Uni.Level2 (cp);
284                         int idx = cjkLv2Indexer.ToIndex (cp);
285                         byte ret = idx < 0 ? (byte) 0 : cjkLv2Table [idx];
286                         if (ret != 0)
287                                 return ret;
288                         ret = Uni.Level2 (cp);
289                         if (level2Maps.Length == 0)
290                                 return ret;
291                         for (int i = 0; i < level2Maps.Length; i++) {
292                                 if (level2Maps [i].Source == ret)
293                                         return level2Maps [i].Replace;
294                                 else if (level2Maps [i].Source > ret)
295                                         break;
296                         }
297                         return ret;
298                 }
299
300                 static bool IsHalfKana (int cp, COpt opt)
301                 {
302                         return (opt & COpt.IgnoreWidth) != 0 ||
303                                 Uni.IsHalfWidthKana ((char) cp);
304                 }
305
306                 Contraction GetContraction (string s, int start, int end)
307                 {
308 //                      int si = s [start];
309 //                      if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
310 //                              return null;
311                         Contraction c = GetContraction (s, start, end, contractions);
312                         if (c != null || lcid == 127)
313                                 return c;
314                         return GetContraction (s, start, end, invariant.contractions);
315                 }
316
317                 Contraction GetContraction (string s, int start, int end, Contraction [] clist)
318                 {
319                         for (int i = 0; i < clist.Length; i++) {
320                                 Contraction ct = clist [i];
321                                 int diff = ct.Source [0] - s [start];
322                                 if (diff > 0)
323                                         return null; // it's already sorted
324                                 else if (diff < 0)
325                                         continue;
326                                 char [] chars = ct.Source;
327                                 if (end - start < chars.Length)
328                                         continue;
329                                 bool match = true;
330                                 for (int n = 0; n < chars.Length; n++)
331                                         if (s [start + n] != chars [n]) {
332                                                 match = false;
333                                                 break;
334                                         }
335                                 if (match)
336                                         return ct;
337                         }
338                         return null;
339                 }
340
341                 Contraction GetTailContraction (string s, int start, int end)
342                 {
343 //                      int si = s [end - 1];
344 //                      if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
345 //                              return null;
346                         Contraction c = GetTailContraction (s, start, end, contractions);
347                         if (c != null || lcid == 127)
348                                 return c;
349                         return GetTailContraction (s, start, end, invariant.contractions);
350                 }
351
352                 Contraction GetTailContraction (string s, int start, int end, Contraction [] clist)
353                 {
354                         if (start == end || end < -1 || start >= s.Length || s.Length <= end + 1)
355                                 throw new SystemException (String.Format ("MONO internal error. Failed to get TailContraction. start = {0} end = {1} string = '{2}'", start, end, s));
356                         for (int i = 0; i < clist.Length; i++) {
357                                 Contraction ct = clist [i];
358                                 char [] chars = ct.Source;
359                                 if (chars.Length > start - end)
360                                         continue;
361                                 if (chars [chars.Length - 1] != s [start])
362                                         continue;
363
364                                 bool match = true;
365                                 for (int n = 0, spos = start - chars.Length + 1;
366                                      n < chars.Length;
367                                      n++, spos++) {
368                                         if (s [spos] != chars [n]) {
369                                                 match = false;
370                                                 break;
371                                         }
372                                 }
373                                 if (match)
374                                         return ct;
375                         }
376                         return null;
377                 }
378
379                 Contraction GetContraction (char c)
380                 {
381                         Contraction ct = GetContraction (c, contractions);
382                         if (ct != null || lcid == 127)
383                                 return ct;
384                         return GetContraction (c, invariant.contractions);
385                 }
386
387                 Contraction GetContraction (char c, Contraction [] clist)
388                 {
389                         for (int i = 0; i < clist.Length; i++) {
390                                 Contraction ct = clist [i];
391                                 if (ct.Source [0] > c)
392                                         return null; // it's already sorted
393                                 if (ct.Source [0] == c && ct.Source.Length == 1)
394                                         return ct;
395                         }
396                         return null;
397                 }
398
399                 int FilterOptions (int i, COpt opt)
400                 {
401                         if ((opt & COpt.IgnoreWidth) != 0) {
402                                 int x = Uni.ToWidthCompat (i);
403                                 if (x != 0)
404                                         i = x;
405                         }
406                         if ((opt & COpt.OrdinalIgnoreCase) != 0)
407                                 i = textInfo.ToLower ((char) i);
408                         if ((opt & COpt.IgnoreCase) != 0)
409                                 i = textInfo.ToLower ((char) i);
410                         if ((opt & COpt.IgnoreKanaType) != 0)
411                                 i = Uni.ToKanaTypeInsensitive (i);
412                         return i;
413                 }
414
415                 enum ExtenderType {
416                         None,
417                         Simple,
418                         Voiced,
419                         Conditional,
420                         Buggy,
421                 }
422
423                 ExtenderType GetExtenderType (int i)
424                 {
425                         // LAMESPEC: Windows expects true for U+3005, but 
426                         // sometimes it does not represent to repeat just
427                         // one character.
428                         // Windows also expects true for U+3031 and U+3032,
429                         // but they should *never* repeat one character.
430
431                         // U+2015 becomes an extender only when it is Japanese
432                         if (i == 0x2015)
433                                 return lcid == 16 ? ExtenderType.Conditional :
434                                         ExtenderType.None;
435
436                         if (i < 0x3005 || i > 0xFF70)
437                                 return ExtenderType.None;
438                         if (i >= 0xFE7C) {
439                                 switch (i) {
440                                 case 0xFE7C:
441                                 case 0xFE7D:
442                                         return ExtenderType.Simple;
443                                 case 0xFF70:
444                                         return ExtenderType.Conditional;
445                                 case 0xFF9E:
446                                 case 0xFF9F:
447                                         return ExtenderType.Voiced;
448                                 }
449                         }
450                         if (i > 0x30FE)
451                                 return ExtenderType.None;
452                         switch (i) {
453                         case 0x3005: // LAMESPEC: see above.
454                                 return ExtenderType.Buggy;
455                         case 0x3031: // LAMESPEC: see above.
456                         case 0x3032: // LAMESPEC: see above.
457                         case 0x309D:
458                         case 0x30FD:
459                                 return ExtenderType.Simple;
460                         case 0x309E:
461                         case 0x30FE:
462                                 return ExtenderType.Voiced;
463                         case 0x30FC:
464                                 return ExtenderType.Conditional;
465                         default:
466                                 return ExtenderType.None;
467                         }
468                 }
469
470                 static byte ToDashTypeValue (ExtenderType ext, COpt opt)
471                 {
472                         if ((opt & COpt.IgnoreNonSpace) != 0) // LAMESPEC: huh, why?
473                                 return 3;
474                         switch (ext) {
475                         case ExtenderType.None:
476                                 return 3;
477                         case ExtenderType.Conditional:
478                                 return 5;
479                         default:
480                                 return 4;
481                         }
482                 }
483
484                 int FilterExtender (int i, ExtenderType ext, COpt opt)
485                 {
486                         if (ext == ExtenderType.Conditional &&
487                                 Uni.HasSpecialWeight ((char) i)) {
488                                 bool half = IsHalfKana ((char) i, opt);
489                                 bool katakana = !Uni.IsHiragana ((char) i);
490                                 switch (Level1 (i) & 7) {
491                                 case 2:
492                                         return half ? 0xFF71 : katakana ? 0x30A2 : 0x3042;
493                                 case 3:
494                                         return half ? 0xFF72 : katakana ? 0x30A4 : 0x3044;
495                                 case 4:
496                                         return half ? 0xFF73 : katakana ? 0x30A6 : 0x3046;
497                                 case 5:
498                                         return half ? 0xFF74 : katakana ? 0x30A8 : 0x3048;
499                                 case 6:
500                                         return half ? 0xFF75 : katakana ? 0x30AA : 0x304A;
501                                 }
502                         }
503                         return i;
504                 }
505
506                 static bool IsIgnorable (int i, COpt opt)
507                 {
508                         return Uni.IsIgnorable (i, (byte) (1 +
509                                 ((opt & COpt.IgnoreSymbols) != 0 ? 2 : 0) +
510                                 ((opt & COpt.IgnoreNonSpace) != 0 ? 4 : 0)));
511                         
512                 }
513
514                 bool IsSafe (int i)
515                 {
516                         return i / 8 >= unsafeFlags.Length ? true : (unsafeFlags [i / 8] & (1 << (i % 8))) == 0;
517                 }
518
519                 #region GetSortKey()
520
521                 public SortKey GetSortKey (string s)
522                 {
523                         return GetSortKey (s, CompareOptions.None);
524                 }
525
526                 public SortKey GetSortKey (string s, CompareOptions options)
527                 {
528                         return GetSortKey (s, 0, s.Length, options);
529                 }
530
531                 public SortKey GetSortKey (string s, int start, int length, CompareOptions options)
532                 {
533                         SortKeyBuffer buf = new SortKeyBuffer (lcid);
534                         buf.Initialize (options, lcid, s, frenchSort);
535                         int end = start + length;
536                         GetSortKey (s, start, end, buf, options);
537                         return buf.GetResultAndReset ();
538                 }
539
540                 unsafe void GetSortKey (string s, int start, int end,
541                         SortKeyBuffer buf, CompareOptions opt)
542                 {
543                         byte* prevbuf = stackalloc byte [4];
544                         ClearBuffer (prevbuf, 4);
545                         Context ctx = new Context (opt, null, null, null, null, prevbuf);
546
547                         for (int n = start; n < end; n++) {
548                                 int i = s [n];
549
550                                 ExtenderType ext = GetExtenderType (i);
551                                 if (ext != ExtenderType.None) {
552                                         i = FilterExtender (ctx.PrevCode, ext, opt);
553                                         if (i >= 0)
554                                                 FillSortKeyRaw (i, ext, buf, opt);
555                                         else if (ctx.PrevSortKey != null) {
556                                                 byte* b = ctx.PrevSortKey;
557                                                 buf.AppendNormal (
558                                                         b [0],
559                                                         b [1],
560                                                         b [2] != 1 ? b [2] : Level2 (i, ext),
561                                                         b [3] != 1 ? b [3] : Uni.Level3 (i));
562                                         }
563                                         // otherwise do nothing.
564                                         // (if the extender is the first char
565                                         // in the string, then just ignore.)
566                                         continue;
567                                 }
568
569                                 if (IsIgnorable (i, opt))
570                                         continue;
571                                 i = FilterOptions (i, opt);
572
573                                 Contraction ct = GetContraction (s, n, end);
574                                 if (ct != null) {
575                                         if (ct.Replacement != null) {
576                                                 GetSortKey (ct.Replacement, 0, ct.Replacement.Length, buf, opt);
577                                         } else {
578                                                 byte* b = ctx.PrevSortKey;
579                                                 for (int bi = 0; bi < ct.SortKey.Length; bi++)
580                                                         b [bi] = ct.SortKey [bi];
581                                                 buf.AppendNormal (
582                                                         b [0],
583                                                         b [1],
584                                                         b [2] != 1 ? b [2] : Level2 (i, ext),
585                                                         b [3] != 1 ? b [3] : Uni.Level3 (i));
586                                                 ctx.PrevCode = -1;
587                                         }
588                                         n += ct.Source.Length - 1;
589                                 }
590                                 else {
591                                         if (!Uni.IsIgnorableNonSpacing (i))
592                                                 ctx.PrevCode = i;
593                                         FillSortKeyRaw (i, ExtenderType.None, buf, opt);
594                                 }
595                         }
596                 }
597
598                 void FillSortKeyRaw (int i, ExtenderType ext,
599                         SortKeyBuffer buf, CompareOptions opt)
600                 {
601                         if (0x3400 <= i && i <= 0x4DB5) {
602                                 int diff = i - 0x3400;
603                                 buf.AppendCJKExtension (
604                                         (byte) (0x10 + diff / 254),
605                                         (byte) (diff % 254 + 2));
606                                 return;
607                         }
608
609                         UnicodeCategory uc = char.GetUnicodeCategory ((char) i);
610                         switch (uc) {
611                         case UnicodeCategory.PrivateUse:
612                                 int diff = i - 0xE000;
613                                 buf.AppendNormal (
614                                         (byte) (0xE5 + diff / 254),
615                                         (byte) (diff % 254 + 2),
616                                         0,
617                                         0);
618                                 return;
619                         case UnicodeCategory.Surrogate:
620                                 FillSurrogateSortKeyRaw (i, buf);
621                                 return;
622                         }
623
624                         byte level2 = Level2 (i, ext);
625                         if (Uni.HasSpecialWeight ((char) i)) {
626                                 byte level1 = Level1 (i);
627                                 buf.AppendKana (
628                                         Category (i),
629                                         level1,
630                                         level2,
631                                         Uni.Level3 (i),
632                                         Uni.IsJapaneseSmallLetter ((char) i),
633                                         ToDashTypeValue (ext, opt),
634                                         !Uni.IsHiragana ((char) i),
635                                         IsHalfKana ((char) i, opt)
636                                         );
637                                 if ((opt & COpt.IgnoreNonSpace) == 0 && ext == ExtenderType.Voiced)
638                                         // Append voice weight
639                                         buf.AppendNormal (1, 1, 1, 0);
640                         }
641                         else
642                                 buf.AppendNormal (
643                                         Category (i),
644                                         Level1 (i),
645                                         level2,
646                                         Uni.Level3 (i));
647                 }
648
649                 void FillSurrogateSortKeyRaw (int i, SortKeyBuffer buf)
650                 {
651                         int diffbase = 0;
652                         int segment = 0;
653                         byte lower = 0;
654
655                         if (i < 0xD840) {
656                                 diffbase = 0xD800;
657                                 segment = 0x41;
658                                 lower = (byte) ((i == 0xD800) ? 0x3E : 0x3F);
659                         } else if (0xD840 <= i && i < 0xD880) {
660                                 diffbase = 0xD840;
661                                 segment = 0xF2;
662                                 lower = 0x3E;
663                         } else if (0xDB80 <= i && i < 0xDC00) {
664                                 diffbase = 0xDB80 - 0x40;
665                                 segment = 0xFE;
666                                 lower = 0x3E;
667                         } else {
668                                 diffbase = 0xDC00 - 0xF8 + 2;
669                                 segment = 0x41;
670                                 lower = 0x3F;
671                         }
672                         int diff = i - diffbase;
673
674                         buf.AppendNormal (
675                                 (byte) (segment + diff / 254),
676                                 (byte) (diff % 254 + 2),
677                                 lower,
678                                 lower);
679                 }
680
681                 #endregion
682
683                 #region Compare()
684
685                 public int Compare (string s1, string s2)
686                 {
687                         return Compare (s1, 0, s1.Length, s2, 0, s2.Length, CompareOptions.None);
688                 }
689 /*
690                 private int CompareOrdinal (string s1, int idx1, int len1,
691                         string s2, int idx2, int len2)
692                 {
693                         int min = len1 < len2 ? len1 : len2;
694                         int end1 = idx1 + min;
695                         int end2 = idx2 + min;
696                         if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
697                                 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
698                         for (int i1 = idx1, i2 = idx2;
699                                 i1 < end1 && i2 < end2; i1++, i2++)
700                                 if (s1 [i1] != s2 [i2])
701                                         return s1 [i1] - s2 [i2];
702                         return len1 == len2 ? 0 :
703                                 len1 == min ? - 1 : 1;
704                 }
705
706                 // mostly equivalent to CompareOrdinal, but the return value is
707                 // not based on codepoints.
708                 private int CompareQuick (string s1, int idx1, int len1,
709                         string s2, int idx2, int len2, out bool sourceConsumed,
710                         out bool targetConsumed, bool immediateBreakup)
711                 {
712                         sourceConsumed = false;
713                         targetConsumed = false;
714                         int min = len1 < len2 ? len1 : len2;
715                         int end1 = idx1 + min;
716                         int end2 = idx2 + min;
717                         if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
718                                 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
719                         for (int i1 = idx1, i2 = idx2;
720                                 i1 < end1 && i2 < end2; i1++, i2++)
721                                 if (s1 [i1] != s2 [i2]) {
722                                         if (immediateBreakup)
723                                                 return -1;
724                                         int ret = Category (s1 [i1]) - Category (s2 [i2]);
725                                         if (ret == 0)
726                                                 ret = Level1 (s1 [i1]) - Level1 (s2 [i2]);
727                                         // no level2 and 4
728                                         if (ret == 0)
729                                                 ret = Uni.Level3 (s1 [i1]) - Uni.Level3 (s2 [i2]);
730                                         if (ret == 0)
731                                                 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. '{0}' {2} {3} '{1}' {4} {5}", s1, s2, idx1, end1, idx2, end2));
732                                         return ret;
733                                 }
734                         sourceConsumed = len1 <= len2;
735                         targetConsumed = len1 >= len2;
736                         return len1 == len2 ? 0 :
737                                 len1 == min ? - 1 : 1;
738                 }
739
740                 private int CompareOrdinalIgnoreCase (string s1, int idx1, int len1,
741                         string s2, int idx2, int len2)
742                 {
743                         int min = len1 < len2 ? len1 : len2;
744                         int end1 = idx1 + min;
745                         int end2 = idx2 + min;
746                         if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
747                                 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
748                         TextInfo ti = invariant.textInfo;
749                         for (int i1 = idx1, i2 = idx2;
750                                 i1 < end1 && i2 < end2; i1++, i2++)
751                                 if (ti.ToLower (s1 [i1]) != ti.ToLower (s2 [i2]))
752                                         return ti.ToLower (s1 [i1]) - ti.ToLower (s2 [i2]);
753                         return len1 == len2 ? 0 :
754                                 len1 == min ? - 1 : 1;
755                 }
756 */
757                 internal unsafe int Compare (string s1, int idx1, int len1,
758                         string s2, int idx2, int len2, CompareOptions options)
759                 {
760 #if false // stable easy version, depends on GetSortKey().
761                         SortKey sk1 = GetSortKey (s1, idx1, len1, options);
762                         SortKey sk2 = GetSortKey (s2, idx2, len2, options);
763                         byte [] d1 = sk1.KeyData;
764                         byte [] d2 = sk2.KeyData;
765                         int len = d1.Length > d2.Length ? d2.Length : d1.Length;
766                         for (int i = 0; i < len; i++)
767                                 if (d1 [i] != d2 [i])
768                                         return d1 [i] < d2 [i] ? -1 : 1;
769                         return d1.Length == d2.Length ? 0 : d1.Length < d2.Length ? -1 : 1;
770 #else
771                         byte* sk1 = stackalloc byte [4];
772                         byte* sk2 = stackalloc byte [4];
773                         ClearBuffer (sk1, 4);
774                         ClearBuffer (sk2, 4);
775                         Context ctx = new Context (options, null, null, sk1, sk2, null);
776                         //      QuickCheckPossible (s1, idx1, idx1 + len1, s2, idx2, idx2 + len2));
777
778                         bool dummy, dummy2;
779                         int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, out dummy, out dummy2, true, false, ref ctx);
780                         return ret == 0 ? 0 : ret < 0 ? -1 : 1;
781 #endif
782                 }
783
784                 unsafe void ClearBuffer (byte* buffer, int size)
785                 {
786                         for (int i = 0; i < size; i++)
787                                 buffer [i] = 0;
788                 }
789
790 /*
791                 bool QuickCheckPossible (string s1, int idx1, int end1,
792                         string s2, int idx2, int end2)
793                 {
794 #if true
795                         // looks like it is buggy and inefficient, so just disable it.
796                         return false;
797 #else
798                         if (QuickCheckDisabled)
799                                 return false;
800 //                      if (s1.Length > 100 || s2.Length > 100)
801 //                              return false;
802                         for (int i = idx1; i < end1; i++)
803                                 if (s1 [i] < 0x20 && (s1 [i] < '\x9' || s1 [i] > '\xD') || s1 [i] >= 0x80 || s1 [i] == '-' || s1 [i] == '\'')
804                                         return false;
805                         for (int i = idx2; i < end2; i++)
806                                 if (s2 [i] < 0x20 && (s2 [i] < '\x9' || s2 [i] > '\xD') || s2 [i] >= 0x80 || s2 [i] == '-' || s2 [i] == '\'')
807                                         return false;
808                         return true;
809 #endif
810                 }
811 */
812
813                 unsafe int CompareInternal (string s1, int idx1, int len1, string s2,
814                         int idx2, int len2,
815                         out bool targetConsumed, out bool sourceConsumed,
816                         bool skipHeadingExtenders, bool immediateBreakup,
817                         ref Context ctx)
818                 {
819                         COpt opt = ctx.Option;
820                         int start1 = idx1;
821                         int start2 = idx2;
822                         int end1 = idx1 + len1;
823                         int end2 = idx2 + len2;
824                         targetConsumed = false;
825                         sourceConsumed = false;
826                         PreviousInfo prev2 = new PreviousInfo (false);
827
828 //                      if (opt == CompareOptions.None && ctx.QuickCheckPossible)
829 //                              return CompareQuick (s1, idx1, len1, s2, idx2, len2, out sourceConsumed, out targetConsumed, immediateBreakup);
830
831                         // It holds final result that comes from the comparison
832                         // at level 2 or lower. Even if Compare() found the
833                         // difference at level 2 or lower, it still has to
834                         // continue level 1 comparison. FinalResult is used
835                         // when there was no further differences.
836                         int finalResult = 0;
837                         // It holds the comparison level to do. It starts from
838                         // 5, and becomes 1 when we do primary-only comparison.
839                         int currentLevel = 5;
840
841                         int lv5At1 = -1;
842                         int lv5At2 = -1;
843                         int lv5Value1 = 0;
844                         int lv5Value2 = 0;
845
846                         // Skip heading extenders
847                         if (skipHeadingExtenders) {
848                                 for (; idx1 < end1; idx1++)
849                                         if (GetExtenderType (s1 [idx1]) == ExtenderType.None)
850                                                 break;
851                                 for (; idx2 < end2; idx2++)
852                                         if (GetExtenderType (s2 [idx2]) == ExtenderType.None)
853                                                 break;
854                         }
855
856                         ExtenderType ext1 = ExtenderType.None;
857                         ExtenderType ext2 = ExtenderType.None;
858
859                         int quickCheckPos1 = idx1;
860                         int quickCheckPos2 = idx2;
861                         bool stringSort = (opt & COpt.StringSort) != 0;
862                         bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
863                         Escape escape1 = new Escape ();
864                         Escape escape2 = new Escape ();
865
866                         while (true) {
867                                 for (; idx1 < end1; idx1++)
868                                         if (!IsIgnorable (s1 [idx1], opt))
869                                                 break;
870                                 for (; idx2 < end2; idx2++)
871                                         if (!IsIgnorable (s2 [idx2], opt))
872                                                 break;
873
874                                 if (idx1 >= end1) {
875                                         if (escape1.Source == null)
876                                                 break;
877                                         s1 = escape1.Source;
878                                         start1 = escape1.Start;
879                                         idx1 = escape1.Index;
880                                         end1 = escape1.End;
881                                         quickCheckPos1 = escape1.Optional;
882                                         escape1.Source = null;
883                                         continue;
884                                 }
885                                 if (idx2 >= end2) {
886                                         if (escape2.Source == null)
887                                                 break;
888                                         s2 = escape2.Source;
889                                         start2 = escape2.Start;
890                                         idx2 = escape2.Index;
891                                         end2 = escape2.End;
892                                         quickCheckPos2 = escape2.Optional;
893                                         escape2.Source = null;
894                                         continue;
895                                 }
896 #if true
897 // If comparison is unstable, then this part is one of the most doubtful part.
898 // Here we run quick codepoint comparison and run back to "stable" area to
899 // compare characters.
900
901                                 // Strictly to say, even the first character
902                                 // could be compared here, but it messed
903                                 // backward step, so I just avoided mess.
904                                 if (quickCheckPos1 < idx1 && quickCheckPos2 < idx2) {
905                                         while (idx1 < end1 && idx2 < end2 &&
906                                                 s1 [idx1] == s2 [idx2]) {
907                                                 idx1++;
908                                                 idx2++;
909                                         }
910                                         if (idx1 == end1 || idx2 == end2)
911                                                 continue; // check replacement
912
913                                         int backwardEnd1 = quickCheckPos1;
914                                         int backwardEnd2 = quickCheckPos2;
915                                         quickCheckPos1 = idx1;
916                                         quickCheckPos2 = idx2;
917
918                                         idx1--;
919                                         idx2--;
920
921                                         for (;idx1 > backwardEnd1; idx1--)
922                                                 if (Category (s1 [idx1]) != 1)
923                                                         break;
924                                         for (;idx2 > backwardEnd2; idx2--)
925                                                 if (Category (s2 [idx2]) != 1)
926                                                         break;
927                                         for (;idx1 > backwardEnd1; idx1--)
928                                                 if (IsSafe (s1 [idx1]))
929                                                         break;
930                                         for (;idx2 > backwardEnd2; idx2--)
931                                                 if (IsSafe (s2 [idx2]))
932                                                         break;
933                                 }
934 #endif
935
936                                 int cur1 = idx1;
937                                 int cur2 = idx2;
938                                 byte* sk1 = null;
939                                 byte* sk2 = null;
940                                 int i1 = FilterOptions (s1 [idx1], opt);
941                                 int i2 = FilterOptions (s2 [idx2], opt);
942                                 bool special1 = false;
943                                 bool special2 = false;
944
945                                 // If current character is an expander, then
946                                 // repeat the previous character.
947                                 ext1 = GetExtenderType (i1);
948                                 if (ext1 != ExtenderType.None) {
949                                         if (ctx.PrevCode < 0) {
950                                                 if (ctx.PrevSortKey == null) {
951                                                         // nothing to extend
952                                                         idx1++;
953                                                         continue;
954                                                 }
955                                                 sk1 = ctx.PrevSortKey;
956                                         }
957                                         else
958                                                 i1 = FilterExtender (ctx.PrevCode, ext1, opt);
959                                 }
960                                 ext2 = GetExtenderType (i2);
961                                 if (ext2 != ExtenderType.None) {
962                                         if (prev2.Code < 0) {
963                                                 if (prev2.SortKey == null) {
964                                                         // nothing to extend
965                                                         idx2++;
966                                                         continue;
967                                                 }
968                                                 sk2 = prev2.SortKey;
969                                         }
970                                         else
971                                                 i2 = FilterExtender (prev2.Code, ext2, opt);
972                                 }
973
974                                 byte cat1 = Category (i1);
975                                 byte cat2 = Category (i2);
976
977                                 // Handle special weight characters
978                                 if (cat1 == 6) {
979                                         if (!stringSort && currentLevel == 5) {
980                                                 lv5At1 = escape1.Source != null ?
981                                                         escape1.Index - escape1.Start :
982                                                         cur1 - start1;
983                                                 // here Windows has a bug that it does
984                                                 // not consider thirtiary weight.
985                                                 lv5Value1 = Level1 (i1) << 8 + Uni.Level3 (i1);
986                                         }
987                                         ctx.PrevCode = i1;
988                                         idx1++;
989                                 }
990                                 if (cat2 == 6) {
991                                         if (!stringSort && currentLevel == 5) {
992                                                 lv5At2 = escape2.Source != null ?
993                                                         escape2.Index - escape2.Start :
994                                                         cur2 - start2;
995                                                 // here Windows has a bug that it does
996                                                 // not consider thirtiary weight.
997                                                 lv5Value2 = Level1 (i2) << 8 + Uni.Level3 (i2);
998                                         }
999                                         prev2.Code = i2;
1000                                         idx2++;
1001                                 }
1002                                 if (cat1 == 6 || cat2 == 6) {
1003                                         if (currentLevel == 5) {
1004                                                 if (lv5Value1 == lv5Value2) {
1005                                                         // There is not really difference here.
1006                                                         lv5At1 = lv5At2 = -1;
1007                                                         lv5Value1 = lv5Value2 = 0;
1008                                                 }
1009                                                 else
1010                                                         currentLevel = 4;
1011                                         }
1012                                         continue;
1013                                 }
1014
1015                                 Contraction ct1 = null;
1016                                 if (ext1 == ExtenderType.None)
1017                                         ct1 = GetContraction (s1, idx1, end1);
1018
1019                                 int offset1 = 1;
1020                                 if (sk1 != null)
1021                                         offset1 = 1;
1022                                 else if (ct1 != null) {
1023                                         offset1 = ct1.Source.Length;
1024                                         if (ct1.SortKey != null) {
1025                                                 sk1 = ctx.Buffer1;
1026                                                 for (int i = 0; i < ct1.SortKey.Length; i++)
1027                                                         sk1 [i] = ct1.SortKey [i];
1028                                                 ctx.PrevCode = -1;
1029                                                 ctx.PrevSortKey = sk1;
1030                                         }
1031                                         else if (escape1.Source == null) {
1032                                                 escape1.Source = s1;
1033                                                 escape1.Start = start1;
1034                                                 escape1.Index = cur1 + ct1.Source.Length;
1035                                                 escape1.End = end1;
1036                                                 escape1.Optional = quickCheckPos1;
1037                                                 s1 = ct1.Replacement;
1038                                                 idx1 = 0;
1039                                                 start1 = 0;
1040                                                 end1 = s1.Length;
1041                                                 quickCheckPos1 = 0;
1042                                                 continue;
1043                                         }
1044                                 }
1045                                 else {
1046                                         sk1 = ctx.Buffer1;
1047                                         sk1 [0] = cat1;
1048                                         sk1 [1] = Level1 (i1);
1049                                         if (!ignoreNonSpace && currentLevel > 1)
1050                                                 sk1 [2] = Level2 (i1, ext1);
1051                                         if (currentLevel > 2)
1052                                                 sk1 [3] = Uni.Level3 (i1);
1053                                         if (currentLevel > 3)
1054                                                 special1 = Uni.HasSpecialWeight ((char) i1);
1055                                         if (cat1 > 1)
1056                                                 ctx.PrevCode = i1;
1057                                 }
1058
1059                                 Contraction ct2 = null;
1060                                 if (ext2 == ExtenderType.None)
1061                                         ct2 = GetContraction (s2, idx2, end2);
1062
1063                                 if (sk2 != null)
1064                                         idx2++;
1065                                 else if (ct2 != null) {
1066                                         idx2 += ct2.Source.Length;
1067                                         if (ct2.SortKey != null) {
1068                                                 sk2 = ctx.Buffer2;
1069                                                 for (int i = 0; i < ct2.SortKey.Length; i++)
1070                                                         sk2 [i] = ct2.SortKey [i];
1071                                                 prev2.Code = -1;
1072                                                 prev2.SortKey = sk2;
1073                                         }
1074                                         else if (escape2.Source == null) {
1075                                                 escape2.Source = s2;
1076                                                 escape2.Start = start2;
1077                                                 escape2.Index = cur2 + ct2.Source.Length;
1078                                                 escape2.End = end2;
1079                                                 escape2.Optional = quickCheckPos2;
1080                                                 s2 = ct2.Replacement;
1081                                                 idx2 = 0;
1082                                                 start2 = 0;
1083                                                 end2 = s2.Length;
1084                                                 quickCheckPos2 = 0;
1085                                                 continue;
1086                                         }
1087                                 }
1088                                 else {
1089                                         sk2 = ctx.Buffer2;
1090                                         sk2 [0] = cat2;
1091                                         sk2 [1] = Level1 (i2);
1092                                         if (!ignoreNonSpace && currentLevel > 1)
1093                                                 sk2 [2] = Level2 (i2, ext2);
1094                                         if (currentLevel > 2)
1095                                                 sk2 [3] = Uni.Level3 (i2);
1096                                         if (currentLevel > 3)
1097                                                 special2 = Uni.HasSpecialWeight ((char) i2);
1098                                         if (cat2 > 1)
1099                                                 prev2.Code = i2;
1100                                         idx2++;
1101                                 }
1102
1103                                 // Add offset here so that it does not skip
1104                                 // idx1 while s2 has a replacement.
1105                                 idx1 += offset1;
1106
1107                                 if (!ignoreNonSpace) {
1108                                         // add diacritical marks in s1 here
1109                                         while (idx1 < end1) {
1110                                                 if (Category (s1 [idx1]) != 1)
1111                                                         break;
1112                                                 if (sk1 [2] == 0)
1113                                                         sk1 [2] = 2;
1114                                                 sk1 [2] = (byte) (sk1 [2] + 
1115                                                         Level2 (s1 [idx1], ExtenderType.None));
1116                                                 idx1++;
1117                                         }
1118
1119                                         // add diacritical marks in s2 here
1120                                         while (idx2 < end2) {
1121                                                 if (Category (s2 [idx2]) != 1)
1122                                                         break;
1123                                                 if (sk2 [2] == 0)
1124                                                         sk2 [2] = 2;
1125                                                 sk2 [2] = (byte) (sk2 [2] + 
1126                                                         Level2 (s2 [idx2], ExtenderType.None));
1127                                                 idx2++;
1128                                         }
1129                                 }
1130
1131                                 int ret = sk1 [0] - sk2 [0];
1132                                 ret = ret != 0 ? ret : sk1 [1] - sk2 [1];
1133                                 if (ret != 0)
1134                                         return ret;
1135                                 if (currentLevel == 1)
1136                                         continue;
1137                                 if (!ignoreNonSpace) {
1138                                         ret = sk1 [2] - sk2 [2];
1139                                         if (ret != 0) {
1140                                                 finalResult = ret;
1141                                                 if (immediateBreakup)
1142                                                         return -1; // different
1143                                                 currentLevel = frenchSort ? 2 : 1;
1144                                                 continue;
1145                                         }
1146                                 }
1147                                 if (currentLevel == 2)
1148                                         continue;
1149                                 ret = sk1 [3] - sk2 [3];
1150                                 if (ret != 0) {
1151                                         finalResult = ret;
1152                                         if (immediateBreakup)
1153                                                 return -1; // different
1154                                         currentLevel = 2;
1155                                         continue;
1156                                 }
1157                                 if (currentLevel == 3)
1158                                         continue;
1159                                 if (special1 != special2) {
1160                                         if (immediateBreakup)
1161                                                 return -1; // different
1162                                         finalResult = special1 ? 1 : -1;
1163                                         currentLevel = 3;
1164                                         continue;
1165                                 }
1166                                 if (special1) {
1167                                         ret = CompareFlagPair (
1168                                                 !Uni.IsJapaneseSmallLetter ((char) i1),
1169                                                 !Uni.IsJapaneseSmallLetter ((char) i2));
1170                                         ret = ret != 0 ? ret :
1171                                                 ToDashTypeValue (ext1, opt) -
1172                                                 ToDashTypeValue (ext2, opt);
1173                                         ret = ret != 0 ? ret : CompareFlagPair (
1174                                                 Uni.IsHiragana ((char) i1),
1175                                                 Uni.IsHiragana ((char) i2));
1176                                         ret = ret != 0 ? ret : CompareFlagPair (
1177                                                 !IsHalfKana ((char) i1, opt),
1178                                                 !IsHalfKana ((char) i2, opt));
1179                                         if (ret != 0) {
1180                                                 if (immediateBreakup)
1181                                                         return -1; // different
1182                                                 finalResult = ret;
1183                                                 currentLevel = 3;
1184                                                 continue;
1185                                         }
1186                                 }
1187                         }
1188
1189                         // If there were only level 3 or lower differences,
1190                         // then we still have to find diacritical differences
1191                         // if any.
1192                         if (!ignoreNonSpace &&
1193                                 finalResult != 0 && currentLevel > 2) {
1194                                 while (idx1 < end1 && idx2 < end2) {
1195                                         if (!Uni.IsIgnorableNonSpacing (s1 [idx1]))
1196                                                 break;
1197                                         if (!Uni.IsIgnorableNonSpacing (s2 [idx2]))
1198                                                 break;
1199                                         finalResult = Level2 (FilterOptions (s1 [idx1], opt), ext1) - Level2 (FilterOptions (s2 [idx2], opt), ext2);
1200                                         if (finalResult != 0)
1201                                                 break;
1202                                         idx1++;
1203                                         idx2++;
1204                                         // they should work only for the first character
1205                                         ext1 = ExtenderType.None;
1206                                         ext2 = ExtenderType.None;
1207                                 }
1208                         }
1209                         if (currentLevel == 1 && finalResult != 0) {
1210                                 while (idx1 < end1) {
1211                                         if (Uni.IsIgnorableNonSpacing (s1 [idx1]))
1212                                                 idx1++;
1213                                         else
1214                                                 break;
1215                                 }
1216                                 while (idx2 < end2) {
1217                                         if (Uni.IsIgnorableNonSpacing (s2 [idx2]))
1218                                                 idx2++;
1219                                         else
1220                                                 break;
1221                                 }
1222                         }
1223                         // we still have to check level 5
1224                         if (finalResult == 0) {
1225                                 if (lv5At1 < 0 && lv5At2 >= 0)
1226                                         finalResult = -1;
1227                                 else if (lv5At2 < 0 && lv5At1 >= 0)
1228                                         finalResult = 1;
1229                                 else {
1230                                         finalResult = lv5At1 - lv5At2;
1231                                         if (finalResult == 0)
1232                                                 finalResult = lv5Value1 - lv5Value2;
1233                                 }
1234                         }
1235                         if (finalResult == 0) {
1236                                 if (idx2 == end2)
1237                                         targetConsumed = true;
1238                                 if (idx1 == end1)
1239                                         sourceConsumed = true;
1240                         }
1241                         return idx1 != end1 ? 1 : idx2 == end2 ? finalResult : -1;
1242                 }
1243
1244                 int CompareFlagPair (bool b1, bool b2)
1245                 {
1246                         return b1 == b2 ? 0 : b1 ? 1 : -1;
1247                 }
1248
1249                 #endregion
1250
1251                 #region IsPrefix() and IsSuffix()
1252
1253                 public bool IsPrefix (string src, string target, CompareOptions opt)
1254                 {
1255                         return IsPrefix (src, target, 0, src.Length, opt);
1256                 }
1257
1258                 public unsafe bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
1259                 {
1260                         if (target.Length == 0)
1261                                 return true;
1262                         byte* sk1 = stackalloc byte [4];
1263                         byte* sk2 = stackalloc byte [4];
1264                         ClearBuffer (sk1, 4);
1265                         ClearBuffer (sk2, 4);
1266                         Context ctx = new Context (opt, null, null, sk1, sk2, null); 
1267                                 //QuickCheckPossible (s, start, start + length, target, 0, target.Length));
1268                         return IsPrefix (s, target, start, length, true, ref ctx);
1269                 }
1270
1271                 unsafe bool IsPrefix (string s, string target, int start, int length, bool skipHeadingExtenders, ref Context ctx)
1272                 {
1273                         bool consumed, dummy;
1274                         CompareInternal (s, start, length,
1275                                 target, 0, target.Length,
1276                                 out consumed, out dummy, skipHeadingExtenders,
1277                                 true, ref ctx);
1278                         return consumed;
1279                 }
1280
1281                 // IsSuffix()
1282
1283                 public bool IsSuffix (string src, string target, CompareOptions opt)
1284                 {
1285                         return IsSuffix (src, target, src.Length - 1, src.Length, opt);
1286                 }
1287
1288                 public unsafe bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
1289                 {
1290                         if (target.Length == 0)
1291                                 return true;
1292                         int last = LastIndexOf (s, target, start, length, opt);
1293                         return last >= 0 && Compare (s, last, s.Length - last, target, 0, target.Length, opt) == 0;
1294 /*
1295                         // quick check : simple codepoint comparison
1296                         if (s.Length >= target.Length) {
1297                                 int si = start;
1298                                 int se = start - length;
1299                                 for (int i = target.Length - 1; si >= se && i >= 0; i--, si--)
1300                                         if (s [si] != target [i])
1301                                                 break;
1302                                 if (si == start + target.Length)
1303                                         return true;
1304                         }
1305
1306                         PreviousInfo prev = new PreviousInfo (false);
1307                         byte* sk1 = stackalloc byte [4];
1308                         byte* sk2 = stackalloc byte [4];
1309                         ClearBuffer (sk1, 4);
1310                         ClearBuffer (sk2, 4);
1311                         return IsSuffix (opt, s, target, start, length, ref prev, sk1, sk2);
1312 */
1313                 }
1314 /*
1315                 unsafe bool IsSuffix (string s, string t, int start, int length, ref Context ctx)
1316                 {
1317                         int tstart = 0;
1318                         COpt opt = ctx.Option;
1319
1320                         for (;tstart < t.Length; tstart++)
1321                                 if (!IsIgnorable (t [tstart], opt))
1322                                         break;
1323                         if (tstart == t.Length)
1324                                 return true; // as if target is String.Empty.
1325
1326 #if false
1327 // This is still not working. If it is not likely to get working, then just remove it.
1328                         int si = start;
1329                         int send = start - length;
1330                         int ti = t.Length - 1;
1331                         int tend = -1;
1332
1333                         int sStep = start + 1;
1334                         int tStep = t.Length;
1335                         bool sourceConsumed, targetConsumed;
1336                         while (true) {
1337                                 for (; send < si; si--)
1338                                         if (!IsIgnorable (s [si]))
1339                                                 break;
1340                                 for (; tend < ti; ti--)
1341                                         if (!IsIgnorable (t [ti]))
1342                                                 break;
1343                                 if (tend == ti)
1344                                         break;
1345                                 for (; send < si; si--)
1346                                         if (IsSafe (s [si]))
1347                                                 break;
1348                                 for (; tend < ti; ti--)
1349                                         if (IsSafe (t [ti]))
1350                                                 break;
1351 Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, length, t, ti, tstart, sStep - si, tStep - ti);
1352                                 if (CompareInternal (opt, s, si, sStep - si,
1353                                         t, ti, tStep - ti,
1354                                         out targetConsumed, out sourceConsumed,
1355                                         true) != 0)
1356                                         return false;
1357                                 if (send == si)
1358                                         return false;
1359                                 sStep = si;
1360                                 tStep = ti;
1361                                 si--;
1362                                 ti--;
1363                         }
1364                         return true;
1365 #else
1366                         // FIXME: it is not efficient for very long target.
1367                         bool sourceConsumed, targetConsumed;
1368                         int mismatchCount = 0;
1369                         for (int i = 0; i < length; i++) {
1370                                 ctx.ClearPrevInfo ();
1371
1372                                 int ret = CompareInternal (s, start - i, i + 1,
1373                                         t, tstart, t.Length - tstart,
1374                                         out targetConsumed,
1375                                         // FIXME: could immediately breakup
1376                                         out sourceConsumed, true, true, ref ctx);
1377                                 if (ret == 0)
1378                                         return true;
1379                                 if (!sourceConsumed && targetConsumed)
1380                                         return false;
1381                                 if (!targetConsumed) {
1382                                         mismatchCount++;
1383                                         if (mismatchCount > Uni.MaxExpansionLength)
1384                                                 // The largest length of an
1385                                                 // expansion is 3, so if the
1386                                                 // target was not consumed more
1387                                                 // than 3 times, then the tail
1388                                                 // character does not match.
1389                                                 return false;
1390                                 }
1391                         }
1392                         return false;
1393 #endif
1394                 }
1395 */
1396                 #endregion
1397
1398                 #region IndexOf() / LastIndexOf()
1399
1400                 // string
1401
1402                 public int IndexOf (string s, string target, CompareOptions opt)
1403                 {
1404                         return IndexOf (s, target, 0, s.Length, opt);
1405                 }
1406
1407                 int QuickIndexOf (string s, string target, int start, int length, out bool testWasUnable)
1408                 {
1409                         int testedSourcePos = -1, testedTargetPos = -1;
1410
1411                         testWasUnable = true;
1412                         if (target.Length == 0)
1413                                 return 0;
1414                         else if (target.Length > length)
1415                                 return -1;
1416                         testWasUnable = false;
1417
1418                         int end = start + length - target.Length + 1;
1419                         for (int i = start; i < end; i++) {
1420                                 bool no = false;
1421                                 for (int j = 0; j < target.Length; j++) {
1422                                         if (testedTargetPos < j) {
1423                                                 char c = target [j];
1424                                                 if (c == 0 || c >= 0x80) {
1425                                                         testWasUnable = true;
1426                                                         return -1;
1427                                                 }
1428                                                 else
1429                                                         testedTargetPos = j;
1430                                         }
1431                                         if (testedSourcePos < i + j) {
1432                                                 char c = s [i + j];
1433                                                 if (c == 0 || c >= 0x80) {
1434                                                         testWasUnable = true;
1435                                                         return -1;
1436                                                 }
1437                                                 else
1438                                                         testedSourcePos = i + j;
1439                                         }
1440                                         if (s [i + j] != target [j]) {
1441                                                 no = true;
1442                                                 break;
1443                                         }
1444                                 }
1445                                 if (no)
1446                                         continue;
1447                                 return i;
1448                         }
1449                         return -1;
1450                 }
1451
1452                 public unsafe int IndexOf (string s, string target, int start, int length, CompareOptions opt)
1453                 {
1454                         if (opt == CompareOptions.Ordinal)
1455                                 return IndexOfOrdinal (s, target, start, length);
1456                         if (opt == CompareOptions.OrdinalIgnoreCase)
1457                                 return IndexOfOrdinalIgnoreCase (s, target, start, length);
1458                         if (opt == CompareOptions.None) {
1459                                 bool testWasUnable;
1460                                 int ret = QuickIndexOf (s, target, start, length, out testWasUnable);
1461                                 if (!testWasUnable)
1462                                         return ret;
1463                         }
1464
1465                         byte* alwaysMatchFlags = stackalloc byte [16];
1466                         byte* neverMatchFlags = stackalloc byte [16];
1467                         byte* targetSortKey = stackalloc byte [4];
1468                         byte* sk1 = stackalloc byte [4];
1469                         byte* sk2 = stackalloc byte [4];
1470                         ClearBuffer (alwaysMatchFlags, 16);
1471                         ClearBuffer (neverMatchFlags, 16);
1472                         ClearBuffer (targetSortKey, 4);
1473                         ClearBuffer (sk1, 4);
1474                         ClearBuffer (sk2, 4);
1475                         Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null);
1476
1477                         return IndexOf (s, target, start, length,
1478                                 targetSortKey, ref ctx);
1479                 }
1480
1481                 // note that it wouldn't be used since ordinal IndexOf()
1482                 // should be directed to icall.
1483                 int IndexOfOrdinal (string s, string target, int start, int length)
1484                 {
1485                         if (target.Length == 0)
1486                                 return 0;
1487                         else if (target.Length > length)
1488                                 return -1;
1489
1490                         int end = start + length - target.Length + 1;
1491                         for (int i = start; i < end; i++) {
1492                                 bool no = false;
1493                                 for (int j = 0; j < target.Length; j++) {
1494                                         if (s [i + j] != target [j]) {
1495                                                 no = true;
1496                                                 break;
1497                                         }
1498                                 }
1499                                 if (no)
1500                                         continue;
1501                                 return i;
1502                         }
1503                         return -1;
1504                 }
1505
1506                 int IndexOfOrdinalIgnoreCase (string s, string target, int start, int length)
1507                 {
1508                         if (target.Length == 0)
1509                                 return 0;
1510                         else if (target.Length > length)
1511                                 return -1;
1512
1513                         int end = start + length - target.Length + 1;
1514                         for (int i = start; i < end; i++) {
1515                                 bool no = false;
1516                                 for (int j = 0; j < target.Length; j++) {
1517                                         // I think almost all text has more lower letters than upper ones. Thus with this invariant comparison ToLower() should be faster since it costs less operations.
1518                                         if (textInfo.ToLower (s [i + j]) != textInfo.ToLower (target [j])) {
1519                                                 no = true;
1520                                                 break;
1521                                         }
1522                                 }
1523                                 if (no)
1524                                         continue;
1525                                 return i;
1526                         }
1527                         return -1;
1528                 }
1529
1530                 // char
1531
1532                 public int IndexOf (string s, char target, CompareOptions opt)
1533                 {
1534                         return IndexOf (s, target, 0, s.Length, opt);
1535                 }
1536
1537                 public unsafe int IndexOf (string s, char target, int start, int length, CompareOptions opt)
1538                 {
1539                         if (opt == CompareOptions.Ordinal)
1540                                 return IndexOfOrdinal (s, target, start, length);
1541                         if (opt == CompareOptions.OrdinalIgnoreCase)
1542                                 return IndexOfOrdinalIgnoreCase (s, target, start, length);
1543                         byte* alwaysMatchFlags = stackalloc byte [16];
1544                         byte* neverMatchFlags = stackalloc byte [16];
1545                         byte* targetSortKey = stackalloc byte [4];
1546                         byte* sk1 = stackalloc byte [4];
1547                         byte* sk2 = stackalloc byte [4];
1548                         ClearBuffer (alwaysMatchFlags, 16);
1549                         ClearBuffer (neverMatchFlags, 16);
1550                         ClearBuffer (targetSortKey, 4);
1551                         ClearBuffer (sk1, 4);
1552                         ClearBuffer (sk2, 4);
1553                         Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null);
1554
1555                         // If target is contraction, then use string search.
1556                         Contraction ct = GetContraction (target);
1557                         if (ct != null) {
1558                                 if (ct.Replacement != null)
1559                                         return IndexOf (s, ct.Replacement,
1560                                                 start, length, targetSortKey, ref ctx);
1561                                 else {
1562                                         for (int i = 0; i < ct.SortKey.Length; i++)
1563                                                 sk2 [i] = ct.SortKey [i];
1564                                         return IndexOfSortKey (s, start, length, sk2, char.MinValue, -1, true, ref ctx);
1565                                 }
1566                         } else {
1567                                 int ti = FilterOptions ((int) target, opt);
1568                                 targetSortKey [0] = Category (ti);
1569                                 targetSortKey [1] = Level1 (ti);
1570                                 if ((opt & COpt.IgnoreNonSpace) == 0)
1571                                         targetSortKey [2] =
1572                                                 Level2 (ti, ExtenderType.None);
1573                                 targetSortKey [3] = Uni.Level3 (ti);
1574                                 return IndexOfSortKey (s, start, length,
1575                                         targetSortKey, target, ti,
1576                                         !Uni.HasSpecialWeight ((char) ti), ref ctx);
1577                         }
1578                 }
1579
1580                 // note that it wouldn't be used since ordinal IndexOf()
1581                 // should be directed to icall.
1582                 int IndexOfOrdinal (string s, char target, int start, int length)
1583                 {
1584                         int end = start + length;
1585                         for (int i = start; i < end; i++)
1586                                 if (s [i] == target)
1587                                         return i;
1588                         return -1;
1589                 }
1590
1591                 int IndexOfOrdinalIgnoreCase (string s, char target, int start, int length)
1592                 {
1593                         int end = start + length;
1594                         target = textInfo.ToLower (target);
1595                         for (int i = start; i < end; i++)
1596                                 if (textInfo.ToLower (s [i]) == target)
1597                                         return i;
1598                         return -1;
1599                 }
1600
1601                 // Searches target byte[] keydata
1602                 unsafe int IndexOfSortKey (string s, int start, int length, byte* sortkey, char target, int ti, bool noLv4, ref Context ctx)
1603                 {
1604                         int end = start + length;
1605                         int idx = start;
1606
1607                         while (idx < end) {
1608                                 int cur = idx;
1609                                 if (MatchesForward (s, ref idx, end, ti, sortkey, noLv4, ref ctx))
1610                                         return cur;
1611                         }
1612                         return -1;
1613                 }
1614
1615                 // Searches string. Search head character (or keydata when
1616                 // the head is contraction sortkey) and try IsPrefix().
1617                 unsafe int IndexOf (string s, string target, int start, int length, byte* targetSortKey, ref Context ctx)
1618                 {
1619                         COpt opt = ctx.Option;
1620                         int tidx = 0;
1621                         for (; tidx < target.Length; tidx++)
1622                                 if (!IsIgnorable (target [tidx], opt))
1623                                         break;
1624                         if (tidx == target.Length)
1625                                 // FIXME: this is likely a hack. A string that is consists of \0 differs from those of other ignorable characters.
1626                                 return IndexOfOrdinal (target, '\0', 0, target.Length) >= 0 ? IndexOfOrdinal (s, target, start, length) : start;
1627                         Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1628                         string replace = ct != null ? ct.Replacement : null;
1629                         byte* sk = replace == null ? targetSortKey : null;
1630                         bool noLv4 = true;
1631                         char tc = char.MinValue;
1632                         int ti = -1;
1633                         if (ct != null && sk != null) {
1634                                 for (int i = 0; i < ct.SortKey.Length; i++)
1635                                         sk [i] = ct.SortKey [i];
1636                         } else if (sk != null) {
1637                                 tc = target [tidx];
1638                                 ti = FilterOptions (target [tidx], opt);
1639                                 sk [0] = Category (ti);
1640                                 sk [1] = Level1 (ti);
1641                                 if ((opt & COpt.IgnoreNonSpace) == 0)
1642                                         sk [2] = Level2 (ti, ExtenderType.None);
1643                                 sk [3] = Uni.Level3 (ti);
1644                                 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1645                         }
1646                         if (sk != null) {
1647                                 for (tidx++; tidx < target.Length; tidx++) {
1648                                         if (Category (target [tidx]) != 1)
1649                                                 break;
1650                                         if (sk [2] == 0)
1651                                                 sk [2] = 2;
1652                                         sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1653                                 }
1654                         }
1655
1656                         do {
1657                                 int idx = 0;
1658                                 if (replace != null)
1659                                         idx = IndexOf (s, replace, start, length, targetSortKey, ref ctx);
1660                                 else
1661                                         idx = IndexOfSortKey (s, start, length, sk, tc, ti, noLv4, ref ctx);
1662                                 if (idx < 0)
1663                                         return -1;
1664                                 length -= idx - start;
1665                                 start = idx;
1666                                 if (IsPrefix (s, target, start, length, false, ref ctx))
1667                                         return idx;
1668                                 Contraction cts = GetContraction (s, start, length);
1669                                 if (cts != null) {
1670                                         start += cts.Source.Length;
1671                                         length -= cts.Source.Length;
1672                                 } else {
1673                                         start++;
1674                                         length--;
1675                                 }
1676                         } while (length > 0);
1677                         return -1;
1678                 }
1679
1680                 //
1681                 // There are the same number of LastIndexOf() methods as
1682                 // IndexOf() related methods, with the same functionalities
1683                 // for each.
1684                 //
1685
1686                 // string
1687
1688                 public int LastIndexOf (string s, string target, CompareOptions opt)
1689                 {
1690                         return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1691                 }
1692
1693                 public unsafe int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
1694                 {
1695                         if (opt == CompareOptions.Ordinal)
1696                                 return LastIndexOfOrdinal (s, target, start, length);
1697                         if (opt == CompareOptions.OrdinalIgnoreCase)
1698                                 return LastIndexOfOrdinalIgnoreCase (s, target, start, length);
1699                         byte* alwaysMatchFlags = stackalloc byte [16];
1700                         byte* neverMatchFlags = stackalloc byte [16];
1701                         byte* targetSortKey = stackalloc byte [4];
1702                         byte* sk1 = stackalloc byte [4];
1703                         byte* sk2 = stackalloc byte [4];
1704                         ClearBuffer (alwaysMatchFlags, 16);
1705                         ClearBuffer (neverMatchFlags, 16);
1706                         ClearBuffer (targetSortKey, 4);
1707                         ClearBuffer (sk1, 4);
1708                         ClearBuffer (sk2, 4);
1709                         // For some unknown reason CompareQuick() does not work fine w/ LastIndexOf().
1710                         Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null);
1711                         return LastIndexOf (s, target, start, length,
1712                                 targetSortKey, ref ctx);
1713                 }
1714
1715                 int LastIndexOfOrdinal (string s, string target, int start, int length)
1716                 {
1717                         if (target.Length == 0)
1718                                 return start;
1719                         if (s.Length < target.Length || target.Length > length)
1720                                 return -1;
1721                         int end = start - length + target.Length -1;
1722                         char tail = target [target.Length - 1];
1723                         for (int i = start; i > end;) {
1724                                 if (s [i] != tail) {
1725                                         i--;
1726                                         continue;
1727                                 }
1728                                 int x = i - target.Length + 1;
1729                                 i--;
1730                                 bool mismatch = false;
1731                                 for (int j = target.Length - 2; j >= 0; j--)
1732                                         if (s [x + j] != target [j]) {
1733                                                 mismatch = true;
1734                                                 break;
1735                                         }
1736                                 if (mismatch)
1737                                         continue;
1738                                 return x;
1739                         }
1740                         return -1;
1741                 }
1742
1743                 int LastIndexOfOrdinalIgnoreCase (string s, string target, int start, int length)
1744                 {
1745                         if (target.Length == 0)
1746                                 return start;
1747                         if (s.Length < length || target.Length > length)
1748                                 return -1;
1749                         int end = start - length + target.Length - 1;
1750                         char tail = textInfo.ToLower (target [target.Length - 1]);
1751                         for (int i = start; i > end;) {
1752                                 if (textInfo.ToLower (s [i]) != tail) {
1753                                         i--;
1754                                         continue;
1755                                 }
1756                                 int x = i - target.Length + 1;
1757                                 i--;
1758                                 bool mismatch = false;
1759                                 for (int j = target.Length - 2; j >= 0; j--)
1760                                         if (textInfo.ToLower (s [x + j]) != textInfo.ToLower (target [j])) {
1761                                                 mismatch = true;
1762                                                 break;
1763                                         }
1764                                 if (mismatch)
1765                                         continue;
1766                                 return x;
1767                         }
1768                         return -1;
1769                 }
1770
1771                 // char
1772
1773                 public int LastIndexOf (string s, char target, CompareOptions opt)
1774                 {
1775                         return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1776                 }
1777
1778                 public unsafe int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
1779                 {
1780                         if (opt == CompareOptions.Ordinal)
1781                                 return LastIndexOfOrdinal (s, target, start, length);
1782                         if (opt == CompareOptions.OrdinalIgnoreCase)
1783                                 return LastIndexOfOrdinalIgnoreCase (s, target, start, length);
1784                         byte* alwaysMatchFlags = stackalloc byte [16];
1785                         byte* neverMatchFlags = stackalloc byte [16];
1786                         byte* targetSortKey = stackalloc byte [4];
1787                         byte* sk1 = stackalloc byte [4];
1788                         byte* sk2 = stackalloc byte [4];
1789                         ClearBuffer (alwaysMatchFlags, 16);
1790                         ClearBuffer (neverMatchFlags, 16);
1791                         ClearBuffer (targetSortKey, 4);
1792                         ClearBuffer (sk1, 4);
1793                         ClearBuffer (sk2, 4);
1794                         Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null);
1795
1796                         // If target is a replacement contraction, then use 
1797                         // string search.
1798                         Contraction ct = GetContraction (target);
1799                         if (ct != null) {
1800                                 if (ct.Replacement != null)
1801                                         return LastIndexOf (s,
1802                                                 ct.Replacement, start, length,
1803                                                 targetSortKey, ref ctx);
1804                                 else {
1805                                         for (int bi = 0; bi < ct.SortKey.Length; bi++)
1806                                                 sk2 [bi] = ct.SortKey [bi];
1807                                         return LastIndexOfSortKey (s, start,
1808                                                 start, length, sk2,
1809                                                 -1, true, ref ctx);
1810                                 }
1811                         }
1812                         else {
1813                                 int ti = FilterOptions ((int) target, opt);
1814                                 targetSortKey [0] = Category (ti);
1815                                 targetSortKey [1] = Level1 (ti);
1816                                 if ((opt & COpt.IgnoreNonSpace) == 0)
1817                                         targetSortKey [2] = Level2 (ti, ExtenderType.None);
1818                                 targetSortKey [3] = Uni.Level3 (ti);
1819                                 return LastIndexOfSortKey (s, start, start,
1820                                         length, targetSortKey,
1821                                         ti, !Uni.HasSpecialWeight ((char) ti),
1822                                         ref ctx);
1823                         }
1824                 }
1825
1826                 int LastIndexOfOrdinal (string s, char target, int start, int length)
1827                 {
1828                         if (s.Length == 0)
1829                                 return -1;
1830                         int end = start - length;
1831                         for (int i = start; i > end; i--)
1832                                 if (s [i] == target)
1833                                         return i;
1834                         return -1;
1835                 }
1836
1837                 int LastIndexOfOrdinalIgnoreCase (string s, char target, int start, int length)
1838                 {
1839                         if (s.Length == 0)
1840                                 return -1;
1841                         int end = start - length;
1842                         char c = textInfo.ToUpper (target);
1843                         for (int i = start; i > end; i--)
1844                                 if (textInfo.ToUpper (s [i]) == c)
1845                                         return i;
1846                         return -1;
1847                 }
1848
1849                 // Searches target byte[] keydata
1850                 unsafe int LastIndexOfSortKey (string s, int start, int orgStart, int length, byte* sortkey, int ti, bool noLv4, ref Context ctx)
1851                 {
1852                         int end = start - length;
1853                         int idx = start;
1854                         while (idx > end) {
1855                                 int cur = idx;
1856                                 if (MatchesBackward (s, ref idx, end, orgStart,
1857                                         ti, sortkey, noLv4, ref ctx))
1858                                         return cur;
1859                         }
1860                         return -1;
1861                 }
1862
1863                 // Searches string. Search head character (or keydata when
1864                 // the head is contraction sortkey) and try IsPrefix().
1865                 unsafe int LastIndexOf (string s, string target, int start, int length, byte* targetSortKey, ref Context ctx)
1866                 {
1867                         COpt opt = ctx.Option;
1868                         int orgStart = start;
1869                         int tidx = 0;
1870                         for (; tidx < target.Length; tidx++)
1871                                 if (!IsIgnorable (target [tidx], opt))
1872                                         break;
1873                         if (tidx == target.Length)
1874                                 // FIXME: this is likely a hack. A string that is consists of \0 differs from those of other ignorable characters.
1875                                 return IndexOfOrdinal (target, '\0', 0, target.Length) >= 0 ? LastIndexOfOrdinal (s, target, start, length) : start;
1876                         Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1877                         string replace = ct != null ? ct.Replacement : null;
1878                         byte* sk = replace == null ? targetSortKey : null;
1879
1880                         bool noLv4 = true;
1881                         int ti = -1;
1882                         if (ct != null && sk != null) {
1883                                 for (int i = 0; i < ct.SortKey.Length; i++)
1884                                         sk [i] = ct.SortKey [i];
1885                         } else if (sk != null) {
1886                                 ti = FilterOptions (target [tidx], opt);
1887                                 sk [0] = Category (ti);
1888                                 sk [1] = Level1 (ti);
1889                                 if ((opt & COpt.IgnoreNonSpace) == 0)
1890                                         sk [2] = Level2 (ti, ExtenderType.None);
1891                                 sk [3] = Uni.Level3 (ti);
1892                                 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1893                         }
1894                         if (sk != null) {
1895                                 for (tidx++; tidx < target.Length; tidx++) {
1896                                         if (Category (target [tidx]) != 1)
1897                                                 break;
1898                                         if (sk [2] == 0)
1899                                                 sk [2] = 2;
1900                                         sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1901                                 }
1902                         }
1903
1904                         do {
1905                                 int idx = 0;
1906
1907                                 if (replace != null)
1908                                         idx = LastIndexOf (s, replace,
1909                                                 start, length,
1910                                                 targetSortKey, ref ctx);
1911                                 else
1912                                         idx = LastIndexOfSortKey (s, start, orgStart, length, sk, ti, noLv4, ref ctx);
1913                                 if (idx < 0)
1914                                         return -1;
1915                                 length -= start - idx;
1916                                 start = idx;
1917
1918                                 if (IsPrefix (s, target, idx, orgStart - idx + 1, false, ref ctx)) {
1919                                         for (;idx < orgStart; idx++)
1920                                                 if (!IsIgnorable (s [idx], opt))
1921                                                         break;
1922                                         return idx;
1923                                 }
1924                                 Contraction cts = GetContraction (s, idx, orgStart - idx + 1);
1925                                 if (cts != null) {
1926                                         start -= cts.Source.Length;
1927                                         length -= cts.Source.Length;
1928                                 } else {
1929                                         start--;
1930                                         length--;
1931                                 }
1932                         } while (length > 0);
1933                         return -1;
1934                 }
1935
1936                 unsafe bool MatchesForward (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ref Context ctx)
1937                 {
1938                         int si = s [idx];
1939                         if (ctx.AlwaysMatchFlags != null && si < 128 && (ctx.AlwaysMatchFlags [si / 8] & (1 << (si % 8))) != 0)
1940                                 return true;
1941                         if (ctx.NeverMatchFlags != null &&
1942                                         si < 128 &&
1943                                         (ctx.NeverMatchFlags [si / 8] & (1 << (si % 8))) != 0) {
1944                                 idx++;
1945                                 return false;
1946                         }
1947                         ExtenderType ext = GetExtenderType (s [idx]);
1948                         Contraction ct = null;
1949                         if (MatchesForwardCore (s, ref idx, end, ti, sortkey, noLv4, ext, ref ct, ref ctx)) {
1950                                 if (ctx.AlwaysMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1951                                         ctx.AlwaysMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1952                                 return true;
1953                         }
1954                         if (ctx.NeverMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1955                                 ctx.NeverMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1956                         return false;
1957                 }
1958
1959                 unsafe bool MatchesForwardCore (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
1960                 {
1961                         COpt opt = ctx.Option;
1962                         byte* charSortKey = ctx.Buffer1;
1963                         bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1964                         int si = -1;
1965                         if (ext == ExtenderType.None)
1966                                 ct = GetContraction (s, idx, end);
1967                         else if (ctx.PrevCode < 0) {
1968                                 if (ctx.PrevSortKey == null) {
1969                                         idx++;
1970                                         return false;
1971                                 }
1972                                 charSortKey = ctx.PrevSortKey;
1973                         }
1974                         else
1975                                 si = FilterExtender (ctx.PrevCode, ext, opt);
1976                         // if lv4 exists, it never matches contraction
1977                         if (ct != null) {
1978                                 idx += ct.Source.Length;
1979                                 if (!noLv4)
1980                                         return false;
1981                                 if (ct.SortKey != null) {
1982                                         for (int i = 0; i < 4; i++)
1983                                                 charSortKey [i] = sortkey [i];
1984                                         ctx.PrevCode = -1;
1985                                         ctx.PrevSortKey = charSortKey;
1986                                 } else {
1987                                         // Here is the core of LAMESPEC
1988                                         // described at the top of the source.
1989                                         int dummy = 0;
1990                                         return MatchesForward (ct.Replacement, ref dummy,
1991                                                 ct.Replacement.Length, ti, sortkey, noLv4, ref ctx);
1992                                 }
1993                         } else {
1994                                 if (si < 0)
1995                                         si = FilterOptions (s [idx], opt);
1996                                 idx++;
1997                                 charSortKey [0] = Category (si);
1998                                 bool noMatch = false;
1999                                 if (sortkey [0] == charSortKey [0])
2000                                         charSortKey [1] = Level1 (si);
2001                                 else
2002                                         noMatch = true;
2003                                 if (!ignoreNonSpace && sortkey [1] == charSortKey [1])
2004                                         charSortKey [2] = Level2 (si, ext);
2005                                 else if (!ignoreNonSpace)
2006                                         noMatch = true;
2007                                 if (noMatch) {
2008                                         for (; idx < end; idx++) {
2009                                                 if (Category (s [idx]) != 1)
2010                                                         break;
2011                                         }
2012                                         return false;
2013                                 }
2014                                 charSortKey [3] = Uni.Level3 (si);
2015                                 if (charSortKey [0] != 1)
2016                                         ctx.PrevCode = si;
2017                         }
2018                         for (; idx < end; idx++) {
2019                                 if (Category (s [idx]) != 1)
2020                                         break;
2021                                 if (ignoreNonSpace)
2022                                         continue;
2023                                 if (charSortKey [2] == 0)
2024                                                 charSortKey [2] = 2;
2025                                         charSortKey [2] = (byte) (charSortKey [2]
2026                                                 + Level2 (s [idx], ExtenderType.None));
2027                         }
2028
2029                         return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);
2030                 }
2031
2032                 unsafe bool MatchesPrimitive (COpt opt, byte* source, int si, ExtenderType ext, byte* target, int ti, bool noLv4)
2033                 {
2034                         bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
2035                         if (source [0] != target [0] ||
2036                                 source [1] != target [1] ||
2037                                 (!ignoreNonSpace && source [2] != target [2]) ||
2038                                 source [3] != target [3])
2039                                 return false;
2040                         if (noLv4 && (si < 0 || !Uni.HasSpecialWeight ((char) si)))
2041                                 return true;
2042                         else if (noLv4)
2043                                 return false;
2044                         // Since target can never be an extender, if the source
2045                         // is an expander and it matters, then they never match.
2046                         if (!ignoreNonSpace && ext == ExtenderType.Conditional)
2047                                 return false;
2048                         if (Uni.IsJapaneseSmallLetter ((char) si) !=
2049                                 Uni.IsJapaneseSmallLetter ((char) ti) ||
2050                                 ToDashTypeValue (ext, opt) !=
2051                                 // FIXME: we will have to specify correct value for target
2052                                 ToDashTypeValue (ExtenderType.None, opt) ||
2053                                 !Uni.IsHiragana ((char) si) !=
2054                                 !Uni.IsHiragana ((char) ti) ||
2055                                 IsHalfKana ((char) si, opt) !=
2056                                 IsHalfKana ((char) ti, opt))
2057                                 return false;
2058                         return true;
2059                 }
2060
2061                 unsafe bool MatchesBackward (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ref Context ctx)
2062                 {
2063                         int si = s [idx];
2064                         if (ctx.AlwaysMatchFlags != null && si < 128 && (ctx.AlwaysMatchFlags [si / 8] & (1 << (si % 8))) != 0)
2065                                 return true;
2066                         if (ctx.NeverMatchFlags != null && si < 128 && (ctx.NeverMatchFlags [si / 8] & (1 << (si % 8))) != 0) {
2067                                 idx--;
2068                                 return false;
2069                         }
2070                         ExtenderType ext = GetExtenderType (s [idx]);
2071                         Contraction ct = null;
2072                         if (MatchesBackwardCore (s, ref idx, end, orgStart, ti, sortkey, noLv4, ext, ref ct, ref ctx)) {
2073                                 if (ctx.AlwaysMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
2074                                         ctx.AlwaysMatchFlags [si / 8] |= (byte) (1 << (si % 8));
2075                                 return true;
2076                         }
2077                         if (ctx.NeverMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128) {
2078                                 ctx.NeverMatchFlags [si / 8] |= (byte) (1 << (si % 8));
2079                         }
2080                         return false;
2081                 }
2082
2083                 unsafe bool MatchesBackwardCore (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
2084                 {
2085                         COpt opt = ctx.Option;
2086                         byte* charSortKey = ctx.Buffer1;
2087                         bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
2088                         int cur = idx;
2089                         int si = -1;
2090                         // To handle extenders in source, we need to
2091                         // check next _primary_ character.
2092                         if (ext != ExtenderType.None) {
2093                                 byte diacritical = 0;
2094                                 for (int tmp = idx; ; tmp--) {
2095                                         if (tmp < 0) // heading extender
2096                                                 return false;
2097                                         if (IsIgnorable (s [tmp], opt))
2098                                                 continue;
2099                                         int tmpi = FilterOptions (s [tmp], opt);
2100                                         byte category = Category (tmpi);
2101                                         if (category == 1) {
2102                                                 diacritical = Level2 (tmpi, ExtenderType.None);
2103                                                 continue;
2104                                         }
2105                                         si = FilterExtender (tmpi, ext, opt);
2106                                         charSortKey [0] = category;
2107                                         charSortKey [1] = Level1 (si);
2108                                         if (!ignoreNonSpace)
2109                                                 charSortKey [2] = Level2 (si, ext);
2110                                         charSortKey [3] = Uni.Level3 (si);
2111                                         if (ext != ExtenderType.Conditional &&
2112                                                 diacritical != 0)
2113                                                 charSortKey [2] =
2114                                                         (charSortKey [2] == 0) ?
2115                                                         (byte) (diacritical + 2) :
2116                                                         diacritical;
2117                                         break;
2118                                 }
2119                                 idx--;
2120                         }
2121                         if (ext == ExtenderType.None)
2122                                 ct = GetTailContraction (s, idx, end);
2123                         // if lv4 exists, it never matches contraction
2124                         if (ct != null) {
2125                                 idx -= ct.Source.Length;
2126                                 if (!noLv4)
2127                                         return false;
2128                                 if (ct.SortKey != null) {
2129                                         for (int i = 0; i < 4; i++)
2130                                                 charSortKey [i] = sortkey [i];
2131                                         ctx.PrevCode = -1;
2132                                         ctx.PrevSortKey = charSortKey;
2133                                 } else {
2134                                         // Here is the core of LAMESPEC
2135                                         // described at the top of the source.
2136                                         int dummy = ct.Replacement.Length - 1;
2137                                         return 0 <= LastIndexOfSortKey (
2138                                                 ct.Replacement, dummy, dummy,
2139                                                 ct.Replacement.Length, sortkey,
2140                                                 ti, noLv4, ref ctx);
2141                                 }
2142                         } else if (ext == ExtenderType.None) {
2143                                 if (si < 0)
2144                                         si = FilterOptions (s [idx], opt);
2145                                 idx--;
2146                                 bool noMatch = false;
2147                                 charSortKey [0] = Category (si);
2148                                 if (charSortKey [0] == sortkey [0])
2149                                         charSortKey [1] = Level1 (si);
2150                                 else
2151                                         noMatch = true;
2152                                 if (!ignoreNonSpace && charSortKey [1] == sortkey [1])
2153                                         charSortKey [2] = Level2 (si, ext);
2154                                 else if (!ignoreNonSpace)
2155                                         noMatch = true;
2156                                 if (noMatch)
2157                                         return false;
2158                                 charSortKey [3] = Uni.Level3 (si);
2159                                 if (charSortKey [0] != 1)
2160                                         ctx.PrevCode = si;
2161                         }
2162                         if (ext == ExtenderType.None) {
2163                                 for (int tmp = cur + 1; tmp < orgStart; tmp++) {
2164                                         if (Category (s [tmp]) != 1)
2165                                                 break;
2166                                         if (ignoreNonSpace)
2167                                                 continue;
2168                                         if (charSortKey [2] == 0)
2169                                                         charSortKey [2] = 2;
2170                                         charSortKey [2] = (byte) (charSortKey [2]
2171                                                 + Level2 (s [tmp], ExtenderType.None));
2172                                 }
2173                         }
2174                         return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);
2175                 }
2176                 #endregion
2177         }
2178 }