9a7306598502a7ae228c30d1175fd69d93892fcd
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / SimpleCollator.cs
1 //
2 // SimpleCollator.cs : the core collator implementation
3 //
4 // Author:
5 //      Atsushi Enomoto  <atsushi@ximian.com>
6 //
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
8 //
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
16 // 
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
19 // 
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28
29 //
30 // Here's a summary for supporting contractions, expansions and diacritical 
31 // remappings.
32 //
33 // Diacritical mapping is a simple tailoring rule that "remaps" diacritical
34 // weight value from one to another. For now it applies to all range of
35 // characters, but at some stage we might need to limit the remapping ranges.
36 //
37 // A Contraction consists of a string (actually char[]) and a sortkey entry
38 // (i.e. byte[]). It indicates that a sequence of characters is interpreted
39 // as a single character that has the mapped sortkey value. There is no
40 // character which goes across "special rules". When the collator encountered
41 // such a sequence of characters, it just returns the sortkey value without
42 // further replacement.
43 //
44 // Since it is still likely to happen that a contraction sequence matches
45 // other character than the identical sequence (depending on CompareOptions
46 // and of course, defined sortkey value itself), comparison cannot be done
47 // at source char[] level.
48 //
49 // In IndexOf(), the first character in the target string or the target char
50 // itself is turned into sortkey bytes. If the character has a contraction and
51 // that is sortkey map, then it is used instead. If the contraction exists and
52 // that is replacement map, then the first character of the replacement string
53 // is searched instead. IndexOf() always searches only for the top character,
54 // and if it returned negative value, then it returns -1. Otherwise, it then
55 // tries IsPrefix() from that location. If it returns true, then it returns
56 // the index.
57 //
58 // LAMESPEC: IndexOf() is lame as a whole API. It never matches in the middle
59 // of expansion and there is no proper way to return such indexes within
60 // a single int return value.
61 //
62 // For example, try below in .NET:
63 //      IndexOf("\u00E6", "a")
64 //      IndexOf("\u00E6", "e")
65 //
66
67
68 using System;
69 using System.Collections;
70 using System.Globalization;
71
72 using Uni = Mono.Globalization.Unicode.MSCompatUnicodeTable;
73 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
74 using COpt = System.Globalization.CompareOptions;
75
76 namespace Mono.Globalization.Unicode
77 {
78         internal class SimpleCollator
79         {
80                 // this environment variable is for debugging quick check.
81                 static bool QuickCheckDisabled =
82                         Environment.GetEnvironmentVariable (
83                         "MONO_COLLATION_QUICK_CHECK_DISABLED") == "yes";
84
85                 unsafe internal struct Context
86                 {
87                         public Context (CompareOptions opt, byte* alwaysMatchFlags, byte* neverMatchFlags, byte* buffer1, byte* buffer2, byte* prev1, bool quickCheckPossible)
88                         {
89                                 Option = opt;
90                                 AlwaysMatchFlags = alwaysMatchFlags;
91                                 NeverMatchFlags = neverMatchFlags;
92                                 Buffer1 = buffer1;
93                                 Buffer2 = buffer2;
94                                 PrevSortKey = prev1;
95                                 PrevCode = -1;
96                                 QuickCheckPossible = quickCheckPossible;
97                         }
98
99                         public readonly CompareOptions Option;
100                         public readonly byte* NeverMatchFlags;
101                         public readonly byte* AlwaysMatchFlags;
102                         public byte* Buffer1;
103                         public byte* Buffer2;
104                         public int PrevCode;
105                         public byte* PrevSortKey;
106                         public readonly bool QuickCheckPossible;
107
108                         public void ClearPrevInfo ()
109                         {
110                                 PrevCode = -1;
111                                 PrevSortKey = null;
112                         }
113                 }
114
115                 unsafe struct PreviousInfo
116                 {
117                         public int Code;
118                         public byte* SortKey;
119
120                         public PreviousInfo (bool dummy)
121                         {
122                                 Code = -1;
123                                 SortKey = null;
124                         }
125                 }
126
127                 struct Escape
128                 {
129                         public string Source;
130                         public int Index;
131                         public int Start;
132                         public int End;
133                         public int Optional;
134                 }
135
136                 static SimpleCollator invariant =
137                         new SimpleCollator (CultureInfo.InvariantCulture);
138
139                 readonly TextInfo textInfo; // for ToLower().
140                 readonly bool frenchSort;
141                 unsafe readonly byte* cjkCatTable;
142                 unsafe readonly byte* cjkLv1Table;
143                 readonly CodePointIndexer cjkIndexer;
144                 unsafe readonly byte* cjkLv2Table;
145                 readonly CodePointIndexer cjkLv2Indexer;
146                 readonly int lcid;
147                 readonly Contraction [] contractions;
148                 readonly Level2Map [] level2Maps;
149
150                 // This flag marks characters as "unsafe", where the character
151                 // could be used as part of a contraction (whose length > 1).
152                 readonly byte [] unsafeFlags;
153
154                 const int UnsafeFlagLength = 0x300 / 8;
155
156 //              readonly byte [] contractionFlags = new byte [16];
157
158                 // This array is internally used inside IndexOf() to store
159                 // "no need to check" ASCII characters.
160                 //
161                 // Now that it should be thread safe, this array is allocated
162                 // at every time.
163 //              byte [] neverMatchFlags = new byte [128 / 8];
164
165                 #region .ctor() and split functions
166
167                 public SimpleCollator (CultureInfo culture)
168                 {
169                         lcid = culture.LCID;
170                         textInfo = culture.TextInfo;
171
172                         unsafe {
173                                 SetCJKTable (culture, ref cjkIndexer,
174                                         ref cjkCatTable, ref cjkLv1Table,
175                                         ref cjkLv2Indexer, ref cjkLv2Table);
176                         }
177
178                         // Get tailoring info
179                         TailoringInfo t = null;
180                         for (CultureInfo ci = culture; ci.LCID != 127; ci = ci.Parent) {
181                                 t = Uni.GetTailoringInfo (ci.LCID);
182                                 if (t != null)
183                                         break;
184                         }
185                         if (t == null) // then use invariant
186                                 t = Uni.GetTailoringInfo (127);
187
188                         frenchSort = t.FrenchSort;
189                         Uni.BuildTailoringTables (culture, t, ref contractions,
190                                 ref level2Maps);
191                         unsafeFlags = new byte [UnsafeFlagLength];
192                         foreach (Contraction c in contractions) {
193                                 if (c.Source.Length > 1)
194                                         foreach (char ch in c.Source)
195                                                 unsafeFlags [(int) ch / 8 ]
196                                                         |= (byte) (1 << ((int) ch & 7));
197 //                              for (int i = 0; i < c.Source.Length; i++) {
198 //                                      int ch = c.Source [i];
199 //                                      if (ch > 127)
200 //                                              continue;
201 //                                      contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
202 //                              }
203                         }
204                         if (lcid != 127)
205                                 foreach (Contraction c in invariant.contractions) {
206                                         if (c.Source.Length > 1)
207                                                 foreach (char ch in c.Source)
208                                                         unsafeFlags [(int) ch / 8 ] 
209                                                                 |= (byte) (1 << ((int) ch & 7));
210 //                                      for (int i = 0; i < c.Source.Length; i++) {
211 //                                              int ch = c.Source [i];
212 //                                              if (ch > 127)
213 //                                                      continue;
214 //                                              contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
215 //                                      }
216                                 }
217
218                         // FIXME: Since tailorings are mostly for latin
219                         // (and in some cases Cyrillic) characters, it would
220                         // be much better for performance to store "start 
221                         // indexes" for > 370 (culture-specific letters).
222
223 /*
224 // dump tailoring table
225 Console.WriteLine ("******** building table for {0} : contractions - {1} diacritical - {2}",
226 culture.LCID, contractions.Length, level2Maps.Length);
227 foreach (Contraction c in contractions) {
228 foreach (char cc in c.Source)
229 Console.Write ("{0:X4} ", (int) cc);
230 Console.WriteLine (" -> '{0}'", c.Replacement);
231 }
232 */
233                 }
234
235                 unsafe private void SetCJKTable (
236                         CultureInfo culture, ref CodePointIndexer cjkIndexer,
237                         ref byte* catTable, ref byte* lv1Table,
238                         ref CodePointIndexer lv2Indexer, ref byte* lv2Table)
239                 {
240                         string name = GetNeutralCulture (culture).Name;
241
242                         Uni.FillCJK (name, ref cjkIndexer, ref catTable,
243                                 ref lv1Table, ref lv2Indexer, ref lv2Table);
244                 }
245
246                 static CultureInfo GetNeutralCulture (CultureInfo info)
247                 {
248                         CultureInfo ret = info;
249                         while (ret.Parent != null && ret.Parent.LCID != 127)
250                                 ret = ret.Parent;
251                         return ret;
252                 }
253
254                 #endregion
255
256                 unsafe byte Category (int cp)
257                 {
258                         if (cp < 0x3000 || cjkCatTable == null)
259                                 return Uni.Category (cp);
260                         int idx = cjkIndexer.ToIndex (cp);
261                         return idx < 0 ? Uni.Category (cp) : cjkCatTable [idx];
262                 }
263
264                 unsafe byte Level1 (int cp)
265                 {
266                         if (cp < 0x3000 || cjkLv1Table == null)
267                                 return Uni.Level1 (cp);
268                         int idx = cjkIndexer.ToIndex (cp);
269                         return idx < 0 ? Uni.Level1 (cp) : cjkLv1Table [idx];
270                 }
271
272                 unsafe byte Level2 (int cp, ExtenderType ext)
273                 {
274                         if (ext == ExtenderType.Buggy)
275                                 return 5;
276                         else if (ext == ExtenderType.Conditional)
277                                 return 0;
278
279                         if (cp < 0x3000 || cjkLv2Table == null)
280                                 return Uni.Level2 (cp);
281                         int idx = cjkLv2Indexer.ToIndex (cp);
282                         byte ret = idx < 0 ? (byte) 0 : cjkLv2Table [idx];
283                         if (ret != 0)
284                                 return ret;
285                         ret = Uni.Level2 (cp);
286                         if (level2Maps.Length == 0)
287                                 return ret;
288                         for (int i = 0; i < level2Maps.Length; i++) {
289                                 if (level2Maps [i].Source == ret)
290                                         return level2Maps [i].Replace;
291                                 else if (level2Maps [i].Source > ret)
292                                         break;
293                         }
294                         return ret;
295                 }
296
297                 static bool IsHalfKana (int cp, COpt opt)
298                 {
299                         return (opt & COpt.IgnoreWidth) != 0 ||
300                                 Uni.IsHalfWidthKana ((char) cp);
301                 }
302
303                 Contraction GetContraction (string s, int start, int end)
304                 {
305 //                      int si = s [start];
306 //                      if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
307 //                              return null;
308                         Contraction c = GetContraction (s, start, end, contractions);
309                         if (c != null || lcid == 127)
310                                 return c;
311                         return GetContraction (s, start, end, invariant.contractions);
312                 }
313
314                 Contraction GetContraction (string s, int start, int end, Contraction [] clist)
315                 {
316                         for (int i = 0; i < clist.Length; i++) {
317                                 Contraction ct = clist [i];
318                                 int diff = ct.Source [0] - s [start];
319                                 if (diff > 0)
320                                         return null; // it's already sorted
321                                 else if (diff < 0)
322                                         continue;
323                                 char [] chars = ct.Source;
324                                 if (end - start < chars.Length)
325                                         continue;
326                                 bool match = true;
327                                 for (int n = 0; n < chars.Length; n++)
328                                         if (s [start + n] != chars [n]) {
329                                                 match = false;
330                                                 break;
331                                         }
332                                 if (match)
333                                         return ct;
334                         }
335                         return null;
336                 }
337
338                 Contraction GetTailContraction (string s, int start, int end)
339                 {
340 //                      int si = s [end - 1];
341 //                      if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
342 //                              return null;
343                         Contraction c = GetTailContraction (s, start, end, contractions);
344                         if (c != null || lcid == 127)
345                                 return c;
346                         return GetTailContraction (s, start, end, invariant.contractions);
347                 }
348
349                 Contraction GetTailContraction (string s, int start, int end, Contraction [] clist)
350                 {
351                         if (start == end || end < -1 || start >= s.Length || s.Length <= end + 1)
352                                 throw new SystemException (String.Format ("MONO internal error. Failed to get TailContraction. start = {0} end = {1} string = '{2}'", start, end, s));
353                         for (int i = 0; i < clist.Length; i++) {
354                                 Contraction ct = clist [i];
355                                 int diff = ct.Source [0] - s [end + 1];
356                                 if (diff > 0)
357                                         return null; // it's already sorted
358                                 else if (diff < 0)
359                                         continue;
360                                 char [] chars = ct.Source;
361
362                                 bool match = true;
363                                 if (chars.Length > start - end)
364                                         continue;
365                                 for (int n = 0; n < chars.Length; n++)
366                                         if (s [start - n] != chars [chars.Length - 1 - n]) {
367                                                 match = false;
368                                                 break;
369                                         }
370                                 if (match)
371                                         return ct;
372                         }
373                         return null;
374                 }
375
376                 Contraction GetContraction (char c)
377                 {
378                         Contraction ct = GetContraction (c, contractions);
379                         if (ct != null || lcid == 127)
380                                 return ct;
381                         return GetContraction (c, invariant.contractions);
382                 }
383
384                 Contraction GetContraction (char c, Contraction [] clist)
385                 {
386                         for (int i = 0; i < clist.Length; i++) {
387                                 Contraction ct = clist [i];
388                                 if (ct.Source [0] > c)
389                                         return null; // it's already sorted
390                                 if (ct.Source [0] == c && ct.Source.Length == 1)
391                                         return ct;
392                         }
393                         return null;
394                 }
395
396                 int FilterOptions (int i, COpt opt)
397                 {
398                         if ((opt & COpt.IgnoreWidth) != 0) {
399                                 int x = Uni.ToWidthCompat (i);
400                                 if (x != 0)
401                                         i = x;
402                         }
403                         if ((opt & COpt.IgnoreCase) != 0)
404                                 i = textInfo.ToLower ((char) i);
405                         if ((opt & COpt.IgnoreKanaType) != 0)
406                                 i = Uni.ToKanaTypeInsensitive (i);
407                         return i;
408                 }
409
410                 enum ExtenderType {
411                         None,
412                         Simple,
413                         Voiced,
414                         Conditional,
415                         Buggy,
416                 }
417
418                 ExtenderType GetExtenderType (int i)
419                 {
420                         // LAMESPEC: Windows expects true for U+3005, but 
421                         // sometimes it does not represent to repeat just
422                         // one character.
423                         // Windows also expects true for U+3031 and U+3032,
424                         // but they should *never* repeat one character.
425
426                         // U+2015 becomes an extender only when it is Japanese
427                         if (i == 0x2015)
428                                 return lcid == 16 ? ExtenderType.Conditional :
429                                         ExtenderType.None;
430
431                         if (i < 0x3005 || i > 0xFF70)
432                                 return ExtenderType.None;
433                         if (i >= 0xFE7C) {
434                                 switch (i) {
435                                 case 0xFE7C:
436                                 case 0xFE7D:
437                                         return ExtenderType.Simple;
438                                 case 0xFF70:
439                                         return ExtenderType.Conditional;
440                                 case 0xFF9E:
441                                 case 0xFF9F:
442                                         return ExtenderType.Voiced;
443                                 }
444                         }
445                         if (i > 0x30FE)
446                                 return ExtenderType.None;
447                         switch (i) {
448                         case 0x3005: // LAMESPEC: see above.
449                                 return ExtenderType.Buggy;
450                         case 0x3031: // LAMESPEC: see above.
451                         case 0x3032: // LAMESPEC: see above.
452                         case 0x309D:
453                         case 0x30FD:
454                                 return ExtenderType.Simple;
455                         case 0x309E:
456                         case 0x30FE:
457                                 return ExtenderType.Voiced;
458                         case 0x30FC:
459                                 return ExtenderType.Conditional;
460                         default:
461                                 return ExtenderType.None;
462                         }
463                 }
464
465                 static byte ToDashTypeValue (ExtenderType ext, COpt opt)
466                 {
467                         if ((opt & COpt.IgnoreNonSpace) != 0) // LAMESPEC: huh, why?
468                                 return 3;
469                         switch (ext) {
470                         case ExtenderType.None:
471                                 return 3;
472                         case ExtenderType.Conditional:
473                                 return 5;
474                         default:
475                                 return 4;
476                         }
477                 }
478
479                 int FilterExtender (int i, ExtenderType ext, COpt opt)
480                 {
481                         if (ext == ExtenderType.Conditional &&
482                                 Uni.HasSpecialWeight ((char) i)) {
483                                 bool half = IsHalfKana ((char) i, opt);
484                                 bool katakana = !Uni.IsHiragana ((char) i);
485                                 switch (Level1 (i) & 7) {
486                                 case 2:
487                                         return half ? 0xFF71 : katakana ? 0x30A2 : 0x3042;
488                                 case 3:
489                                         return half ? 0xFF72 : katakana ? 0x30A4 : 0x3044;
490                                 case 4:
491                                         return half ? 0xFF73 : katakana ? 0x30A6 : 0x3046;
492                                 case 5:
493                                         return half ? 0xFF74 : katakana ? 0x30A8 : 0x3048;
494                                 case 6:
495                                         return half ? 0xFF75 : katakana ? 0x30AA : 0x304A;
496                                 }
497                         }
498                         return i;
499                 }
500
501                 static bool IsIgnorable (int i, COpt opt)
502                 {
503                         return Uni.IsIgnorable (i, (byte) (1 +
504                                 ((opt & COpt.IgnoreSymbols) != 0 ? 2 : 0) +
505                                 ((opt & COpt.IgnoreNonSpace) != 0 ? 4 : 0)));
506                         
507                 }
508
509                 bool IsSafe (int i)
510                 {
511                         return i / 8 >= unsafeFlags.Length ? true : (unsafeFlags [i / 8] & (1 << (i % 8))) == 0;
512                 }
513
514                 #region GetSortKey()
515
516                 public SortKey GetSortKey (string s)
517                 {
518                         return GetSortKey (s, CompareOptions.None);
519                 }
520
521                 public SortKey GetSortKey (string s, CompareOptions options)
522                 {
523                         return GetSortKey (s, 0, s.Length, options);
524                 }
525
526                 public SortKey GetSortKey (string s, int start, int length, CompareOptions options)
527                 {
528                         SortKeyBuffer buf = new SortKeyBuffer (lcid);
529                         buf.Initialize (options, lcid, s, frenchSort);
530                         int end = start + length;
531                         GetSortKey (s, start, end, buf, options);
532                         return buf.GetResultAndReset ();
533                 }
534
535                 unsafe void GetSortKey (string s, int start, int end,
536                         SortKeyBuffer buf, CompareOptions opt)
537                 {
538                         byte* prevbuf = stackalloc byte [4];
539                         ClearBuffer (prevbuf, 4);
540                         Context ctx = new Context (opt, null, null, null, null, prevbuf, false);
541
542                         for (int n = start; n < end; n++) {
543                                 int i = s [n];
544
545                                 ExtenderType ext = GetExtenderType (i);
546                                 if (ext != ExtenderType.None) {
547                                         i = FilterExtender (ctx.PrevCode, ext, opt);
548                                         if (i >= 0)
549                                                 FillSortKeyRaw (i, ext, buf, opt);
550                                         else if (ctx.PrevSortKey != null) {
551                                                 byte* b = ctx.PrevSortKey;
552                                                 buf.AppendNormal (
553                                                         b [0],
554                                                         b [1],
555                                                         b [2] != 1 ? b [2] : Level2 (i, ext),
556                                                         b [3] != 1 ? b [3] : Uni.Level3 (i));
557                                         }
558                                         // otherwise do nothing.
559                                         // (if the extender is the first char
560                                         // in the string, then just ignore.)
561                                         continue;
562                                 }
563
564                                 if (IsIgnorable (i, opt))
565                                         continue;
566                                 i = FilterOptions (i, opt);
567
568                                 Contraction ct = GetContraction (s, n, end);
569                                 if (ct != null) {
570                                         if (ct.Replacement != null) {
571                                                 GetSortKey (ct.Replacement, 0, ct.Replacement.Length, buf, opt);
572                                         } else {
573                                                 byte* b = ctx.PrevSortKey;
574                                                 for (int bi = 0; bi < ct.SortKey.Length; bi++)
575                                                         b [bi] = ct.SortKey [bi];
576                                                 buf.AppendNormal (
577                                                         b [0],
578                                                         b [1],
579                                                         b [2] != 1 ? b [2] : Level2 (i, ext),
580                                                         b [3] != 1 ? b [3] : Uni.Level3 (i));
581                                                 ctx.PrevCode = -1;
582                                         }
583                                         n += ct.Source.Length - 1;
584                                 }
585                                 else {
586                                         if (!Uni.IsIgnorableNonSpacing (i))
587                                                 ctx.PrevCode = i;
588                                         FillSortKeyRaw (i, ExtenderType.None, buf, opt);
589                                 }
590                         }
591                 }
592
593                 void FillSortKeyRaw (int i, ExtenderType ext,
594                         SortKeyBuffer buf, CompareOptions opt)
595                 {
596                         if (0x3400 <= i && i <= 0x4DB5) {
597                                 int diff = i - 0x3400;
598                                 buf.AppendCJKExtension (
599                                         (byte) (0x10 + diff / 254),
600                                         (byte) (diff % 254 + 2));
601                                 return;
602                         }
603
604                         UnicodeCategory uc = char.GetUnicodeCategory ((char) i);
605                         switch (uc) {
606                         case UnicodeCategory.PrivateUse:
607                                 int diff = i - 0xE000;
608                                 buf.AppendNormal (
609                                         (byte) (0xE5 + diff / 254),
610                                         (byte) (diff % 254 + 2),
611                                         0,
612                                         0);
613                                 return;
614                         case UnicodeCategory.Surrogate:
615                                 FillSurrogateSortKeyRaw (i, buf);
616                                 return;
617                         }
618
619                         byte level2 = Level2 (i, ext);
620                         if (Uni.HasSpecialWeight ((char) i)) {
621                                 byte level1 = Level1 (i);
622                                 buf.AppendKana (
623                                         Category (i),
624                                         level1,
625                                         level2,
626                                         Uni.Level3 (i),
627                                         Uni.IsJapaneseSmallLetter ((char) i),
628                                         ToDashTypeValue (ext, opt),
629                                         !Uni.IsHiragana ((char) i),
630                                         IsHalfKana ((char) i, opt)
631                                         );
632                                 if ((opt & COpt.IgnoreNonSpace) == 0 && ext == ExtenderType.Voiced)
633                                         // Append voice weight
634                                         buf.AppendNormal (1, 1, 1, 0);
635                         }
636                         else
637                                 buf.AppendNormal (
638                                         Category (i),
639                                         Level1 (i),
640                                         level2,
641                                         Uni.Level3 (i));
642                 }
643
644                 void FillSurrogateSortKeyRaw (int i, SortKeyBuffer buf)
645                 {
646                         int diffbase = 0;
647                         int segment = 0;
648                         byte lower = 0;
649
650                         if (i < 0xD840) {
651                                 diffbase = 0xD800;
652                                 segment = 0x41;
653                                 lower = (byte) ((i == 0xD800) ? 0x3E : 0x3F);
654                         } else if (0xD840 <= i && i < 0xD880) {
655                                 diffbase = 0xD840;
656                                 segment = 0xF2;
657                                 lower = 0x3E;
658                         } else if (0xDB80 <= i && i < 0xDC00) {
659                                 diffbase = 0xDB80 - 0x40;
660                                 segment = 0xFE;
661                                 lower = 0x3E;
662                         } else {
663                                 diffbase = 0xDC00 - 0xF8 + 2;
664                                 segment = 0x41;
665                                 lower = 0x3F;
666                         }
667                         int diff = i - diffbase;
668
669                         buf.AppendNormal (
670                                 (byte) (segment + diff / 254),
671                                 (byte) (diff % 254 + 2),
672                                 lower,
673                                 lower);
674                 }
675
676                 #endregion
677
678                 #region Compare()
679
680                 public int Compare (string s1, string s2)
681                 {
682                         return Compare (s1, s2, CompareOptions.None);
683                 }
684
685                 public int Compare (string s1, string s2, CompareOptions options)
686                 {
687                         return Compare (s1, 0, s1.Length, s2, 0, s2.Length, options);
688                 }
689
690                 private int CompareOrdinal (string s1, int idx1, int len1,
691                         string s2, int idx2, int len2)
692                 {
693                         int min = len1 < len2 ? len1 : len2;
694                         int end1 = idx1 + min;
695                         int end2 = idx2 + min;
696                         if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
697                                 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
698                         for (int i1 = idx1, i2 = idx2;
699                                 i1 < end1 && i2 < end2; i1++, i2++)
700                                 if (s1 [i1] != s2 [i2])
701                                         return s1 [i1] - s2 [i2];
702                         return len1 == len2 ? 0 :
703                                 len1 == min ? - 1 : 1;
704                 }
705
706                 // mostly equivalent to CompareOrdinal, but the return value is
707                 // not based on codepoints.
708                 private int CompareQuick (string s1, int idx1, int len1,
709                         string s2, int idx2, int len2, out bool sourceConsumed,
710                         out bool targetConsumed, bool immediateBreakup)
711                 {
712                         sourceConsumed = false;
713                         targetConsumed = false;
714                         int min = len1 < len2 ? len1 : len2;
715                         int end1 = idx1 + min;
716                         int end2 = idx2 + min;
717                         if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
718                                 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
719                         for (int i1 = idx1, i2 = idx2;
720                                 i1 < end1 && i2 < end2; i1++, i2++)
721                                 if (s1 [i1] != s2 [i2]) {
722                                         if (immediateBreakup)
723                                                 return -1;
724                                         int ret = Category (s1 [i1]) - Category (s2 [i2]);
725                                         if (ret == 0)
726                                                 ret = Level1 (s1 [i1]) - Level1 (s2 [i2]);
727                                         // no level2 and 4
728                                         if (ret == 0)
729                                                 ret = Uni.Level3 (s1 [i1]) - Uni.Level3 (s2 [i2]);
730                                         if (ret == 0)
731                                                 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. '{0}' {2} {3} '{1}' {4} {5}", s1, s2, idx1, end1, idx2, end2));
732                                         return ret;
733                                 }
734                         sourceConsumed = len1 <= len2;
735                         targetConsumed = len1 >= len2;
736                         return len1 == len2 ? 0 :
737                                 len1 == min ? - 1 : 1;
738                 }
739
740                 private int CompareOrdinalIgnoreCase (string s1, int idx1, int len1,
741                         string s2, int idx2, int len2)
742                 {
743                         int min = len1 < len2 ? len1 : len2;
744                         int end1 = idx1 + min;
745                         int end2 = idx2 + min;
746                         if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
747                                 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
748                         TextInfo ti = invariant.textInfo;
749                         for (int i1 = idx1, i2 = idx2;
750                                 i1 < end1 && i2 < end2; i1++, i2++)
751                                 if (ti.ToLower (s1 [i1]) != ti.ToLower (s2 [i2]))
752                                         return ti.ToLower (s1 [i1]) - ti.ToLower (s2 [i2]);
753                         return len1 == len2 ? 0 :
754                                 len1 == min ? - 1 : 1;
755                 }
756
757                 public unsafe int Compare (string s1, int idx1, int len1,
758                         string s2, int idx2, int len2, CompareOptions options)
759                 {
760                         // quick equality check
761                         if (idx1 == idx2 && len1 == len2 &&
762                                 Object.ReferenceEquals (s1, s2))
763                                 return 0;
764                         // FIXME: this should be done inside Compare() at
765                         // any time.
766 //                      int ord = CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
767 //                      if (ord == 0)
768 //                              return 0;
769                         if (options == CompareOptions.Ordinal)
770                                 return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
771 #if NET_2_0
772                         if (options == CompareOptions.OrdinalIgnoreCase)
773                                 return CompareOrdinalIgnoreCase (s1, idx1, len1, s2, idx2, len2);
774 #endif
775
776 #if false // stable easy version, depends on GetSortKey().
777                         SortKey sk1 = GetSortKey (s1, idx1, len1, options);
778                         SortKey sk2 = GetSortKey (s2, idx2, len2, options);
779                         byte [] d1 = sk1.KeyData;
780                         byte [] d2 = sk2.KeyData;
781                         int len = d1.Length > d2.Length ? d2.Length : d1.Length;
782                         for (int i = 0; i < len; i++)
783                                 if (d1 [i] != d2 [i])
784                                         return d1 [i] < d2 [i] ? -1 : 1;
785                         return d1.Length == d2.Length ? 0 : d1.Length < d2.Length ? -1 : 1;
786 #else
787                         byte* sk1 = stackalloc byte [4];
788                         byte* sk2 = stackalloc byte [4];
789                         ClearBuffer (sk1, 4);
790                         ClearBuffer (sk2, 4);
791                         Context ctx = new Context (options, null, null, sk1, sk2, null,
792                                 QuickCheckPossible (s1, idx1, idx1 + len1, s2, idx2, idx2 + len2));
793
794                         bool dummy, dummy2;
795                         int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, out dummy, out dummy2, true, false, ref ctx);
796                         return ret == 0 ? 0 : ret < 0 ? -1 : 1;
797 #endif
798                 }
799
800                 unsafe void ClearBuffer (byte* buffer, int size)
801                 {
802                         for (int i = 0; i < size; i++)
803                                 buffer [i] = 0;
804                 }
805
806                 bool QuickCheckPossible (string s1, int idx1, int end1,
807                         string s2, int idx2, int end2)
808                 {
809                         if (QuickCheckDisabled)
810                                 return false;
811 //                      if (s1.Length > 100 || s2.Length > 100)
812 //                              return false;
813                         for (int i = idx1; i < end1; i++)
814                                 if (s1 [i] < 0x20 && (s1 [i] < '\x9' || s1 [i] > '\xD') || s1 [i] >= 0x80 || s1 [i] == '-' || s1 [i] == '\'')
815                                         return false;
816                         for (int i = idx2; i < end2; i++)
817                                 if (s2 [i] < 0x20 && (s2 [i] < '\x9' || s2 [i] > '\xD') || s2 [i] >= 0x80 || s2 [i] == '-' || s2 [i] == '\'')
818                                         return false;
819                         return true;
820                 }
821
822                 unsafe int CompareInternal (string s1, int idx1, int len1, string s2,
823                         int idx2, int len2,
824                         out bool targetConsumed, out bool sourceConsumed,
825                         bool skipHeadingExtenders, bool immediateBreakup,
826                         ref Context ctx)
827                 {
828                         COpt opt = ctx.Option;
829                         int start1 = idx1;
830                         int start2 = idx2;
831                         int end1 = idx1 + len1;
832                         int end2 = idx2 + len2;
833                         targetConsumed = false;
834                         sourceConsumed = false;
835                         PreviousInfo prev2 = new PreviousInfo (false);
836
837                         if (opt == CompareOptions.None && ctx.QuickCheckPossible)
838                                 return CompareQuick (s1, idx1, len1, s2, idx2, len2, out sourceConsumed, out targetConsumed, immediateBreakup);
839
840                         // It holds final result that comes from the comparison
841                         // at level 2 or lower. Even if Compare() found the
842                         // difference at level 2 or lower, it still has to
843                         // continue level 1 comparison. FinalResult is used
844                         // when there was no further differences.
845                         int finalResult = 0;
846                         // It holds the comparison level to do. It starts from
847                         // 5, and becomes 1 when we do primary-only comparison.
848                         int currentLevel = 5;
849
850                         int lv5At1 = -1;
851                         int lv5At2 = -1;
852                         int lv5Value1 = 0;
853                         int lv5Value2 = 0;
854
855                         // Skip heading extenders
856                         if (skipHeadingExtenders) {
857                                 for (; idx1 < end1; idx1++)
858                                         if (GetExtenderType (s1 [idx1]) == ExtenderType.None)
859                                                 break;
860                                 for (; idx2 < end2; idx2++)
861                                         if (GetExtenderType (s2 [idx2]) == ExtenderType.None)
862                                                 break;
863                         }
864
865                         ExtenderType ext1 = ExtenderType.None;
866                         ExtenderType ext2 = ExtenderType.None;
867
868                         int quickCheckPos1 = idx1;
869                         int quickCheckPos2 = idx2;
870                         bool stringSort = (opt & COpt.StringSort) != 0;
871                         bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
872                         Escape escape1 = new Escape ();
873                         Escape escape2 = new Escape ();
874
875                         while (true) {
876                                 for (; idx1 < end1; idx1++)
877                                         if (!IsIgnorable (s1 [idx1], opt))
878                                                 break;
879                                 for (; idx2 < end2; idx2++)
880                                         if (!IsIgnorable (s2 [idx2], opt))
881                                                 break;
882
883                                 if (idx1 >= end1) {
884                                         if (escape1.Source == null)
885                                                 break;
886                                         s1 = escape1.Source;
887                                         start1 = escape1.Start;
888                                         idx1 = escape1.Index;
889                                         end1 = escape1.End;
890                                         quickCheckPos1 = escape1.Optional;
891                                         escape1.Source = null;
892                                         continue;
893                                 }
894                                 if (idx2 >= end2) {
895                                         if (escape2.Source == null)
896                                                 break;
897                                         s2 = escape2.Source;
898                                         start2 = escape2.Start;
899                                         idx2 = escape2.Index;
900                                         end2 = escape2.End;
901                                         quickCheckPos2 = escape2.Optional;
902                                         escape2.Source = null;
903                                         continue;
904                                 }
905 #if true
906 // If comparison is unstable, then this part is one of the most doubtful part.
907 // Here we run quick codepoint comparison and run back to "stable" area to
908 // compare characters.
909
910                                 // Strictly to say, even the first character
911                                 // could be compared here, but it messed
912                                 // backward step, so I just avoided mess.
913                                 if (quickCheckPos1 < idx1 && quickCheckPos2 < idx2) {
914                                         while (idx1 < end1 && idx2 < end2 &&
915                                                 s1 [idx1] == s2 [idx2]) {
916                                                 idx1++;
917                                                 idx2++;
918                                         }
919                                         if (idx1 == end1 || idx2 == end2)
920                                                 continue; // check replacement
921
922                                         int backwardEnd1 = quickCheckPos1;
923                                         int backwardEnd2 = quickCheckPos2;
924                                         quickCheckPos1 = idx1;
925                                         quickCheckPos2 = idx2;
926
927                                         idx1--;
928                                         idx2--;
929
930                                         for (;idx1 > backwardEnd1; idx1--)
931                                                 if (Category (s1 [idx1]) != 1)
932                                                         break;
933                                         for (;idx2 > backwardEnd2; idx2--)
934                                                 if (Category (s2 [idx2]) != 1)
935                                                         break;
936                                         for (;idx1 > backwardEnd1; idx1--)
937                                                 if (IsSafe (s1 [idx1]))
938                                                         break;
939                                         for (;idx2 > backwardEnd2; idx2--)
940                                                 if (IsSafe (s2 [idx2]))
941                                                         break;
942                                 }
943 #endif
944
945                                 int cur1 = idx1;
946                                 int cur2 = idx2;
947                                 byte* sk1 = null;
948                                 byte* sk2 = null;
949                                 int i1 = FilterOptions (s1 [idx1], opt);
950                                 int i2 = FilterOptions (s2 [idx2], opt);
951                                 bool special1 = false;
952                                 bool special2 = false;
953
954                                 // If current character is an expander, then
955                                 // repeat the previous character.
956                                 ext1 = GetExtenderType (i1);
957                                 if (ext1 != ExtenderType.None) {
958                                         if (ctx.PrevCode < 0) {
959                                                 if (ctx.PrevSortKey == null) {
960                                                         // nothing to extend
961                                                         idx1++;
962                                                         continue;
963                                                 }
964                                                 sk1 = ctx.PrevSortKey;
965                                         }
966                                         else
967                                                 i1 = FilterExtender (ctx.PrevCode, ext1, opt);
968                                 }
969                                 ext2 = GetExtenderType (i2);
970                                 if (ext2 != ExtenderType.None) {
971                                         if (prev2.Code < 0) {
972                                                 if (prev2.SortKey == null) {
973                                                         // nothing to extend
974                                                         idx2++;
975                                                         continue;
976                                                 }
977                                                 sk2 = prev2.SortKey;
978                                         }
979                                         else
980                                                 i2 = FilterExtender (prev2.Code, ext2, opt);
981                                 }
982
983                                 byte cat1 = Category (i1);
984                                 byte cat2 = Category (i2);
985
986                                 // Handle special weight characters
987                                 if (!stringSort && currentLevel > 4) {
988                                         if (cat1 == 6) {
989                                                 lv5At1 = escape1.Source != null ?
990                                                         escape1.Index - escape1.Start :
991                                                         cur1 - start1;
992                                                 // here Windows has a bug that it does
993                                                 // not consider thirtiary weight.
994                                                 lv5Value1 = Level1 (i1) << 8 + Uni.Level3 (i1);
995                                                 ctx.PrevCode = i1;
996                                                 idx1++;
997                                         }
998                                         if (cat2 == 6) {
999                                                 lv5At2 = escape2.Source != null ?
1000                                                         escape2.Index - escape2.Start :
1001                                                         cur2 - start2;
1002                                                 // here Windows has a bug that it does
1003                                                 // not consider thirtiary weight.
1004                                                 lv5Value2 = Level1 (i2) << 8 + Uni.Level3 (i2);
1005                                                 prev2.Code = i2;
1006                                                 idx2++;
1007                                         }
1008                                         if (cat1 == 6 || cat2 == 6) {
1009                                                 currentLevel = 4;
1010                                                 continue;
1011                                         }
1012                                 }
1013
1014                                 Contraction ct1 = null;
1015                                 if (ext1 == ExtenderType.None)
1016                                         ct1 = GetContraction (s1, idx1, end1);
1017
1018                                 int offset1 = 1;
1019                                 if (sk1 != null)
1020                                         offset1 = 1;
1021                                 else if (ct1 != null) {
1022                                         offset1 = ct1.Source.Length;
1023                                         if (ct1.SortKey != null) {
1024                                                 sk1 = ctx.Buffer1;
1025                                                 for (int i = 0; i < ct1.SortKey.Length; i++)
1026                                                         sk1 [i] = ct1.SortKey [i];
1027                                                 ctx.PrevCode = -1;
1028                                                 ctx.PrevSortKey = sk1;
1029                                         }
1030                                         else if (escape1.Source == null) {
1031                                                 escape1.Source = s1;
1032                                                 escape1.Start = start1;
1033                                                 escape1.Index = cur1 + ct1.Source.Length;
1034                                                 escape1.End = end1;
1035                                                 escape1.Optional = quickCheckPos1;
1036                                                 s1 = ct1.Replacement;
1037                                                 idx1 = 0;
1038                                                 start1 = 0;
1039                                                 end1 = s1.Length;
1040                                                 quickCheckPos1 = 0;
1041                                                 continue;
1042                                         }
1043                                 }
1044                                 else {
1045                                         sk1 = ctx.Buffer1;
1046                                         sk1 [0] = cat1;
1047                                         sk1 [1] = Level1 (i1);
1048                                         if (!ignoreNonSpace && currentLevel > 1)
1049                                                 sk1 [2] = Level2 (i1, ext1);
1050                                         if (currentLevel > 2)
1051                                                 sk1 [3] = Uni.Level3 (i1);
1052                                         if (currentLevel > 3)
1053                                                 special1 = Uni.HasSpecialWeight ((char) i1);
1054                                         if (cat1 > 1)
1055                                                 ctx.PrevCode = i1;
1056                                 }
1057
1058                                 Contraction ct2 = null;
1059                                 if (ext2 == ExtenderType.None)
1060                                         ct2 = GetContraction (s2, idx2, end2);
1061
1062                                 if (sk2 != null)
1063                                         idx2++;
1064                                 else if (ct2 != null) {
1065                                         idx2 += ct2.Source.Length;
1066                                         if (ct2.SortKey != null) {
1067                                                 sk2 = ctx.Buffer2;
1068                                                 for (int i = 0; i < ct2.SortKey.Length; i++)
1069                                                         sk2 [i] = ct2.SortKey [i];
1070                                                 prev2.Code = -1;
1071                                                 prev2.SortKey = sk2;
1072                                         }
1073                                         else if (escape2.Source == null) {
1074                                                 escape2.Source = s2;
1075                                                 escape2.Start = start2;
1076                                                 escape2.Index = cur2 + ct2.Source.Length;
1077                                                 escape2.End = end2;
1078                                                 escape2.Optional = quickCheckPos2;
1079                                                 s2 = ct2.Replacement;
1080                                                 idx2 = 0;
1081                                                 start2 = 0;
1082                                                 end2 = s2.Length;
1083                                                 quickCheckPos2 = 0;
1084                                                 continue;
1085                                         }
1086                                 }
1087                                 else {
1088                                         sk2 = ctx.Buffer2;
1089                                         sk2 [0] = cat2;
1090                                         sk2 [1] = Level1 (i2);
1091                                         if (!ignoreNonSpace && currentLevel > 1)
1092                                                 sk2 [2] = Level2 (i2, ext2);
1093                                         if (currentLevel > 2)
1094                                                 sk2 [3] = Uni.Level3 (i2);
1095                                         if (currentLevel > 3)
1096                                                 special2 = Uni.HasSpecialWeight ((char) i2);
1097                                         if (cat2 > 1)
1098                                                 prev2.Code = i2;
1099                                         idx2++;
1100                                 }
1101
1102                                 // Add offset here so that it does not skip
1103                                 // idx1 while s2 has a replacement.
1104                                 idx1 += offset1;
1105
1106                                 // add diacritical marks in s1 here
1107                                 if (!ignoreNonSpace) {
1108                                         while (idx1 < end1) {
1109                                                 if (Category (s1 [idx1]) != 1)
1110                                                         break;
1111                                                 if (sk1 [2] == 0)
1112                                                         sk1 [2] = 2;
1113                                                 sk1 [2] = (byte) (sk1 [2] + 
1114                                                         Level2 (s1 [idx1], ExtenderType.None));
1115                                                 idx1++;
1116                                         }
1117
1118                                         // add diacritical marks in s2 here
1119                                         while (idx2 < end2) {
1120                                                 if (Category (s2 [idx2]) != 1)
1121                                                         break;
1122                                                 if (sk2 [2] == 0)
1123                                                         sk2 [2] = 2;
1124                                                 sk2 [2] = (byte) (sk2 [2] + 
1125                                                         Level2 (s2 [idx2], ExtenderType.None));
1126                                                 idx2++;
1127                                         }
1128                                 }
1129
1130                                 int ret = sk1 [0] - sk2 [0];
1131                                 ret = ret != 0 ? ret : sk1 [1] - sk2 [1];
1132                                 if (ret != 0)
1133                                         return ret;
1134                                 if (currentLevel == 1)
1135                                         continue;
1136                                 if (!ignoreNonSpace) {
1137                                         ret = sk1 [2] - sk2 [2];
1138                                         if (ret != 0) {
1139                                                 finalResult = ret;
1140                                                 if (immediateBreakup)
1141                                                         return -1; // different
1142                                                 currentLevel = frenchSort ? 2 : 1;
1143                                                 continue;
1144                                         }
1145                                 }
1146                                 if (currentLevel == 2)
1147                                         continue;
1148                                 ret = sk1 [3] - sk2 [3];
1149                                 if (ret != 0) {
1150                                         finalResult = ret;
1151                                         if (immediateBreakup)
1152                                                 return -1; // different
1153                                         currentLevel = 2;
1154                                         continue;
1155                                 }
1156                                 if (currentLevel == 3)
1157                                         continue;
1158                                 if (special1 != special2) {
1159                                         if (immediateBreakup)
1160                                                 return -1; // different
1161                                         finalResult = special1 ? 1 : -1;
1162                                         currentLevel = 3;
1163                                         continue;
1164                                 }
1165                                 if (special1) {
1166                                         ret = CompareFlagPair (
1167                                                 !Uni.IsJapaneseSmallLetter ((char) i1),
1168                                                 !Uni.IsJapaneseSmallLetter ((char) i2));
1169                                         ret = ret != 0 ? ret :
1170                                                 ToDashTypeValue (ext1, opt) -
1171                                                 ToDashTypeValue (ext2, opt);
1172                                         ret = ret != 0 ? ret : CompareFlagPair (
1173                                                 Uni.IsHiragana ((char) i1),
1174                                                 Uni.IsHiragana ((char) i2));
1175                                         ret = ret != 0 ? ret : CompareFlagPair (
1176                                                 !IsHalfKana ((char) i1, opt),
1177                                                 !IsHalfKana ((char) i2, opt));
1178                                         if (ret != 0) {
1179                                                 if (immediateBreakup)
1180                                                         return -1; // different
1181                                                 finalResult = ret;
1182                                                 currentLevel = 3;
1183                                                 continue;
1184                                         }
1185                                 }
1186                         }
1187
1188                         // If there were only level 3 or lower differences,
1189                         // then we still have to find diacritical differences
1190                         // if any.
1191                         if (!ignoreNonSpace &&
1192                                 finalResult != 0 && currentLevel > 2) {
1193                                 while (idx1 < end1 && idx2 < end2) {
1194                                         if (!Uni.IsIgnorableNonSpacing (s1 [idx1]))
1195                                                 break;
1196                                         if (!Uni.IsIgnorableNonSpacing (s2 [idx2]))
1197                                                 break;
1198                                         finalResult = Level2 (FilterOptions (s1 [idx1], opt), ext1) - Level2 (FilterOptions (s2 [idx2], opt), ext2);
1199                                         if (finalResult != 0)
1200                                                 break;
1201                                         idx1++;
1202                                         idx2++;
1203                                         // they should work only for the first character
1204                                         ext1 = ExtenderType.None;
1205                                         ext2 = ExtenderType.None;
1206                                 }
1207                         }
1208                         if (currentLevel == 1 && finalResult != 0) {
1209                                 while (idx1 < end1) {
1210                                         if (Uni.IsIgnorableNonSpacing (s1 [idx1]))
1211                                                 idx1++;
1212                                         else
1213                                                 break;
1214                                 }
1215                                 while (idx2 < end2) {
1216                                         if (Uni.IsIgnorableNonSpacing (s2 [idx2]))
1217                                                 idx2++;
1218                                         else
1219                                                 break;
1220                                 }
1221                         }
1222                         // we still have to handle level 5
1223                         if (finalResult == 0) {
1224                                 finalResult = lv5At1 - lv5At2;
1225                                 if (finalResult == 0)
1226                                         finalResult = lv5Value1 - lv5Value2;
1227                         }
1228                         if (finalResult == 0) {
1229                                 if (idx2 == end2)
1230                                         targetConsumed = true;
1231                                 if (idx1 == end1)
1232                                         sourceConsumed = true;
1233                         }
1234                         return idx1 != end1 ? 1 : idx2 == end2 ? finalResult : -1;
1235                 }
1236
1237                 int CompareFlagPair (bool b1, bool b2)
1238                 {
1239                         return b1 == b2 ? 0 : b1 ? 1 : -1;
1240                 }
1241
1242                 #endregion
1243
1244                 #region IsPrefix() and IsSuffix()
1245
1246                 public bool IsPrefix (string src, string target, CompareOptions opt)
1247                 {
1248                         return IsPrefix (src, target, 0, src.Length, opt);
1249                 }
1250
1251                 public unsafe bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
1252                 {
1253                         if (target.Length == 0)
1254                                 return true;
1255                         byte* sk1 = stackalloc byte [4];
1256                         byte* sk2 = stackalloc byte [4];
1257                         ClearBuffer (sk1, 4);
1258                         ClearBuffer (sk2, 4);
1259                         Context ctx = new Context (opt, null, null, sk1, sk2, null,
1260                                 QuickCheckPossible (s, start, start + length, target, 0, target.Length));
1261                         return IsPrefix (s, target, start, length, true, ref ctx);
1262                 }
1263
1264                 unsafe bool IsPrefix (string s, string target, int start, int length, bool skipHeadingExtenders, ref Context ctx)
1265                 {
1266                         bool consumed, dummy;
1267                         CompareInternal (s, start, length,
1268                                 target, 0, target.Length,
1269                                 out consumed, out dummy, skipHeadingExtenders,
1270                                 true, ref ctx);
1271                         return consumed;
1272                 }
1273
1274                 // IsSuffix()
1275
1276                 public bool IsSuffix (string src, string target, CompareOptions opt)
1277                 {
1278                         return IsSuffix (src, target, src.Length - 1, src.Length, opt);
1279                 }
1280
1281                 public unsafe bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
1282                 {
1283                         if (target.Length == 0)
1284                                 return true;
1285                         int last = LastIndexOf (s, target, start, length, opt);
1286                         return last >= 0 && Compare (s, last, s.Length - last, target, 0, target.Length, opt) == 0;
1287 /*
1288                         // quick check : simple codepoint comparison
1289                         if (s.Length >= target.Length) {
1290                                 int si = start;
1291                                 int se = start - length;
1292                                 for (int i = target.Length - 1; si >= se && i >= 0; i--, si--)
1293                                         if (s [si] != target [i])
1294                                                 break;
1295                                 if (si == start + target.Length)
1296                                         return true;
1297                         }
1298
1299                         PreviousInfo prev = new PreviousInfo (false);
1300                         byte* sk1 = stackalloc byte [4];
1301                         byte* sk2 = stackalloc byte [4];
1302                         ClearBuffer (sk1, 4);
1303                         ClearBuffer (sk2, 4);
1304                         return IsSuffix (opt, s, target, start, length, ref prev, sk1, sk2);
1305 */
1306                 }
1307
1308                 unsafe bool IsSuffix (string s, string t, int start, int length, ref Context ctx)
1309                 {
1310                         int tstart = 0;
1311                         COpt opt = ctx.Option;
1312
1313                         for (;tstart < t.Length; tstart++)
1314                                 if (!IsIgnorable (t [tstart], opt))
1315                                         break;
1316                         if (tstart == t.Length)
1317                                 return true; // as if target is String.Empty.
1318
1319 #if false
1320 // This is still not working. If it is not likely to get working, then just remove it.
1321                         int si = start;
1322                         int send = start - length;
1323                         int ti = t.Length - 1;
1324                         int tend = -1;
1325
1326                         int sStep = start + 1;
1327                         int tStep = t.Length;
1328                         bool sourceConsumed, targetConsumed;
1329                         while (true) {
1330                                 for (; send < si; si--)
1331                                         if (!IsIgnorable (s [si]))
1332                                                 break;
1333                                 for (; tend < ti; ti--)
1334                                         if (!IsIgnorable (t [ti]))
1335                                                 break;
1336                                 if (tend == ti)
1337                                         break;
1338                                 for (; send < si; si--)
1339                                         if (IsSafe (s [si]))
1340                                                 break;
1341                                 for (; tend < ti; ti--)
1342                                         if (IsSafe (t [ti]))
1343                                                 break;
1344 Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, length, t, ti, tstart, sStep - si, tStep - ti);
1345                                 if (CompareInternal (opt, s, si, sStep - si,
1346                                         t, ti, tStep - ti,
1347                                         out targetConsumed, out sourceConsumed,
1348                                         true) != 0)
1349                                         return false;
1350                                 if (send == si)
1351                                         return false;
1352                                 sStep = si;
1353                                 tStep = ti;
1354                                 si--;
1355                                 ti--;
1356                         }
1357                         return true;
1358 #else
1359                         // FIXME: it is not efficient for very long target.
1360                         bool sourceConsumed, targetConsumed;
1361                         int mismatchCount = 0;
1362                         for (int i = 0; i < length; i++) {
1363                                 ctx.ClearPrevInfo ();
1364
1365                                 int ret = CompareInternal (s, start - i, i + 1,
1366                                         t, tstart, t.Length - tstart,
1367                                         out targetConsumed,
1368                                         // FIXME: could immediately breakup
1369                                         out sourceConsumed, true, true, ref ctx);
1370                                 if (ret == 0)
1371                                         return true;
1372                                 if (!sourceConsumed && targetConsumed)
1373                                         return false;
1374                                 if (!targetConsumed) {
1375                                         mismatchCount++;
1376                                         if (mismatchCount > Uni.MaxExpansionLength)
1377                                                 // The largest length of an
1378                                                 // expansion is 3, so if the
1379                                                 // target was not consumed more
1380                                                 // than 3 times, then the tail
1381                                                 // character does not match.
1382                                                 return false;
1383                                 }
1384                         }
1385                         return false;
1386 #endif
1387                 }
1388
1389                 #endregion
1390
1391                 #region IndexOf() / LastIndexOf()
1392
1393                 public int IndexOf (string s, string target, CompareOptions opt)
1394                 {
1395                         return IndexOf (s, target, 0, s.Length, opt);
1396                 }
1397
1398                 public unsafe int IndexOf (string s, string target, int start, int length, CompareOptions opt)
1399                 {
1400                         byte* alwaysMatchFlags = stackalloc byte [16];
1401                         byte* neverMatchFlags = stackalloc byte [16];
1402                         byte* targetSortKey = stackalloc byte [4];
1403                         byte* sk1 = stackalloc byte [4];
1404                         byte* sk2 = stackalloc byte [4];
1405                         ClearBuffer (alwaysMatchFlags, 16);
1406                         ClearBuffer (neverMatchFlags, 16);
1407                         ClearBuffer (targetSortKey, 4);
1408                         ClearBuffer (sk1, 4);
1409                         ClearBuffer (sk2, 4);
1410                         Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null,
1411                                 QuickCheckPossible (s, start, start + length, target, 0, target.Length));
1412
1413                         return IndexOf (s, target, start, length,
1414                                 targetSortKey, ref ctx);
1415                 }
1416
1417                 public int IndexOf (string s, char target, CompareOptions opt)
1418                 {
1419                         return IndexOf (s, target, 0, s.Length, opt);
1420                 }
1421
1422                 public unsafe int IndexOf (string s, char target, int start, int length, CompareOptions opt)
1423                 {
1424                         byte* alwaysMatchFlags = stackalloc byte [16];
1425                         byte* neverMatchFlags = stackalloc byte [16];
1426                         byte* targetSortKey = stackalloc byte [4];
1427                         byte* sk1 = stackalloc byte [4];
1428                         byte* sk2 = stackalloc byte [4];
1429                         ClearBuffer (alwaysMatchFlags, 16);
1430                         ClearBuffer (neverMatchFlags, 16);
1431                         ClearBuffer (targetSortKey, 4);
1432                         ClearBuffer (sk1, 4);
1433                         ClearBuffer (sk2, 4);
1434                         Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1435
1436                         // If target is contraction, then use string search.
1437                         Contraction ct = GetContraction (target);
1438                         if (ct != null) {
1439                                 if (ct.Replacement != null)
1440                                         return IndexOf (s, ct.Replacement,
1441                                                 start, length, targetSortKey, ref ctx);
1442                                 else {
1443                                         for (int i = 0; i < ct.SortKey.Length; i++)
1444                                                 sk2 [i] = ct.SortKey [i];
1445                                         return IndexOfSortKey (s, start, length, sk2, char.MinValue, -1, true, ref ctx);
1446                                 }
1447                         } else {
1448                                 int ti = FilterOptions ((int) target, opt);
1449                                 targetSortKey [0] = Category (ti);
1450                                 targetSortKey [1] = Level1 (ti);
1451                                 if ((opt & COpt.IgnoreNonSpace) == 0)
1452                                         targetSortKey [2] =
1453                                                 Level2 (ti, ExtenderType.None);
1454                                 targetSortKey [3] = Uni.Level3 (ti);
1455                                 return IndexOfSortKey (s, start, length,
1456                                         targetSortKey, target, ti,
1457                                         !Uni.HasSpecialWeight ((char) ti), ref ctx);
1458                         }
1459                 }
1460
1461                 // Searches target byte[] keydata
1462                 unsafe int IndexOfSortKey (string s, int start, int length, byte* sortkey, char target, int ti, bool noLv4, ref Context ctx)
1463                 {
1464                         int end = start + length;
1465                         int idx = start;
1466
1467                         while (idx < end) {
1468                                 int cur = idx;
1469                                 if (MatchesForward (s, ref idx, end, ti, sortkey, noLv4, ref ctx))
1470                                         return cur;
1471                         }
1472                         return -1;
1473                 }
1474
1475                 // Searches string. Search head character (or keydata when
1476                 // the head is contraction sortkey) and try IsPrefix().
1477                 unsafe int IndexOf (string s, string target, int start, int length, byte* targetSortKey, ref Context ctx)
1478                 {
1479                         COpt opt = ctx.Option;
1480                         int tidx = 0;
1481                         for (; tidx < target.Length; tidx++)
1482                                 if (!IsIgnorable (target [tidx], opt))
1483                                         break;
1484                         if (tidx == target.Length)
1485                                 return start;
1486                         Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1487                         string replace = ct != null ? ct.Replacement : null;
1488                         byte* sk = replace == null ? targetSortKey : null;
1489                         bool noLv4 = true;
1490                         char tc = char.MinValue;
1491                         int ti = -1;
1492                         if (ct != null && sk != null) {
1493                                 for (int i = 0; i < ct.SortKey.Length; i++)
1494                                         sk [i] = ct.SortKey [i];
1495                         } else if (sk != null) {
1496                                 tc = target [tidx];
1497                                 ti = FilterOptions (target [tidx], opt);
1498                                 sk [0] = Category (ti);
1499                                 sk [1] = Level1 (ti);
1500                                 if ((opt & COpt.IgnoreNonSpace) == 0)
1501                                         sk [2] = Level2 (ti, ExtenderType.None);
1502                                 sk [3] = Uni.Level3 (ti);
1503                                 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1504                         }
1505                         if (sk != null) {
1506                                 for (tidx++; tidx < target.Length; tidx++) {
1507                                         if (Category (target [tidx]) != 1)
1508                                                 break;
1509                                         if (sk [2] == 0)
1510                                                 sk [2] = 2;
1511                                         sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1512                                 }
1513                         }
1514
1515                         do {
1516                                 int idx = 0;
1517                                 if (replace != null)
1518                                         idx = IndexOf (s, replace, start, length, targetSortKey, ref ctx);
1519                                 else
1520                                         idx = IndexOfSortKey (s, start, length, sk, tc, ti, noLv4, ref ctx);
1521                                 if (idx < 0)
1522                                         return -1;
1523                                 length -= idx - start;
1524                                 start = idx;
1525                                 if (IsPrefix (s, target, start, length, false, ref ctx))
1526                                         return idx;
1527                                 Contraction cts = GetContraction (s, start, length);
1528                                 if (cts != null) {
1529                                         start += cts.Source.Length;
1530                                         length -= cts.Source.Length;
1531                                 } else {
1532                                         start++;
1533                                         length--;
1534                                 }
1535                         } while (length > 0);
1536                         return -1;
1537                 }
1538
1539                 //
1540                 // There are the same number of IndexOf() related methods,
1541                 // with the same functionalities for each.
1542                 //
1543
1544                 public int LastIndexOf (string s, string target, CompareOptions opt)
1545                 {
1546                         return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1547                 }
1548
1549                 public unsafe int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
1550                 {
1551                         byte* alwaysMatchFlags = stackalloc byte [16];
1552                         byte* neverMatchFlags = stackalloc byte [16];
1553                         byte* targetSortKey = stackalloc byte [4];
1554                         byte* sk1 = stackalloc byte [4];
1555                         byte* sk2 = stackalloc byte [4];
1556                         ClearBuffer (alwaysMatchFlags, 16);
1557                         ClearBuffer (neverMatchFlags, 16);
1558                         ClearBuffer (targetSortKey, 4);
1559                         ClearBuffer (sk1, 4);
1560                         ClearBuffer (sk2, 4);
1561                         // For some unknown reason CompareQuick() does not work fine w/ LastIndexOf().
1562                         Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1563                         return LastIndexOf (s, target, start, length,
1564                                 targetSortKey, ref ctx);
1565                 }
1566
1567                 public int LastIndexOf (string s, char target, CompareOptions opt)
1568                 {
1569                         return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1570                 }
1571
1572                 public unsafe int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
1573                 {
1574                         byte* alwaysMatchFlags = stackalloc byte [16];
1575                         byte* neverMatchFlags = stackalloc byte [16];
1576                         byte* targetSortKey = stackalloc byte [4];
1577                         byte* sk1 = stackalloc byte [4];
1578                         byte* sk2 = stackalloc byte [4];
1579                         ClearBuffer (alwaysMatchFlags, 16);
1580                         ClearBuffer (neverMatchFlags, 16);
1581                         ClearBuffer (targetSortKey, 4);
1582                         ClearBuffer (sk1, 4);
1583                         ClearBuffer (sk2, 4);
1584                         Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1585
1586                         // If target is a replacement contraction, then use 
1587                         // string search.
1588                         Contraction ct = GetContraction (target);
1589                         if (ct != null) {
1590                                 if (ct.Replacement != null)
1591                                         return LastIndexOf (s,
1592                                                 ct.Replacement, start, length,
1593                                                 targetSortKey, ref ctx);
1594                                 else {
1595                                         for (int bi = 0; bi < ct.SortKey.Length; bi++)
1596                                                 sk2 [bi] = ct.SortKey [bi];
1597                                         return LastIndexOfSortKey (s, start,
1598                                                 start, length, sk2,
1599                                                 -1, true, ref ctx);
1600                                 }
1601                         }
1602                         else {
1603                                 int ti = FilterOptions ((int) target, opt);
1604                                 targetSortKey [0] = Category (ti);
1605                                 targetSortKey [1] = Level1 (ti);
1606                                 if ((opt & COpt.IgnoreNonSpace) == 0)
1607                                         targetSortKey [2] = Level2 (ti, ExtenderType.None);
1608                                 targetSortKey [3] = Uni.Level3 (ti);
1609                                 return LastIndexOfSortKey (s, start, start,
1610                                         length, targetSortKey,
1611                                         ti, !Uni.HasSpecialWeight ((char) ti),
1612                                         ref ctx);
1613                         }
1614                 }
1615
1616                 // Searches target byte[] keydata
1617                 unsafe int LastIndexOfSortKey (string s, int start, int orgStart, int length, byte* sortkey, int ti, bool noLv4, ref Context ctx)
1618                 {
1619                         int end = start - length;
1620                         int idx = start;
1621                         while (idx > end) {
1622                                 int cur = idx;
1623                                 if (MatchesBackward (s, ref idx, end, orgStart,
1624                                         ti, sortkey, noLv4, ref ctx))
1625                                         return cur;
1626                         }
1627                         return -1;
1628                 }
1629
1630                 // Searches string. Search head character (or keydata when
1631                 // the head is contraction sortkey) and try IsPrefix().
1632                 unsafe int LastIndexOf (string s, string target, int start, int length, byte* targetSortKey, ref Context ctx)
1633                 {
1634                         COpt opt = ctx.Option;
1635                         int orgStart = start;
1636                         int tidx = 0;
1637                         for (; tidx < target.Length; tidx++)
1638                                 if (!IsIgnorable (target [tidx], opt))
1639                                         break;
1640                         if (tidx == target.Length)
1641                                 return start;
1642                         Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1643                         string replace = ct != null ? ct.Replacement : null;
1644                         byte* sk = replace == null ? targetSortKey : null;
1645
1646                         bool noLv4 = true;
1647                         int ti = -1;
1648                         if (ct != null && sk != null) {
1649                                 for (int i = 0; i < ct.SortKey.Length; i++)
1650                                         sk [i] = ct.SortKey [i];
1651                         } else if (sk != null) {
1652                                 ti = FilterOptions (target [tidx], opt);
1653                                 sk [0] = Category (ti);
1654                                 sk [1] = Level1 (ti);
1655                                 if ((opt & COpt.IgnoreNonSpace) == 0)
1656                                         sk [2] = Level2 (ti, ExtenderType.None);
1657                                 sk [3] = Uni.Level3 (ti);
1658                                 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1659                         }
1660                         if (sk != null) {
1661                                 for (tidx++; tidx < target.Length; tidx++) {
1662                                         if (Category (target [tidx]) != 1)
1663                                                 break;
1664                                         if (sk [2] == 0)
1665                                                 sk [2] = 2;
1666                                         sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1667                                 }
1668                         }
1669
1670                         do {
1671                                 int idx = 0;
1672
1673                                 if (replace != null)
1674                                         idx = LastIndexOf (s, replace,
1675                                                 start, length,
1676                                                 targetSortKey, ref ctx);
1677                                 else
1678                                         idx = LastIndexOfSortKey (s, start, orgStart, length, sk, ti, noLv4, ref ctx);
1679                                 if (idx < 0)
1680                                         return -1;
1681                                 length -= start - idx;
1682                                 start = idx;
1683
1684                                 if (IsPrefix (s, target, idx, orgStart - idx + 1, false, ref ctx)) {
1685                                         for (;idx < orgStart; idx++)
1686                                                 if (!IsIgnorable (s [idx], opt))
1687                                                         break;
1688                                         return idx;
1689                                 }
1690                                 Contraction cts = GetContraction (s, idx, orgStart - idx + 1);
1691                                 if (cts != null) {
1692                                         start -= cts.Source.Length;
1693                                         length -= cts.Source.Length;
1694                                 } else {
1695                                         start--;
1696                                         length--;
1697                                 }
1698                         } while (length > 0);
1699                         return -1;
1700                 }
1701
1702                 unsafe bool MatchesForward (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ref Context ctx)
1703                 {
1704                         COpt opt = ctx.Option;
1705                         int si = s [idx];
1706                         if (ctx.AlwaysMatchFlags != null && si < 128 && (ctx.AlwaysMatchFlags [si / 8] & (1 << (si % 8))) != 0)
1707                                 return true;
1708                         if (ctx.NeverMatchFlags != null &&
1709                                         si < 128 &&
1710                                         (ctx.NeverMatchFlags [si / 8] & (1 << (si % 8))) != 0) {
1711                                 idx++;
1712                                 return false;
1713                         }
1714                         ExtenderType ext = GetExtenderType (s [idx]);
1715                         Contraction ct = null;
1716                         if (MatchesForwardCore (s, ref idx, end, ti, sortkey, noLv4, ext, ref ct, ref ctx)) {
1717                                 if (ctx.AlwaysMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1718                                         ctx.AlwaysMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1719                                 return true;
1720                         }
1721                         if (ctx.NeverMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1722                                 ctx.NeverMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1723                         return false;
1724                 }
1725
1726                 unsafe bool MatchesForwardCore (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
1727                 {
1728                         COpt opt = ctx.Option;
1729                         byte* charSortKey = ctx.Buffer1;
1730                         bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1731                         int si = -1;
1732                         if (ext == ExtenderType.None)
1733                                 ct = GetContraction (s, idx, end);
1734                         else if (ctx.PrevCode < 0) {
1735                                 if (ctx.PrevSortKey == null) {
1736                                         idx++;
1737                                         return false;
1738                                 }
1739                                 charSortKey = ctx.PrevSortKey;
1740                         }
1741                         else
1742                                 si = FilterExtender (ctx.PrevCode, ext, opt);
1743                         // if lv4 exists, it never matches contraction
1744                         if (ct != null) {
1745                                 idx += ct.Source.Length;
1746                                 if (!noLv4)
1747                                         return false;
1748                                 if (ct.SortKey != null) {
1749                                         for (int i = 0; i < 4; i++)
1750                                                 charSortKey [i] = sortkey [i];
1751                                         ctx.PrevCode = -1;
1752                                         ctx.PrevSortKey = charSortKey;
1753                                 } else {
1754                                         // Here is the core of LAMESPEC
1755                                         // described at the top of the source.
1756                                         int dummy = 0;
1757                                         return MatchesForward (ct.Replacement, ref dummy,
1758                                                 ct.Replacement.Length, ti, sortkey, noLv4, ref ctx);
1759                                 }
1760                         } else {
1761                                 if (si < 0)
1762                                         si = FilterOptions (s [idx], opt);
1763                                 idx++;
1764                                 charSortKey [0] = Category (si);
1765                                 bool noMatch = false;
1766                                 if (sortkey [0] == charSortKey [0])
1767                                         charSortKey [1] = Level1 (si);
1768                                 else
1769                                         noMatch = true;
1770                                 if (!ignoreNonSpace && sortkey [1] == charSortKey [1])
1771                                         charSortKey [2] = Level2 (si, ext);
1772                                 else if (!ignoreNonSpace)
1773                                         noMatch = true;
1774                                 if (noMatch) {
1775                                         for (; idx < end; idx++) {
1776                                                 if (Category (s [idx]) != 1)
1777                                                         break;
1778                                         }
1779                                         return false;
1780                                 }
1781                                 charSortKey [3] = Uni.Level3 (si);
1782                                 if (charSortKey [0] != 1)
1783                                         ctx.PrevCode = si;
1784                         }
1785                         for (; idx < end; idx++) {
1786                                 if (Category (s [idx]) != 1)
1787                                         break;
1788                                 if (ignoreNonSpace)
1789                                         continue;
1790                                 if (charSortKey [2] == 0)
1791                                                 charSortKey [2] = 2;
1792                                         charSortKey [2] = (byte) (charSortKey [2]
1793                                                 + Level2 (s [idx], ExtenderType.None));
1794                         }
1795
1796                         return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);
1797                 }
1798
1799                 unsafe bool MatchesPrimitive (COpt opt, byte* source, int si, ExtenderType ext, byte* target, int ti, bool noLv4)
1800                 {
1801                         bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1802                         if (source [0] != target [0] ||
1803                                 source [1] != target [1] ||
1804                                 (!ignoreNonSpace && source [2] != target [2]) ||
1805                                 source [3] != target [3])
1806                                 return false;
1807                         if (noLv4 && (si < 0 || !Uni.HasSpecialWeight ((char) si)))
1808                                 return true;
1809                         else if (noLv4)
1810                                 return false;
1811                         // Since target can never be an extender, if the source
1812                         // is an expander and it matters, then they never match.
1813                         if (!ignoreNonSpace && ext == ExtenderType.Conditional)
1814                                 return false;
1815                         if (Uni.IsJapaneseSmallLetter ((char) si) !=
1816                                 Uni.IsJapaneseSmallLetter ((char) ti) ||
1817                                 ToDashTypeValue (ext, opt) !=
1818                                 // FIXME: we will have to specify correct value for target
1819                                 ToDashTypeValue (ExtenderType.None, opt) ||
1820                                 !Uni.IsHiragana ((char) si) !=
1821                                 !Uni.IsHiragana ((char) ti) ||
1822                                 IsHalfKana ((char) si, opt) !=
1823                                 IsHalfKana ((char) ti, opt))
1824                                 return false;
1825                         return true;
1826                 }
1827
1828                 unsafe bool MatchesBackward (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ref Context ctx)
1829                 {
1830                         int si = s [idx];
1831                         if (ctx.AlwaysMatchFlags != null && si < 128 && (ctx.AlwaysMatchFlags [si / 8] & (1 << (si % 8))) != 0)
1832                                 return true;
1833                         if (ctx.NeverMatchFlags != null && si < 128 && (ctx.NeverMatchFlags [si / 8] & (1 << (si % 8))) != 0) {
1834                                 idx--;
1835                                 return false;
1836                         }
1837                         ExtenderType ext = GetExtenderType (s [idx]);
1838                         Contraction ct = null;
1839                         if (MatchesBackwardCore (s, ref idx, end, orgStart, ti, sortkey, noLv4, ext, ref ct, ref ctx)) {
1840                                 if (ctx.AlwaysMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1841                                         ctx.AlwaysMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1842                                 return true;
1843                         }
1844                         if (ctx.NeverMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128) {
1845                                 ctx.NeverMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1846                         }
1847                         return false;
1848                 }
1849
1850                 unsafe bool MatchesBackwardCore (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
1851                 {
1852                         COpt opt = ctx.Option;
1853                         byte* charSortKey = ctx.Buffer1;
1854                         bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1855                         int cur = idx;
1856                         int si = -1;
1857                         // To handle extenders in source, we need to
1858                         // check next _primary_ character.
1859                         if (ext != ExtenderType.None) {
1860                                 byte diacritical = 0;
1861                                 for (int tmp = 0; ; tmp--) {
1862                                         if (tmp < 0) // heading extender
1863                                                 return false;
1864                                         if (IsIgnorable (s [tmp], opt))
1865                                                 continue;
1866                                         int tmpi = FilterOptions (s [tmp], opt);
1867                                         byte category = Category (tmpi);
1868                                         if (category == 1) {
1869                                                 diacritical = Level2 (tmpi, ExtenderType.None);
1870                                                 continue;
1871                                         }
1872                                         si = FilterExtender (tmpi, ext, opt);
1873                                         charSortKey [0] = category;
1874                                         charSortKey [1] = Level1 (si);
1875                                         if (!ignoreNonSpace)
1876                                                 charSortKey [2] = Level2 (si, ext);
1877                                         charSortKey [3] = Uni.Level3 (si);
1878                                         if (ext != ExtenderType.Conditional &&
1879                                                 diacritical != 0)
1880                                                 charSortKey [2] =
1881                                                         (charSortKey [2] == 0) ?
1882                                                         (byte) (diacritical + 2) :
1883                                                         diacritical;
1884                                         break;
1885                                 }
1886                                 idx--;
1887                         }
1888                         if (ext == ExtenderType.None)
1889                                 ct = GetTailContraction (s, idx, end);
1890                         // if lv4 exists, it never matches contraction
1891                         if (ct != null) {
1892                                 idx -= ct.Source.Length;
1893                                 if (!noLv4)
1894                                         return false;
1895                                 if (ct.SortKey != null) {
1896                                         for (int i = 0; i < 4; i++)
1897                                                 charSortKey [i] = sortkey [i];
1898                                         ctx.PrevCode = -1;
1899                                         ctx.PrevSortKey = charSortKey;
1900                                 } else {
1901                                         // Here is the core of LAMESPEC
1902                                         // described at the top of the source.
1903                                         int dummy = ct.Replacement.Length - 1;
1904                                         return 0 <= LastIndexOfSortKey (
1905                                                 ct.Replacement, dummy, dummy,
1906                                                 ct.Replacement.Length, sortkey,
1907                                                 ti, noLv4, ref ctx);
1908                                 }
1909                         } else if (ext == ExtenderType.None) {
1910                                 if (si < 0)
1911                                         si = FilterOptions (s [idx], opt);
1912                                 idx--;
1913                                 bool noMatch = false;
1914                                 charSortKey [0] = Category (si);
1915                                 if (charSortKey [0] == sortkey [0])
1916                                         charSortKey [1] = Level1 (si);
1917                                 else
1918                                         noMatch = true;
1919                                 if (!ignoreNonSpace && charSortKey [1] == sortkey [1])
1920                                         charSortKey [2] = Level2 (si, ext);
1921                                 else if (!ignoreNonSpace)
1922                                         noMatch = true;
1923                                 if (noMatch)
1924                                         return false;
1925                                 charSortKey [3] = Uni.Level3 (si);
1926                                 if (charSortKey [0] != 1)
1927                                         ctx.PrevCode = si;
1928                         }
1929                         if (ext == ExtenderType.None) {
1930                                 for (int tmp = cur + 1; tmp < orgStart; tmp++) {
1931                                         if (Category (s [tmp]) != 1)
1932                                                 break;
1933                                         if (ignoreNonSpace)
1934                                                 continue;
1935                                         if (charSortKey [2] == 0)
1936                                                         charSortKey [2] = 2;
1937                                         charSortKey [2] = (byte) (charSortKey [2]
1938                                                 + Level2 (s [tmp], ExtenderType.None));
1939                                 }
1940                         }
1941                         return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);
1942                 }
1943                 #endregion
1944         }
1945 }