2 // SimpleCollator.cs : the core collator implementation
5 // Atsushi Enomoto <atsushi@ximian.com>
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 // Here's a summary for supporting contractions, expansions and diacritical
33 // Diacritical mapping is a simple tailoring rule that "remaps" diacritical
34 // weight value from one to another. For now it applies to all range of
35 // characters, but at some stage we might need to limit the remapping ranges.
37 // A Contraction consists of a string (actually char[]) and a sortkey entry
38 // (i.e. byte[]). It indicates that a sequence of characters is interpreted
39 // as a single character that has the mapped sortkey value. There is no
40 // character which goes across "special rules". When the collator encountered
41 // such a sequence of characters, it just returns the sortkey value without
42 // further replacement.
44 // Since it is still likely to happen that a contraction sequence matches
45 // other character than the identical sequence (depending on CompareOptions
46 // and of course, defined sortkey value itself), comparison cannot be done
47 // at source char[] level.
49 // In IndexOf(), the first character in the target string or the target char
50 // itself is turned into sortkey bytes. If the character has a contraction and
51 // that is sortkey map, then it is used instead. If the contraction exists and
52 // that is replacement map, then the first character of the replacement string
53 // is searched instead. IndexOf() always searches only for the top character,
54 // and if it returned negative value, then it returns -1. Otherwise, it then
55 // tries IsPrefix() from that location. If it returns true, then it returns
58 // LAMESPEC: IndexOf() is lame as a whole API. It never matches in the middle
59 // of expansion and there is no proper way to return such indexes within
60 // a single int return value.
62 // For example, try below in .NET:
63 // IndexOf("\u00E6", "a")
64 // IndexOf("\u00E6", "e")
69 using System.Collections;
70 using System.Globalization;
72 using Uni = Mono.Globalization.Unicode.MSCompatUnicodeTable;
73 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
74 using COpt = System.Globalization.CompareOptions;
76 namespace Mono.Globalization.Unicode
78 internal class SimpleCollator
80 // this environment variable is for debugging quick check.
81 #pragma warning disable 169, 414
82 static bool QuickCheckDisabled =
83 Environment.internalGetEnvironmentVariable (
84 "MONO_COLLATION_QUICK_CHECK_DISABLED") == "yes";
85 #pragma warning restore 169, 414
87 unsafe internal struct Context
89 public Context (CompareOptions opt, byte* alwaysMatchFlags, byte* neverMatchFlags, byte* buffer1, byte* buffer2, byte* prev1, bool quickCheckPossible)
92 AlwaysMatchFlags = alwaysMatchFlags;
93 NeverMatchFlags = neverMatchFlags;
98 QuickCheckPossible = quickCheckPossible;
101 public readonly CompareOptions Option;
102 public readonly byte* NeverMatchFlags;
103 public readonly byte* AlwaysMatchFlags;
104 public byte* Buffer1;
105 public byte* Buffer2;
107 public byte* PrevSortKey;
108 public readonly bool QuickCheckPossible;
110 public void ClearPrevInfo ()
117 unsafe struct PreviousInfo
120 public byte* SortKey;
122 public PreviousInfo (bool dummy)
131 public string Source;
138 static SimpleCollator invariant =
139 new SimpleCollator (CultureInfo.InvariantCulture);
141 readonly TextInfo textInfo; // for ToLower().
142 readonly bool frenchSort;
143 unsafe readonly byte* cjkCatTable;
144 unsafe readonly byte* cjkLv1Table;
145 readonly CodePointIndexer cjkIndexer;
146 unsafe readonly byte* cjkLv2Table;
147 readonly CodePointIndexer cjkLv2Indexer;
149 readonly Contraction [] contractions;
150 readonly Level2Map [] level2Maps;
152 // This flag marks characters as "unsafe", where the character
153 // could be used as part of a contraction (whose length > 1).
154 readonly byte [] unsafeFlags;
156 const int UnsafeFlagLength = 0x300 / 8;
158 // readonly byte [] contractionFlags = new byte [16];
160 // This array is internally used inside IndexOf() to store
161 // "no need to check" ASCII characters.
163 // Now that it should be thread safe, this array is allocated
165 // byte [] neverMatchFlags = new byte [128 / 8];
167 #region .ctor() and split functions
169 public SimpleCollator (CultureInfo culture)
172 textInfo = culture.TextInfo;
175 SetCJKTable (culture, ref cjkIndexer,
176 ref cjkCatTable, ref cjkLv1Table,
177 ref cjkLv2Indexer, ref cjkLv2Table);
180 // Get tailoring info
181 TailoringInfo t = null;
182 for (CultureInfo ci = culture; ci.LCID != 127; ci = ci.Parent) {
183 t = Uni.GetTailoringInfo (ci.LCID);
187 if (t == null) // then use invariant
188 t = Uni.GetTailoringInfo (127);
190 frenchSort = t.FrenchSort;
191 Uni.BuildTailoringTables (culture, t, ref contractions,
193 unsafeFlags = new byte [UnsafeFlagLength];
194 foreach (Contraction c in contractions) {
195 if (c.Source.Length > 1)
196 foreach (char ch in c.Source)
197 unsafeFlags [(int) ch / 8 ]
198 |= (byte) (1 << ((int) ch & 7));
199 // for (int i = 0; i < c.Source.Length; i++) {
200 // int ch = c.Source [i];
203 // contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
207 foreach (Contraction c in invariant.contractions) {
208 if (c.Source.Length > 1)
209 foreach (char ch in c.Source)
210 unsafeFlags [(int) ch / 8 ]
211 |= (byte) (1 << ((int) ch & 7));
212 // for (int i = 0; i < c.Source.Length; i++) {
213 // int ch = c.Source [i];
216 // contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
220 // FIXME: Since tailorings are mostly for latin
221 // (and in some cases Cyrillic) characters, it would
222 // be much better for performance to store "start
223 // indexes" for > 370 (culture-specific letters).
226 // dump tailoring table
227 Console.WriteLine ("******** building table for {0} : contractions - {1} diacritical - {2}",
228 culture.LCID, contractions.Length, level2Maps.Length);
229 foreach (Contraction c in contractions) {
230 foreach (char cc in c.Source)
231 Console.Write ("{0:X4} ", (int) cc);
232 Console.WriteLine (" -> '{0}'", c.Replacement);
237 unsafe private void SetCJKTable (
238 CultureInfo culture, ref CodePointIndexer cjkIndexer,
239 ref byte* catTable, ref byte* lv1Table,
240 ref CodePointIndexer lv2Indexer, ref byte* lv2Table)
242 string name = GetNeutralCulture (culture).Name;
244 Uni.FillCJK (name, ref cjkIndexer, ref catTable,
245 ref lv1Table, ref lv2Indexer, ref lv2Table);
248 static CultureInfo GetNeutralCulture (CultureInfo info)
250 CultureInfo ret = info;
251 while (ret.Parent != null && ret.Parent.LCID != 127)
258 unsafe byte Category (int cp)
260 if (cp < 0x3000 || cjkCatTable == null)
261 return Uni.Category (cp);
262 int idx = cjkIndexer.ToIndex (cp);
263 return idx < 0 ? Uni.Category (cp) : cjkCatTable [idx];
266 unsafe byte Level1 (int cp)
268 if (cp < 0x3000 || cjkLv1Table == null)
269 return Uni.Level1 (cp);
270 int idx = cjkIndexer.ToIndex (cp);
271 return idx < 0 ? Uni.Level1 (cp) : cjkLv1Table [idx];
274 unsafe byte Level2 (int cp, ExtenderType ext)
276 if (ext == ExtenderType.Buggy)
278 else if (ext == ExtenderType.Conditional)
281 if (cp < 0x3000 || cjkLv2Table == null)
282 return Uni.Level2 (cp);
283 int idx = cjkLv2Indexer.ToIndex (cp);
284 byte ret = idx < 0 ? (byte) 0 : cjkLv2Table [idx];
287 ret = Uni.Level2 (cp);
288 if (level2Maps.Length == 0)
290 for (int i = 0; i < level2Maps.Length; i++) {
291 if (level2Maps [i].Source == ret)
292 return level2Maps [i].Replace;
293 else if (level2Maps [i].Source > ret)
299 static bool IsHalfKana (int cp, COpt opt)
301 return (opt & COpt.IgnoreWidth) != 0 ||
302 Uni.IsHalfWidthKana ((char) cp);
305 Contraction GetContraction (string s, int start, int end)
307 // int si = s [start];
308 // if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
310 Contraction c = GetContraction (s, start, end, contractions);
311 if (c != null || lcid == 127)
313 return GetContraction (s, start, end, invariant.contractions);
316 Contraction GetContraction (string s, int start, int end, Contraction [] clist)
318 for (int i = 0; i < clist.Length; i++) {
319 Contraction ct = clist [i];
320 int diff = ct.Source [0] - s [start];
322 return null; // it's already sorted
325 char [] chars = ct.Source;
326 if (end - start < chars.Length)
329 for (int n = 0; n < chars.Length; n++)
330 if (s [start + n] != chars [n]) {
340 Contraction GetTailContraction (string s, int start, int end)
342 // int si = s [end - 1];
343 // if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
345 Contraction c = GetTailContraction (s, start, end, contractions);
346 if (c != null || lcid == 127)
348 return GetTailContraction (s, start, end, invariant.contractions);
351 Contraction GetTailContraction (string s, int start, int end, Contraction [] clist)
353 if (start == end || end < -1 || start >= s.Length || s.Length <= end + 1)
354 throw new SystemException (String.Format ("MONO internal error. Failed to get TailContraction. start = {0} end = {1} string = '{2}'", start, end, s));
355 for (int i = 0; i < clist.Length; i++) {
356 Contraction ct = clist [i];
357 char [] chars = ct.Source;
358 if (chars.Length > start - end)
360 if (chars [chars.Length - 1] != s [start])
364 for (int n = 0, spos = start - chars.Length + 1;
367 if (s [spos] != chars [n]) {
378 Contraction GetContraction (char c)
380 Contraction ct = GetContraction (c, contractions);
381 if (ct != null || lcid == 127)
383 return GetContraction (c, invariant.contractions);
386 Contraction GetContraction (char c, Contraction [] clist)
388 for (int i = 0; i < clist.Length; i++) {
389 Contraction ct = clist [i];
390 if (ct.Source [0] > c)
391 return null; // it's already sorted
392 if (ct.Source [0] == c && ct.Source.Length == 1)
398 int FilterOptions (int i, COpt opt)
400 if ((opt & COpt.IgnoreWidth) != 0) {
401 int x = Uni.ToWidthCompat (i);
405 if ((opt & COpt.OrdinalIgnoreCase) != 0)
406 i = textInfo.ToLower ((char) i);
407 if ((opt & COpt.IgnoreCase) != 0)
408 i = textInfo.ToLower ((char) i);
409 if ((opt & COpt.IgnoreKanaType) != 0)
410 i = Uni.ToKanaTypeInsensitive (i);
422 ExtenderType GetExtenderType (int i)
424 // LAMESPEC: Windows expects true for U+3005, but
425 // sometimes it does not represent to repeat just
427 // Windows also expects true for U+3031 and U+3032,
428 // but they should *never* repeat one character.
430 // U+2015 becomes an extender only when it is Japanese
432 return lcid == 16 ? ExtenderType.Conditional :
435 if (i < 0x3005 || i > 0xFF70)
436 return ExtenderType.None;
441 return ExtenderType.Simple;
443 return ExtenderType.Conditional;
446 return ExtenderType.Voiced;
450 return ExtenderType.None;
452 case 0x3005: // LAMESPEC: see above.
453 return ExtenderType.Buggy;
454 case 0x3031: // LAMESPEC: see above.
455 case 0x3032: // LAMESPEC: see above.
458 return ExtenderType.Simple;
461 return ExtenderType.Voiced;
463 return ExtenderType.Conditional;
465 return ExtenderType.None;
469 static byte ToDashTypeValue (ExtenderType ext, COpt opt)
471 if ((opt & COpt.IgnoreNonSpace) != 0) // LAMESPEC: huh, why?
474 case ExtenderType.None:
476 case ExtenderType.Conditional:
483 int FilterExtender (int i, ExtenderType ext, COpt opt)
485 if (ext == ExtenderType.Conditional &&
486 Uni.HasSpecialWeight ((char) i)) {
487 bool half = IsHalfKana ((char) i, opt);
488 bool katakana = !Uni.IsHiragana ((char) i);
489 switch (Level1 (i) & 7) {
491 return half ? 0xFF71 : katakana ? 0x30A2 : 0x3042;
493 return half ? 0xFF72 : katakana ? 0x30A4 : 0x3044;
495 return half ? 0xFF73 : katakana ? 0x30A6 : 0x3046;
497 return half ? 0xFF74 : katakana ? 0x30A8 : 0x3048;
499 return half ? 0xFF75 : katakana ? 0x30AA : 0x304A;
505 static bool IsIgnorable (int i, COpt opt)
507 return Uni.IsIgnorable (i, (byte) (1 +
508 ((opt & COpt.IgnoreSymbols) != 0 ? 2 : 0) +
509 ((opt & COpt.IgnoreNonSpace) != 0 ? 4 : 0)));
515 return i / 8 >= unsafeFlags.Length ? true : (unsafeFlags [i / 8] & (1 << (i % 8))) == 0;
520 public SortKey GetSortKey (string s)
522 return GetSortKey (s, CompareOptions.None);
525 public SortKey GetSortKey (string s, CompareOptions options)
527 return GetSortKey (s, 0, s.Length, options);
530 public SortKey GetSortKey (string s, int start, int length, CompareOptions options)
532 SortKeyBuffer buf = new SortKeyBuffer (lcid);
533 buf.Initialize (options, lcid, s, frenchSort);
534 int end = start + length;
535 GetSortKey (s, start, end, buf, options);
536 return buf.GetResultAndReset ();
539 unsafe void GetSortKey (string s, int start, int end,
540 SortKeyBuffer buf, CompareOptions opt)
542 byte* prevbuf = stackalloc byte [4];
543 ClearBuffer (prevbuf, 4);
544 Context ctx = new Context (opt, null, null, null, null, prevbuf, false);
546 for (int n = start; n < end; n++) {
549 ExtenderType ext = GetExtenderType (i);
550 if (ext != ExtenderType.None) {
551 i = FilterExtender (ctx.PrevCode, ext, opt);
553 FillSortKeyRaw (i, ext, buf, opt);
554 else if (ctx.PrevSortKey != null) {
555 byte* b = ctx.PrevSortKey;
559 b [2] != 1 ? b [2] : Level2 (i, ext),
560 b [3] != 1 ? b [3] : Uni.Level3 (i));
562 // otherwise do nothing.
563 // (if the extender is the first char
564 // in the string, then just ignore.)
568 if (IsIgnorable (i, opt))
570 i = FilterOptions (i, opt);
572 Contraction ct = GetContraction (s, n, end);
574 if (ct.Replacement != null) {
575 GetSortKey (ct.Replacement, 0, ct.Replacement.Length, buf, opt);
577 byte* b = ctx.PrevSortKey;
578 for (int bi = 0; bi < ct.SortKey.Length; bi++)
579 b [bi] = ct.SortKey [bi];
583 b [2] != 1 ? b [2] : Level2 (i, ext),
584 b [3] != 1 ? b [3] : Uni.Level3 (i));
587 n += ct.Source.Length - 1;
590 if (!Uni.IsIgnorableNonSpacing (i))
592 FillSortKeyRaw (i, ExtenderType.None, buf, opt);
597 void FillSortKeyRaw (int i, ExtenderType ext,
598 SortKeyBuffer buf, CompareOptions opt)
600 if (0x3400 <= i && i <= 0x4DB5) {
601 int diff = i - 0x3400;
602 buf.AppendCJKExtension (
603 (byte) (0x10 + diff / 254),
604 (byte) (diff % 254 + 2));
608 UnicodeCategory uc = char.GetUnicodeCategory ((char) i);
610 case UnicodeCategory.PrivateUse:
611 int diff = i - 0xE000;
613 (byte) (0xE5 + diff / 254),
614 (byte) (diff % 254 + 2),
618 case UnicodeCategory.Surrogate:
619 FillSurrogateSortKeyRaw (i, buf);
623 byte level2 = Level2 (i, ext);
624 if (Uni.HasSpecialWeight ((char) i)) {
625 byte level1 = Level1 (i);
631 Uni.IsJapaneseSmallLetter ((char) i),
632 ToDashTypeValue (ext, opt),
633 !Uni.IsHiragana ((char) i),
634 IsHalfKana ((char) i, opt)
636 if ((opt & COpt.IgnoreNonSpace) == 0 && ext == ExtenderType.Voiced)
637 // Append voice weight
638 buf.AppendNormal (1, 1, 1, 0);
648 void FillSurrogateSortKeyRaw (int i, SortKeyBuffer buf)
657 lower = (byte) ((i == 0xD800) ? 0x3E : 0x3F);
658 } else if (0xD840 <= i && i < 0xD880) {
662 } else if (0xDB80 <= i && i < 0xDC00) {
663 diffbase = 0xDB80 - 0x40;
667 diffbase = 0xDC00 - 0xF8 + 2;
671 int diff = i - diffbase;
674 (byte) (segment + diff / 254),
675 (byte) (diff % 254 + 2),
684 public int Compare (string s1, string s2)
686 return Compare (s1, s2, CompareOptions.None);
689 public int Compare (string s1, string s2, CompareOptions options)
691 return Compare (s1, 0, s1.Length, s2, 0, s2.Length, options);
694 private int CompareOrdinal (string s1, int idx1, int len1,
695 string s2, int idx2, int len2)
697 int min = len1 < len2 ? len1 : len2;
698 int end1 = idx1 + min;
699 int end2 = idx2 + min;
700 if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
701 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
702 for (int i1 = idx1, i2 = idx2;
703 i1 < end1 && i2 < end2; i1++, i2++)
704 if (s1 [i1] != s2 [i2])
705 return s1 [i1] - s2 [i2];
706 return len1 == len2 ? 0 :
707 len1 == min ? - 1 : 1;
710 // mostly equivalent to CompareOrdinal, but the return value is
711 // not based on codepoints.
712 private int CompareQuick (string s1, int idx1, int len1,
713 string s2, int idx2, int len2, out bool sourceConsumed,
714 out bool targetConsumed, bool immediateBreakup)
716 sourceConsumed = false;
717 targetConsumed = false;
718 int min = len1 < len2 ? len1 : len2;
719 int end1 = idx1 + min;
720 int end2 = idx2 + min;
721 if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
722 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
723 for (int i1 = idx1, i2 = idx2;
724 i1 < end1 && i2 < end2; i1++, i2++)
725 if (s1 [i1] != s2 [i2]) {
726 if (immediateBreakup)
728 int ret = Category (s1 [i1]) - Category (s2 [i2]);
730 ret = Level1 (s1 [i1]) - Level1 (s2 [i2]);
733 ret = Uni.Level3 (s1 [i1]) - Uni.Level3 (s2 [i2]);
735 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. '{0}' {2} {3} '{1}' {4} {5}", s1, s2, idx1, end1, idx2, end2));
738 sourceConsumed = len1 <= len2;
739 targetConsumed = len1 >= len2;
740 return len1 == len2 ? 0 :
741 len1 == min ? - 1 : 1;
744 private int CompareOrdinalIgnoreCase (string s1, int idx1, int len1,
745 string s2, int idx2, int len2)
747 int min = len1 < len2 ? len1 : len2;
748 int end1 = idx1 + min;
749 int end2 = idx2 + min;
750 if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
751 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
752 TextInfo ti = invariant.textInfo;
753 for (int i1 = idx1, i2 = idx2;
754 i1 < end1 && i2 < end2; i1++, i2++)
755 if (ti.ToLower (s1 [i1]) != ti.ToLower (s2 [i2]))
756 return ti.ToLower (s1 [i1]) - ti.ToLower (s2 [i2]);
757 return len1 == len2 ? 0 :
758 len1 == min ? - 1 : 1;
761 public unsafe int Compare (string s1, int idx1, int len1,
762 string s2, int idx2, int len2, CompareOptions options)
764 // quick equality check
765 if (idx1 == idx2 && len1 == len2 &&
766 Object.ReferenceEquals (s1, s2))
768 if (options == CompareOptions.Ordinal)
769 return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
770 if (options == CompareOptions.OrdinalIgnoreCase)
771 return CompareOrdinalIgnoreCase (s1, idx1, len1, s2, idx2, len2);
773 #if false // stable easy version, depends on GetSortKey().
774 SortKey sk1 = GetSortKey (s1, idx1, len1, options);
775 SortKey sk2 = GetSortKey (s2, idx2, len2, options);
776 byte [] d1 = sk1.KeyData;
777 byte [] d2 = sk2.KeyData;
778 int len = d1.Length > d2.Length ? d2.Length : d1.Length;
779 for (int i = 0; i < len; i++)
780 if (d1 [i] != d2 [i])
781 return d1 [i] < d2 [i] ? -1 : 1;
782 return d1.Length == d2.Length ? 0 : d1.Length < d2.Length ? -1 : 1;
784 byte* sk1 = stackalloc byte [4];
785 byte* sk2 = stackalloc byte [4];
786 ClearBuffer (sk1, 4);
787 ClearBuffer (sk2, 4);
788 Context ctx = new Context (options, null, null, sk1, sk2, null,
789 QuickCheckPossible (s1, idx1, idx1 + len1, s2, idx2, idx2 + len2));
792 int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, out dummy, out dummy2, true, false, ref ctx);
793 return ret == 0 ? 0 : ret < 0 ? -1 : 1;
797 unsafe void ClearBuffer (byte* buffer, int size)
799 for (int i = 0; i < size; i++)
803 bool QuickCheckPossible (string s1, int idx1, int end1,
804 string s2, int idx2, int end2)
807 // looks like it is buggy and inefficient, so just disable it.
810 if (QuickCheckDisabled)
812 // if (s1.Length > 100 || s2.Length > 100)
814 for (int i = idx1; i < end1; i++)
815 if (s1 [i] < 0x20 && (s1 [i] < '\x9' || s1 [i] > '\xD') || s1 [i] >= 0x80 || s1 [i] == '-' || s1 [i] == '\'')
817 for (int i = idx2; i < end2; i++)
818 if (s2 [i] < 0x20 && (s2 [i] < '\x9' || s2 [i] > '\xD') || s2 [i] >= 0x80 || s2 [i] == '-' || s2 [i] == '\'')
824 unsafe int CompareInternal (string s1, int idx1, int len1, string s2,
826 out bool targetConsumed, out bool sourceConsumed,
827 bool skipHeadingExtenders, bool immediateBreakup,
830 COpt opt = ctx.Option;
833 int end1 = idx1 + len1;
834 int end2 = idx2 + len2;
835 targetConsumed = false;
836 sourceConsumed = false;
837 PreviousInfo prev2 = new PreviousInfo (false);
839 if (opt == CompareOptions.None && ctx.QuickCheckPossible)
840 return CompareQuick (s1, idx1, len1, s2, idx2, len2, out sourceConsumed, out targetConsumed, immediateBreakup);
842 // It holds final result that comes from the comparison
843 // at level 2 or lower. Even if Compare() found the
844 // difference at level 2 or lower, it still has to
845 // continue level 1 comparison. FinalResult is used
846 // when there was no further differences.
848 // It holds the comparison level to do. It starts from
849 // 5, and becomes 1 when we do primary-only comparison.
850 int currentLevel = 5;
857 // Skip heading extenders
858 if (skipHeadingExtenders) {
859 for (; idx1 < end1; idx1++)
860 if (GetExtenderType (s1 [idx1]) == ExtenderType.None)
862 for (; idx2 < end2; idx2++)
863 if (GetExtenderType (s2 [idx2]) == ExtenderType.None)
867 ExtenderType ext1 = ExtenderType.None;
868 ExtenderType ext2 = ExtenderType.None;
870 int quickCheckPos1 = idx1;
871 int quickCheckPos2 = idx2;
872 bool stringSort = (opt & COpt.StringSort) != 0;
873 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
874 Escape escape1 = new Escape ();
875 Escape escape2 = new Escape ();
878 for (; idx1 < end1; idx1++)
879 if (!IsIgnorable (s1 [idx1], opt))
881 for (; idx2 < end2; idx2++)
882 if (!IsIgnorable (s2 [idx2], opt))
886 if (escape1.Source == null)
889 start1 = escape1.Start;
890 idx1 = escape1.Index;
892 quickCheckPos1 = escape1.Optional;
893 escape1.Source = null;
897 if (escape2.Source == null)
900 start2 = escape2.Start;
901 idx2 = escape2.Index;
903 quickCheckPos2 = escape2.Optional;
904 escape2.Source = null;
908 // If comparison is unstable, then this part is one of the most doubtful part.
909 // Here we run quick codepoint comparison and run back to "stable" area to
910 // compare characters.
912 // Strictly to say, even the first character
913 // could be compared here, but it messed
914 // backward step, so I just avoided mess.
915 if (quickCheckPos1 < idx1 && quickCheckPos2 < idx2) {
916 while (idx1 < end1 && idx2 < end2 &&
917 s1 [idx1] == s2 [idx2]) {
921 if (idx1 == end1 || idx2 == end2)
922 continue; // check replacement
924 int backwardEnd1 = quickCheckPos1;
925 int backwardEnd2 = quickCheckPos2;
926 quickCheckPos1 = idx1;
927 quickCheckPos2 = idx2;
932 for (;idx1 > backwardEnd1; idx1--)
933 if (Category (s1 [idx1]) != 1)
935 for (;idx2 > backwardEnd2; idx2--)
936 if (Category (s2 [idx2]) != 1)
938 for (;idx1 > backwardEnd1; idx1--)
939 if (IsSafe (s1 [idx1]))
941 for (;idx2 > backwardEnd2; idx2--)
942 if (IsSafe (s2 [idx2]))
951 int i1 = FilterOptions (s1 [idx1], opt);
952 int i2 = FilterOptions (s2 [idx2], opt);
953 bool special1 = false;
954 bool special2 = false;
956 // If current character is an expander, then
957 // repeat the previous character.
958 ext1 = GetExtenderType (i1);
959 if (ext1 != ExtenderType.None) {
960 if (ctx.PrevCode < 0) {
961 if (ctx.PrevSortKey == null) {
966 sk1 = ctx.PrevSortKey;
969 i1 = FilterExtender (ctx.PrevCode, ext1, opt);
971 ext2 = GetExtenderType (i2);
972 if (ext2 != ExtenderType.None) {
973 if (prev2.Code < 0) {
974 if (prev2.SortKey == null) {
982 i2 = FilterExtender (prev2.Code, ext2, opt);
985 byte cat1 = Category (i1);
986 byte cat2 = Category (i2);
988 // Handle special weight characters
990 if (!stringSort && currentLevel == 5) {
991 lv5At1 = escape1.Source != null ?
992 escape1.Index - escape1.Start :
994 // here Windows has a bug that it does
995 // not consider thirtiary weight.
996 lv5Value1 = Level1 (i1) << 8 + Uni.Level3 (i1);
1002 if (!stringSort && currentLevel == 5) {
1003 lv5At2 = escape2.Source != null ?
1004 escape2.Index - escape2.Start :
1006 // here Windows has a bug that it does
1007 // not consider thirtiary weight.
1008 lv5Value2 = Level1 (i2) << 8 + Uni.Level3 (i2);
1013 if (cat1 == 6 || cat2 == 6) {
1014 if (currentLevel == 5) {
1015 if (lv5Value1 == lv5Value2) {
1016 // There is not really difference here.
1017 lv5At1 = lv5At2 = -1;
1018 lv5Value1 = lv5Value2 = 0;
1026 Contraction ct1 = null;
1027 if (ext1 == ExtenderType.None)
1028 ct1 = GetContraction (s1, idx1, end1);
1033 else if (ct1 != null) {
1034 offset1 = ct1.Source.Length;
1035 if (ct1.SortKey != null) {
1037 for (int i = 0; i < ct1.SortKey.Length; i++)
1038 sk1 [i] = ct1.SortKey [i];
1040 ctx.PrevSortKey = sk1;
1042 else if (escape1.Source == null) {
1043 escape1.Source = s1;
1044 escape1.Start = start1;
1045 escape1.Index = cur1 + ct1.Source.Length;
1047 escape1.Optional = quickCheckPos1;
1048 s1 = ct1.Replacement;
1059 sk1 [1] = Level1 (i1);
1060 if (!ignoreNonSpace && currentLevel > 1)
1061 sk1 [2] = Level2 (i1, ext1);
1062 if (currentLevel > 2)
1063 sk1 [3] = Uni.Level3 (i1);
1064 if (currentLevel > 3)
1065 special1 = Uni.HasSpecialWeight ((char) i1);
1070 Contraction ct2 = null;
1071 if (ext2 == ExtenderType.None)
1072 ct2 = GetContraction (s2, idx2, end2);
1076 else if (ct2 != null) {
1077 idx2 += ct2.Source.Length;
1078 if (ct2.SortKey != null) {
1080 for (int i = 0; i < ct2.SortKey.Length; i++)
1081 sk2 [i] = ct2.SortKey [i];
1083 prev2.SortKey = sk2;
1085 else if (escape2.Source == null) {
1086 escape2.Source = s2;
1087 escape2.Start = start2;
1088 escape2.Index = cur2 + ct2.Source.Length;
1090 escape2.Optional = quickCheckPos2;
1091 s2 = ct2.Replacement;
1102 sk2 [1] = Level1 (i2);
1103 if (!ignoreNonSpace && currentLevel > 1)
1104 sk2 [2] = Level2 (i2, ext2);
1105 if (currentLevel > 2)
1106 sk2 [3] = Uni.Level3 (i2);
1107 if (currentLevel > 3)
1108 special2 = Uni.HasSpecialWeight ((char) i2);
1114 // Add offset here so that it does not skip
1115 // idx1 while s2 has a replacement.
1118 if (!ignoreNonSpace) {
1119 // add diacritical marks in s1 here
1120 while (idx1 < end1) {
1121 if (Category (s1 [idx1]) != 1)
1125 sk1 [2] = (byte) (sk1 [2] +
1126 Level2 (s1 [idx1], ExtenderType.None));
1130 // add diacritical marks in s2 here
1131 while (idx2 < end2) {
1132 if (Category (s2 [idx2]) != 1)
1136 sk2 [2] = (byte) (sk2 [2] +
1137 Level2 (s2 [idx2], ExtenderType.None));
1142 int ret = sk1 [0] - sk2 [0];
1143 ret = ret != 0 ? ret : sk1 [1] - sk2 [1];
1146 if (currentLevel == 1)
1148 if (!ignoreNonSpace) {
1149 ret = sk1 [2] - sk2 [2];
1152 if (immediateBreakup)
1153 return -1; // different
1154 currentLevel = frenchSort ? 2 : 1;
1158 if (currentLevel == 2)
1160 ret = sk1 [3] - sk2 [3];
1163 if (immediateBreakup)
1164 return -1; // different
1168 if (currentLevel == 3)
1170 if (special1 != special2) {
1171 if (immediateBreakup)
1172 return -1; // different
1173 finalResult = special1 ? 1 : -1;
1178 ret = CompareFlagPair (
1179 !Uni.IsJapaneseSmallLetter ((char) i1),
1180 !Uni.IsJapaneseSmallLetter ((char) i2));
1181 ret = ret != 0 ? ret :
1182 ToDashTypeValue (ext1, opt) -
1183 ToDashTypeValue (ext2, opt);
1184 ret = ret != 0 ? ret : CompareFlagPair (
1185 Uni.IsHiragana ((char) i1),
1186 Uni.IsHiragana ((char) i2));
1187 ret = ret != 0 ? ret : CompareFlagPair (
1188 !IsHalfKana ((char) i1, opt),
1189 !IsHalfKana ((char) i2, opt));
1191 if (immediateBreakup)
1192 return -1; // different
1200 // If there were only level 3 or lower differences,
1201 // then we still have to find diacritical differences
1203 if (!ignoreNonSpace &&
1204 finalResult != 0 && currentLevel > 2) {
1205 while (idx1 < end1 && idx2 < end2) {
1206 if (!Uni.IsIgnorableNonSpacing (s1 [idx1]))
1208 if (!Uni.IsIgnorableNonSpacing (s2 [idx2]))
1210 finalResult = Level2 (FilterOptions (s1 [idx1], opt), ext1) - Level2 (FilterOptions (s2 [idx2], opt), ext2);
1211 if (finalResult != 0)
1215 // they should work only for the first character
1216 ext1 = ExtenderType.None;
1217 ext2 = ExtenderType.None;
1220 if (currentLevel == 1 && finalResult != 0) {
1221 while (idx1 < end1) {
1222 if (Uni.IsIgnorableNonSpacing (s1 [idx1]))
1227 while (idx2 < end2) {
1228 if (Uni.IsIgnorableNonSpacing (s2 [idx2]))
1234 // we still have to check level 5
1235 if (finalResult == 0) {
1236 if (lv5At1 < 0 && lv5At2 >= 0)
1238 else if (lv5At2 < 0 && lv5At1 >= 0)
1241 finalResult = lv5At1 - lv5At2;
1242 if (finalResult == 0)
1243 finalResult = lv5Value1 - lv5Value2;
1246 if (finalResult == 0) {
1248 targetConsumed = true;
1250 sourceConsumed = true;
1252 return idx1 != end1 ? 1 : idx2 == end2 ? finalResult : -1;
1255 int CompareFlagPair (bool b1, bool b2)
1257 return b1 == b2 ? 0 : b1 ? 1 : -1;
1262 #region IsPrefix() and IsSuffix()
1264 public bool IsPrefix (string src, string target, CompareOptions opt)
1266 return IsPrefix (src, target, 0, src.Length, opt);
1269 public unsafe bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
1271 if (target.Length == 0)
1273 byte* sk1 = stackalloc byte [4];
1274 byte* sk2 = stackalloc byte [4];
1275 ClearBuffer (sk1, 4);
1276 ClearBuffer (sk2, 4);
1277 Context ctx = new Context (opt, null, null, sk1, sk2, null,
1278 QuickCheckPossible (s, start, start + length, target, 0, target.Length));
1279 return IsPrefix (s, target, start, length, true, ref ctx);
1282 unsafe bool IsPrefix (string s, string target, int start, int length, bool skipHeadingExtenders, ref Context ctx)
1284 bool consumed, dummy;
1285 CompareInternal (s, start, length,
1286 target, 0, target.Length,
1287 out consumed, out dummy, skipHeadingExtenders,
1294 public bool IsSuffix (string src, string target, CompareOptions opt)
1296 return IsSuffix (src, target, src.Length - 1, src.Length, opt);
1299 public unsafe bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
1301 if (target.Length == 0)
1303 int last = LastIndexOf (s, target, start, length, opt);
1304 return last >= 0 && Compare (s, last, s.Length - last, target, 0, target.Length, opt) == 0;
1306 // quick check : simple codepoint comparison
1307 if (s.Length >= target.Length) {
1309 int se = start - length;
1310 for (int i = target.Length - 1; si >= se && i >= 0; i--, si--)
1311 if (s [si] != target [i])
1313 if (si == start + target.Length)
1317 PreviousInfo prev = new PreviousInfo (false);
1318 byte* sk1 = stackalloc byte [4];
1319 byte* sk2 = stackalloc byte [4];
1320 ClearBuffer (sk1, 4);
1321 ClearBuffer (sk2, 4);
1322 return IsSuffix (opt, s, target, start, length, ref prev, sk1, sk2);
1326 unsafe bool IsSuffix (string s, string t, int start, int length, ref Context ctx)
1329 COpt opt = ctx.Option;
1331 for (;tstart < t.Length; tstart++)
1332 if (!IsIgnorable (t [tstart], opt))
1334 if (tstart == t.Length)
1335 return true; // as if target is String.Empty.
1338 // This is still not working. If it is not likely to get working, then just remove it.
1340 int send = start - length;
1341 int ti = t.Length - 1;
1344 int sStep = start + 1;
1345 int tStep = t.Length;
1346 bool sourceConsumed, targetConsumed;
1348 for (; send < si; si--)
1349 if (!IsIgnorable (s [si]))
1351 for (; tend < ti; ti--)
1352 if (!IsIgnorable (t [ti]))
1356 for (; send < si; si--)
1357 if (IsSafe (s [si]))
1359 for (; tend < ti; ti--)
1360 if (IsSafe (t [ti]))
1362 Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, length, t, ti, tstart, sStep - si, tStep - ti);
1363 if (CompareInternal (opt, s, si, sStep - si,
1365 out targetConsumed, out sourceConsumed,
1377 // FIXME: it is not efficient for very long target.
1378 bool sourceConsumed, targetConsumed;
1379 int mismatchCount = 0;
1380 for (int i = 0; i < length; i++) {
1381 ctx.ClearPrevInfo ();
1383 int ret = CompareInternal (s, start - i, i + 1,
1384 t, tstart, t.Length - tstart,
1386 // FIXME: could immediately breakup
1387 out sourceConsumed, true, true, ref ctx);
1390 if (!sourceConsumed && targetConsumed)
1392 if (!targetConsumed) {
1394 if (mismatchCount > Uni.MaxExpansionLength)
1395 // The largest length of an
1396 // expansion is 3, so if the
1397 // target was not consumed more
1398 // than 3 times, then the tail
1399 // character does not match.
1409 #region IndexOf() / LastIndexOf()
1413 public int IndexOf (string s, string target, CompareOptions opt)
1415 return IndexOf (s, target, 0, s.Length, opt);
1418 int QuickIndexOf (string s, string target, int start, int length, out bool testWasUnable)
1420 int testedSourcePos = -1, testedTargetPos = -1;
1422 testWasUnable = true;
1423 if (target.Length == 0)
1425 else if (target.Length > length)
1427 testWasUnable = false;
1429 int end = start + length - target.Length + 1;
1430 for (int i = start; i < end; i++) {
1432 for (int j = 0; j < target.Length; j++) {
1433 if (testedTargetPos < j) {
1434 if (target [j] >= 0x80) {
1435 testWasUnable = true;
1439 testedTargetPos = j;
1441 if (testedSourcePos < i + j) {
1442 if (s [i + j] >= 0x80) {
1443 testWasUnable = true;
1447 testedSourcePos = i + j;
1449 if (s [i + j] != target [j]) {
1461 public unsafe int IndexOf (string s, string target, int start, int length, CompareOptions opt)
1463 if (opt == CompareOptions.Ordinal)
1464 return IndexOfOrdinal (s, target, start, length);
1465 if (opt == CompareOptions.OrdinalIgnoreCase)
1466 return IndexOfOrdinalIgnoreCase (s, target, start, length);
1467 if (opt == CompareOptions.None) {
1469 int ret = QuickIndexOf (s, target, start, length, out testWasUnable);
1474 byte* alwaysMatchFlags = stackalloc byte [16];
1475 byte* neverMatchFlags = stackalloc byte [16];
1476 byte* targetSortKey = stackalloc byte [4];
1477 byte* sk1 = stackalloc byte [4];
1478 byte* sk2 = stackalloc byte [4];
1479 ClearBuffer (alwaysMatchFlags, 16);
1480 ClearBuffer (neverMatchFlags, 16);
1481 ClearBuffer (targetSortKey, 4);
1482 ClearBuffer (sk1, 4);
1483 ClearBuffer (sk2, 4);
1484 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1486 return IndexOf (s, target, start, length,
1487 targetSortKey, ref ctx);
1490 // note that it wouldn't be used since ordinal IndexOf()
1491 // should be directed to icall.
1492 int IndexOfOrdinal (string s, string target, int start, int length)
1494 if (target.Length == 0)
1496 else if (target.Length > length)
1499 int end = start + length - target.Length + 1;
1500 for (int i = start; i < end; i++) {
1502 for (int j = 0; j < target.Length; j++) {
1503 if (s [i + j] != target [j]) {
1515 int IndexOfOrdinalIgnoreCase (string s, string target, int start, int length)
1517 if (target.Length == 0)
1519 else if (target.Length > length)
1522 int end = start + length - target.Length + 1;
1523 for (int i = start; i < end; i++) {
1525 for (int j = 0; j < target.Length; j++) {
1526 // I think almost all text has more lower letters than upper ones. Thus with this invariant comparison ToLower() should be faster since it costs less operations.
1527 if (textInfo.ToLower (s [i + j]) != textInfo.ToLower (target [j])) {
1541 public int IndexOf (string s, char target, CompareOptions opt)
1543 return IndexOf (s, target, 0, s.Length, opt);
1546 public unsafe int IndexOf (string s, char target, int start, int length, CompareOptions opt)
1548 if (opt == CompareOptions.Ordinal)
1549 return IndexOfOrdinal (s, target, start, length);
1550 if (opt == CompareOptions.OrdinalIgnoreCase)
1551 return IndexOfOrdinalIgnoreCase (s, target, start, length);
1552 byte* alwaysMatchFlags = stackalloc byte [16];
1553 byte* neverMatchFlags = stackalloc byte [16];
1554 byte* targetSortKey = stackalloc byte [4];
1555 byte* sk1 = stackalloc byte [4];
1556 byte* sk2 = stackalloc byte [4];
1557 ClearBuffer (alwaysMatchFlags, 16);
1558 ClearBuffer (neverMatchFlags, 16);
1559 ClearBuffer (targetSortKey, 4);
1560 ClearBuffer (sk1, 4);
1561 ClearBuffer (sk2, 4);
1562 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1564 // If target is contraction, then use string search.
1565 Contraction ct = GetContraction (target);
1567 if (ct.Replacement != null)
1568 return IndexOf (s, ct.Replacement,
1569 start, length, targetSortKey, ref ctx);
1571 for (int i = 0; i < ct.SortKey.Length; i++)
1572 sk2 [i] = ct.SortKey [i];
1573 return IndexOfSortKey (s, start, length, sk2, char.MinValue, -1, true, ref ctx);
1576 int ti = FilterOptions ((int) target, opt);
1577 targetSortKey [0] = Category (ti);
1578 targetSortKey [1] = Level1 (ti);
1579 if ((opt & COpt.IgnoreNonSpace) == 0)
1581 Level2 (ti, ExtenderType.None);
1582 targetSortKey [3] = Uni.Level3 (ti);
1583 return IndexOfSortKey (s, start, length,
1584 targetSortKey, target, ti,
1585 !Uni.HasSpecialWeight ((char) ti), ref ctx);
1589 // note that it wouldn't be used since ordinal IndexOf()
1590 // should be directed to icall.
1591 int IndexOfOrdinal (string s, char target, int start, int length)
1593 int end = start + length;
1594 for (int i = start; i < end; i++)
1595 if (s [i] == target)
1600 int IndexOfOrdinalIgnoreCase (string s, char target, int start, int length)
1602 int end = start + length;
1603 target = textInfo.ToLower (target);
1604 for (int i = start; i < end; i++)
1605 if (textInfo.ToLower (s [i]) == target)
1610 // Searches target byte[] keydata
1611 unsafe int IndexOfSortKey (string s, int start, int length, byte* sortkey, char target, int ti, bool noLv4, ref Context ctx)
1613 int end = start + length;
1618 if (MatchesForward (s, ref idx, end, ti, sortkey, noLv4, ref ctx))
1624 // Searches string. Search head character (or keydata when
1625 // the head is contraction sortkey) and try IsPrefix().
1626 unsafe int IndexOf (string s, string target, int start, int length, byte* targetSortKey, ref Context ctx)
1628 COpt opt = ctx.Option;
1630 for (; tidx < target.Length; tidx++)
1631 if (!IsIgnorable (target [tidx], opt))
1633 if (tidx == target.Length)
1635 Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1636 string replace = ct != null ? ct.Replacement : null;
1637 byte* sk = replace == null ? targetSortKey : null;
1639 char tc = char.MinValue;
1641 if (ct != null && sk != null) {
1642 for (int i = 0; i < ct.SortKey.Length; i++)
1643 sk [i] = ct.SortKey [i];
1644 } else if (sk != null) {
1646 ti = FilterOptions (target [tidx], opt);
1647 sk [0] = Category (ti);
1648 sk [1] = Level1 (ti);
1649 if ((opt & COpt.IgnoreNonSpace) == 0)
1650 sk [2] = Level2 (ti, ExtenderType.None);
1651 sk [3] = Uni.Level3 (ti);
1652 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1655 for (tidx++; tidx < target.Length; tidx++) {
1656 if (Category (target [tidx]) != 1)
1660 sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1666 if (replace != null)
1667 idx = IndexOf (s, replace, start, length, targetSortKey, ref ctx);
1669 idx = IndexOfSortKey (s, start, length, sk, tc, ti, noLv4, ref ctx);
1672 length -= idx - start;
1674 if (IsPrefix (s, target, start, length, false, ref ctx))
1676 Contraction cts = GetContraction (s, start, length);
1678 start += cts.Source.Length;
1679 length -= cts.Source.Length;
1684 } while (length > 0);
1689 // There are the same number of LastIndexOf() methods as
1690 // IndexOf() related methods, with the same functionalities
1696 public int LastIndexOf (string s, string target, CompareOptions opt)
1698 return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1701 public unsafe int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
1703 if (opt == CompareOptions.Ordinal)
1704 return LastIndexOfOrdinal (s, target, start, length);
1705 if (opt == CompareOptions.OrdinalIgnoreCase)
1706 return LastIndexOfOrdinalIgnoreCase (s, target, start, length);
1707 byte* alwaysMatchFlags = stackalloc byte [16];
1708 byte* neverMatchFlags = stackalloc byte [16];
1709 byte* targetSortKey = stackalloc byte [4];
1710 byte* sk1 = stackalloc byte [4];
1711 byte* sk2 = stackalloc byte [4];
1712 ClearBuffer (alwaysMatchFlags, 16);
1713 ClearBuffer (neverMatchFlags, 16);
1714 ClearBuffer (targetSortKey, 4);
1715 ClearBuffer (sk1, 4);
1716 ClearBuffer (sk2, 4);
1717 // For some unknown reason CompareQuick() does not work fine w/ LastIndexOf().
1718 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1719 return LastIndexOf (s, target, start, length,
1720 targetSortKey, ref ctx);
1723 int LastIndexOfOrdinal (string s, string target, int start, int length)
1725 if (target.Length == 0)
1727 if (s.Length < target.Length || target.Length > length)
1729 int end = start - length + target.Length -1;
1730 char tail = target [target.Length - 1];
1731 for (int i = start; i > end;) {
1732 if (s [i] != tail) {
1736 int x = i - target.Length + 1;
1738 bool mismatch = false;
1739 for (int j = target.Length - 2; j >= 0; j--)
1740 if (s [x + j] != target [j]) {
1751 int LastIndexOfOrdinalIgnoreCase (string s, string target, int start, int length)
1753 if (target.Length == 0)
1755 if (s.Length < length || target.Length > length)
1757 int end = start - length + target.Length - 1;
1758 char tail = textInfo.ToLower (target [target.Length - 1]);
1759 for (int i = start; i > end;) {
1760 if (textInfo.ToLower (s [i]) != tail) {
1764 int x = i - target.Length + 1;
1766 bool mismatch = false;
1767 for (int j = target.Length - 2; j >= 0; j--)
1768 if (textInfo.ToLower (s [x + j]) != textInfo.ToLower (target [j])) {
1781 public int LastIndexOf (string s, char target, CompareOptions opt)
1783 return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1786 public unsafe int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
1788 if (opt == CompareOptions.Ordinal)
1789 return LastIndexOfOrdinal (s, target, start, length);
1790 if (opt == CompareOptions.OrdinalIgnoreCase)
1791 return LastIndexOfOrdinalIgnoreCase (s, target, start, length);
1792 byte* alwaysMatchFlags = stackalloc byte [16];
1793 byte* neverMatchFlags = stackalloc byte [16];
1794 byte* targetSortKey = stackalloc byte [4];
1795 byte* sk1 = stackalloc byte [4];
1796 byte* sk2 = stackalloc byte [4];
1797 ClearBuffer (alwaysMatchFlags, 16);
1798 ClearBuffer (neverMatchFlags, 16);
1799 ClearBuffer (targetSortKey, 4);
1800 ClearBuffer (sk1, 4);
1801 ClearBuffer (sk2, 4);
1802 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1804 // If target is a replacement contraction, then use
1806 Contraction ct = GetContraction (target);
1808 if (ct.Replacement != null)
1809 return LastIndexOf (s,
1810 ct.Replacement, start, length,
1811 targetSortKey, ref ctx);
1813 for (int bi = 0; bi < ct.SortKey.Length; bi++)
1814 sk2 [bi] = ct.SortKey [bi];
1815 return LastIndexOfSortKey (s, start,
1821 int ti = FilterOptions ((int) target, opt);
1822 targetSortKey [0] = Category (ti);
1823 targetSortKey [1] = Level1 (ti);
1824 if ((opt & COpt.IgnoreNonSpace) == 0)
1825 targetSortKey [2] = Level2 (ti, ExtenderType.None);
1826 targetSortKey [3] = Uni.Level3 (ti);
1827 return LastIndexOfSortKey (s, start, start,
1828 length, targetSortKey,
1829 ti, !Uni.HasSpecialWeight ((char) ti),
1834 int LastIndexOfOrdinal (string s, char target, int start, int length)
1838 int end = start - length;
1839 for (int i = start; i > end; i--)
1840 if (s [i] == target)
1845 int LastIndexOfOrdinalIgnoreCase (string s, char target, int start, int length)
1849 int end = start - length;
1850 char c = textInfo.ToUpper (target);
1851 for (int i = start; i > end; i--)
1852 if (textInfo.ToUpper (s [i]) == c)
1857 // Searches target byte[] keydata
1858 unsafe int LastIndexOfSortKey (string s, int start, int orgStart, int length, byte* sortkey, int ti, bool noLv4, ref Context ctx)
1860 int end = start - length;
1864 if (MatchesBackward (s, ref idx, end, orgStart,
1865 ti, sortkey, noLv4, ref ctx))
1871 // Searches string. Search head character (or keydata when
1872 // the head is contraction sortkey) and try IsPrefix().
1873 unsafe int LastIndexOf (string s, string target, int start, int length, byte* targetSortKey, ref Context ctx)
1875 COpt opt = ctx.Option;
1876 int orgStart = start;
1878 for (; tidx < target.Length; tidx++)
1879 if (!IsIgnorable (target [tidx], opt))
1881 if (tidx == target.Length)
1883 Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1884 string replace = ct != null ? ct.Replacement : null;
1885 byte* sk = replace == null ? targetSortKey : null;
1889 if (ct != null && sk != null) {
1890 for (int i = 0; i < ct.SortKey.Length; i++)
1891 sk [i] = ct.SortKey [i];
1892 } else if (sk != null) {
1893 ti = FilterOptions (target [tidx], opt);
1894 sk [0] = Category (ti);
1895 sk [1] = Level1 (ti);
1896 if ((opt & COpt.IgnoreNonSpace) == 0)
1897 sk [2] = Level2 (ti, ExtenderType.None);
1898 sk [3] = Uni.Level3 (ti);
1899 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1902 for (tidx++; tidx < target.Length; tidx++) {
1903 if (Category (target [tidx]) != 1)
1907 sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1914 if (replace != null)
1915 idx = LastIndexOf (s, replace,
1917 targetSortKey, ref ctx);
1919 idx = LastIndexOfSortKey (s, start, orgStart, length, sk, ti, noLv4, ref ctx);
1922 length -= start - idx;
1925 if (IsPrefix (s, target, idx, orgStart - idx + 1, false, ref ctx)) {
1926 for (;idx < orgStart; idx++)
1927 if (!IsIgnorable (s [idx], opt))
1931 Contraction cts = GetContraction (s, idx, orgStart - idx + 1);
1933 start -= cts.Source.Length;
1934 length -= cts.Source.Length;
1939 } while (length > 0);
1943 unsafe bool MatchesForward (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ref Context ctx)
1946 if (ctx.AlwaysMatchFlags != null && si < 128 && (ctx.AlwaysMatchFlags [si / 8] & (1 << (si % 8))) != 0)
1948 if (ctx.NeverMatchFlags != null &&
1950 (ctx.NeverMatchFlags [si / 8] & (1 << (si % 8))) != 0) {
1954 ExtenderType ext = GetExtenderType (s [idx]);
1955 Contraction ct = null;
1956 if (MatchesForwardCore (s, ref idx, end, ti, sortkey, noLv4, ext, ref ct, ref ctx)) {
1957 if (ctx.AlwaysMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1958 ctx.AlwaysMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1961 if (ctx.NeverMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1962 ctx.NeverMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1966 unsafe bool MatchesForwardCore (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
1968 COpt opt = ctx.Option;
1969 byte* charSortKey = ctx.Buffer1;
1970 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1972 if (ext == ExtenderType.None)
1973 ct = GetContraction (s, idx, end);
1974 else if (ctx.PrevCode < 0) {
1975 if (ctx.PrevSortKey == null) {
1979 charSortKey = ctx.PrevSortKey;
1982 si = FilterExtender (ctx.PrevCode, ext, opt);
1983 // if lv4 exists, it never matches contraction
1985 idx += ct.Source.Length;
1988 if (ct.SortKey != null) {
1989 for (int i = 0; i < 4; i++)
1990 charSortKey [i] = sortkey [i];
1992 ctx.PrevSortKey = charSortKey;
1994 // Here is the core of LAMESPEC
1995 // described at the top of the source.
1997 return MatchesForward (ct.Replacement, ref dummy,
1998 ct.Replacement.Length, ti, sortkey, noLv4, ref ctx);
2002 si = FilterOptions (s [idx], opt);
2004 charSortKey [0] = Category (si);
2005 bool noMatch = false;
2006 if (sortkey [0] == charSortKey [0])
2007 charSortKey [1] = Level1 (si);
2010 if (!ignoreNonSpace && sortkey [1] == charSortKey [1])
2011 charSortKey [2] = Level2 (si, ext);
2012 else if (!ignoreNonSpace)
2015 for (; idx < end; idx++) {
2016 if (Category (s [idx]) != 1)
2021 charSortKey [3] = Uni.Level3 (si);
2022 if (charSortKey [0] != 1)
2025 for (; idx < end; idx++) {
2026 if (Category (s [idx]) != 1)
2030 if (charSortKey [2] == 0)
2031 charSortKey [2] = 2;
2032 charSortKey [2] = (byte) (charSortKey [2]
2033 + Level2 (s [idx], ExtenderType.None));
2036 return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);
2039 unsafe bool MatchesPrimitive (COpt opt, byte* source, int si, ExtenderType ext, byte* target, int ti, bool noLv4)
2041 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
2042 if (source [0] != target [0] ||
2043 source [1] != target [1] ||
2044 (!ignoreNonSpace && source [2] != target [2]) ||
2045 source [3] != target [3])
2047 if (noLv4 && (si < 0 || !Uni.HasSpecialWeight ((char) si)))
2051 // Since target can never be an extender, if the source
2052 // is an expander and it matters, then they never match.
2053 if (!ignoreNonSpace && ext == ExtenderType.Conditional)
2055 if (Uni.IsJapaneseSmallLetter ((char) si) !=
2056 Uni.IsJapaneseSmallLetter ((char) ti) ||
2057 ToDashTypeValue (ext, opt) !=
2058 // FIXME: we will have to specify correct value for target
2059 ToDashTypeValue (ExtenderType.None, opt) ||
2060 !Uni.IsHiragana ((char) si) !=
2061 !Uni.IsHiragana ((char) ti) ||
2062 IsHalfKana ((char) si, opt) !=
2063 IsHalfKana ((char) ti, opt))
2068 unsafe bool MatchesBackward (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ref Context ctx)
2071 if (ctx.AlwaysMatchFlags != null && si < 128 && (ctx.AlwaysMatchFlags [si / 8] & (1 << (si % 8))) != 0)
2073 if (ctx.NeverMatchFlags != null && si < 128 && (ctx.NeverMatchFlags [si / 8] & (1 << (si % 8))) != 0) {
2077 ExtenderType ext = GetExtenderType (s [idx]);
2078 Contraction ct = null;
2079 if (MatchesBackwardCore (s, ref idx, end, orgStart, ti, sortkey, noLv4, ext, ref ct, ref ctx)) {
2080 if (ctx.AlwaysMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
2081 ctx.AlwaysMatchFlags [si / 8] |= (byte) (1 << (si % 8));
2084 if (ctx.NeverMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128) {
2085 ctx.NeverMatchFlags [si / 8] |= (byte) (1 << (si % 8));
2090 unsafe bool MatchesBackwardCore (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
2092 COpt opt = ctx.Option;
2093 byte* charSortKey = ctx.Buffer1;
2094 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
2097 // To handle extenders in source, we need to
2098 // check next _primary_ character.
2099 if (ext != ExtenderType.None) {
2100 byte diacritical = 0;
2101 for (int tmp = idx; ; tmp--) {
2102 if (tmp < 0) // heading extender
2104 if (IsIgnorable (s [tmp], opt))
2106 int tmpi = FilterOptions (s [tmp], opt);
2107 byte category = Category (tmpi);
2108 if (category == 1) {
2109 diacritical = Level2 (tmpi, ExtenderType.None);
2112 si = FilterExtender (tmpi, ext, opt);
2113 charSortKey [0] = category;
2114 charSortKey [1] = Level1 (si);
2115 if (!ignoreNonSpace)
2116 charSortKey [2] = Level2 (si, ext);
2117 charSortKey [3] = Uni.Level3 (si);
2118 if (ext != ExtenderType.Conditional &&
2121 (charSortKey [2] == 0) ?
2122 (byte) (diacritical + 2) :
2128 if (ext == ExtenderType.None)
2129 ct = GetTailContraction (s, idx, end);
2130 // if lv4 exists, it never matches contraction
2132 idx -= ct.Source.Length;
2135 if (ct.SortKey != null) {
2136 for (int i = 0; i < 4; i++)
2137 charSortKey [i] = sortkey [i];
2139 ctx.PrevSortKey = charSortKey;
2141 // Here is the core of LAMESPEC
2142 // described at the top of the source.
2143 int dummy = ct.Replacement.Length - 1;
2144 return 0 <= LastIndexOfSortKey (
2145 ct.Replacement, dummy, dummy,
2146 ct.Replacement.Length, sortkey,
2147 ti, noLv4, ref ctx);
2149 } else if (ext == ExtenderType.None) {
2151 si = FilterOptions (s [idx], opt);
2153 bool noMatch = false;
2154 charSortKey [0] = Category (si);
2155 if (charSortKey [0] == sortkey [0])
2156 charSortKey [1] = Level1 (si);
2159 if (!ignoreNonSpace && charSortKey [1] == sortkey [1])
2160 charSortKey [2] = Level2 (si, ext);
2161 else if (!ignoreNonSpace)
2165 charSortKey [3] = Uni.Level3 (si);
2166 if (charSortKey [0] != 1)
2169 if (ext == ExtenderType.None) {
2170 for (int tmp = cur + 1; tmp < orgStart; tmp++) {
2171 if (Category (s [tmp]) != 1)
2175 if (charSortKey [2] == 0)
2176 charSortKey [2] = 2;
2177 charSortKey [2] = (byte) (charSortKey [2]
2178 + Level2 (s [tmp], ExtenderType.None));
2181 return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);