2 // SimpleCollator.cs : the core collator implementation
5 // Atsushi Enomoto <atsushi@ximian.com>
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 // Here's a summary for supporting contractions, expansions and diacritical
33 // Diacritical mapping is a simple tailoring rule that "remaps" diacritical
34 // weight value from one to another. For now it applies to all range of
35 // characters, but at some stage we might need to limit the remapping ranges.
37 // A Contraction consists of a string (actually char[]) and a sortkey entry
38 // (i.e. byte[]). It indicates that a sequence of characters is interpreted
39 // as a single character that has the mapped sortkey value. There is no
40 // character which goes across "special rules". When the collator encountered
41 // such a sequence of characters, it just returns the sortkey value without
42 // further replacement.
44 // Since it is still likely to happen that a contraction sequence matches
45 // other character than the identical sequence (depending on CompareOptions
46 // and of course, defined sortkey value itself), comparison cannot be done
47 // at source char[] level.
49 // In IndexOf(), the first character in the target string or the target char
50 // itself is turned into sortkey bytes. If the character has a contraction and
51 // that is sortkey map, then it is used instead. If the contraction exists and
52 // that is replacement map, then the first character of the replacement string
53 // is searched instead. IndexOf() always searches only for the top character,
54 // and if it returned negative value, then it returns -1. Otherwise, it then
55 // tries IsPrefix() from that location. If it returns true, then it returns
58 // LAMESPEC: IndexOf() is lame as a whole API. It never matches in the middle
59 // of expansion and there is no proper way to return such indexes within
60 // a single int return value.
62 // For example, try below in .NET:
63 // IndexOf("\u00E6", "a")
64 // IndexOf("\u00E6", "e")
69 using System.Collections;
70 using System.Globalization;
72 using Uni = Mono.Globalization.Unicode.MSCompatUnicodeTable;
73 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
74 using COpt = System.Globalization.CompareOptions;
76 namespace Mono.Globalization.Unicode
78 internal class SimpleCollator
80 // this environment variable is for debugging quick check.
81 #pragma warning disable 169, 414
82 static bool QuickCheckDisabled =
83 Environment.internalGetEnvironmentVariable (
84 "MONO_COLLATION_QUICK_CHECK_DISABLED") == "yes";
85 #pragma warning restore 169, 414
87 unsafe internal struct Context
89 public Context (CompareOptions opt, byte* alwaysMatchFlags, byte* neverMatchFlags, byte* buffer1, byte* buffer2, byte* prev1, bool quickCheckPossible)
92 AlwaysMatchFlags = alwaysMatchFlags;
93 NeverMatchFlags = neverMatchFlags;
98 QuickCheckPossible = quickCheckPossible;
101 public readonly CompareOptions Option;
102 public readonly byte* NeverMatchFlags;
103 public readonly byte* AlwaysMatchFlags;
104 public byte* Buffer1;
105 public byte* Buffer2;
107 public byte* PrevSortKey;
108 public readonly bool QuickCheckPossible;
110 public void ClearPrevInfo ()
117 unsafe struct PreviousInfo
120 public byte* SortKey;
122 public PreviousInfo (bool dummy)
131 public string Source;
138 static SimpleCollator invariant =
139 new SimpleCollator (CultureInfo.InvariantCulture);
141 readonly TextInfo textInfo; // for ToLower().
142 readonly CodePointIndexer cjkIndexer;
143 readonly Contraction [] contractions;
144 readonly Level2Map [] level2Maps;
145 // This flag marks characters as "unsafe", where the character
146 // could be used as part of a contraction (whose length > 1).
147 readonly byte [] unsafeFlags;
149 unsafe readonly byte* cjkCatTable;
150 unsafe readonly byte* cjkLv1Table;
151 unsafe readonly byte* cjkLv2Table;
152 readonly CodePointIndexer cjkLv2Indexer;
154 readonly bool frenchSort;
157 const int UnsafeFlagLength = 0x300 / 8;
159 // readonly byte [] contractionFlags = new byte [16];
161 // This array is internally used inside IndexOf() to store
162 // "no need to check" ASCII characters.
164 // Now that it should be thread safe, this array is allocated
166 // byte [] neverMatchFlags = new byte [128 / 8];
168 #region .ctor() and split functions
170 public SimpleCollator (CultureInfo culture)
173 textInfo = culture.TextInfo;
176 SetCJKTable (culture, ref cjkIndexer,
177 ref cjkCatTable, ref cjkLv1Table,
178 ref cjkLv2Indexer, ref cjkLv2Table);
181 // Get tailoring info
182 TailoringInfo t = null;
183 for (CultureInfo ci = culture; ci.LCID != 127; ci = ci.Parent) {
184 t = Uni.GetTailoringInfo (ci.LCID);
188 if (t == null) // then use invariant
189 t = Uni.GetTailoringInfo (127);
191 frenchSort = t.FrenchSort;
192 Uni.BuildTailoringTables (culture, t, ref contractions,
194 unsafeFlags = new byte [UnsafeFlagLength];
195 foreach (Contraction c in contractions) {
196 if (c.Source.Length > 1)
197 foreach (char ch in c.Source)
198 unsafeFlags [(int) ch / 8 ]
199 |= (byte) (1 << ((int) ch & 7));
200 // for (int i = 0; i < c.Source.Length; i++) {
201 // int ch = c.Source [i];
204 // contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
208 foreach (Contraction c in invariant.contractions) {
209 if (c.Source.Length > 1)
210 foreach (char ch in c.Source)
211 unsafeFlags [(int) ch / 8 ]
212 |= (byte) (1 << ((int) ch & 7));
213 // for (int i = 0; i < c.Source.Length; i++) {
214 // int ch = c.Source [i];
217 // contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
221 // FIXME: Since tailorings are mostly for latin
222 // (and in some cases Cyrillic) characters, it would
223 // be much better for performance to store "start
224 // indexes" for > 370 (culture-specific letters).
227 // dump tailoring table
228 Console.WriteLine ("******** building table for {0} : contractions - {1} diacritical - {2}",
229 culture.LCID, contractions.Length, level2Maps.Length);
230 foreach (Contraction c in contractions) {
231 foreach (char cc in c.Source)
232 Console.Write ("{0:X4} ", (int) cc);
233 Console.WriteLine (" -> '{0}'", c.Replacement);
238 unsafe private void SetCJKTable (
239 CultureInfo culture, ref CodePointIndexer cjkIndexer,
240 ref byte* catTable, ref byte* lv1Table,
241 ref CodePointIndexer lv2Indexer, ref byte* lv2Table)
243 string name = GetNeutralCulture (culture).Name;
245 Uni.FillCJK (name, ref cjkIndexer, ref catTable,
246 ref lv1Table, ref lv2Indexer, ref lv2Table);
249 static CultureInfo GetNeutralCulture (CultureInfo info)
251 CultureInfo ret = info;
252 while (ret.Parent != null && ret.Parent.LCID != 127)
259 unsafe byte Category (int cp)
261 if (cp < 0x3000 || cjkCatTable == null)
262 return Uni.Category (cp);
263 int idx = cjkIndexer.ToIndex (cp);
264 return idx < 0 ? Uni.Category (cp) : cjkCatTable [idx];
267 unsafe byte Level1 (int cp)
269 if (cp < 0x3000 || cjkLv1Table == null)
270 return Uni.Level1 (cp);
271 int idx = cjkIndexer.ToIndex (cp);
272 return idx < 0 ? Uni.Level1 (cp) : cjkLv1Table [idx];
275 unsafe byte Level2 (int cp, ExtenderType ext)
277 if (ext == ExtenderType.Buggy)
279 else if (ext == ExtenderType.Conditional)
282 if (cp < 0x3000 || cjkLv2Table == null)
283 return Uni.Level2 (cp);
284 int idx = cjkLv2Indexer.ToIndex (cp);
285 byte ret = idx < 0 ? (byte) 0 : cjkLv2Table [idx];
288 ret = Uni.Level2 (cp);
289 if (level2Maps.Length == 0)
291 for (int i = 0; i < level2Maps.Length; i++) {
292 if (level2Maps [i].Source == ret)
293 return level2Maps [i].Replace;
294 else if (level2Maps [i].Source > ret)
300 static bool IsHalfKana (int cp, COpt opt)
302 return (opt & COpt.IgnoreWidth) != 0 ||
303 Uni.IsHalfWidthKana ((char) cp);
306 Contraction GetContraction (string s, int start, int end)
308 // int si = s [start];
309 // if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
311 Contraction c = GetContraction (s, start, end, contractions);
312 if (c != null || lcid == 127)
314 return GetContraction (s, start, end, invariant.contractions);
317 Contraction GetContraction (string s, int start, int end, Contraction [] clist)
319 for (int i = 0; i < clist.Length; i++) {
320 Contraction ct = clist [i];
321 int diff = ct.Source [0] - s [start];
323 return null; // it's already sorted
326 char [] chars = ct.Source;
327 if (end - start < chars.Length)
330 for (int n = 0; n < chars.Length; n++)
331 if (s [start + n] != chars [n]) {
341 Contraction GetTailContraction (string s, int start, int end)
343 // int si = s [end - 1];
344 // if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
346 Contraction c = GetTailContraction (s, start, end, contractions);
347 if (c != null || lcid == 127)
349 return GetTailContraction (s, start, end, invariant.contractions);
352 Contraction GetTailContraction (string s, int start, int end, Contraction [] clist)
354 if (start == end || end < -1 || start >= s.Length || s.Length <= end + 1)
355 throw new SystemException (String.Format ("MONO internal error. Failed to get TailContraction. start = {0} end = {1} string = '{2}'", start, end, s));
356 for (int i = 0; i < clist.Length; i++) {
357 Contraction ct = clist [i];
358 char [] chars = ct.Source;
359 if (chars.Length > start - end)
361 if (chars [chars.Length - 1] != s [start])
365 for (int n = 0, spos = start - chars.Length + 1;
368 if (s [spos] != chars [n]) {
379 Contraction GetContraction (char c)
381 Contraction ct = GetContraction (c, contractions);
382 if (ct != null || lcid == 127)
384 return GetContraction (c, invariant.contractions);
387 Contraction GetContraction (char c, Contraction [] clist)
389 for (int i = 0; i < clist.Length; i++) {
390 Contraction ct = clist [i];
391 if (ct.Source [0] > c)
392 return null; // it's already sorted
393 if (ct.Source [0] == c && ct.Source.Length == 1)
399 int FilterOptions (int i, COpt opt)
401 if ((opt & COpt.IgnoreWidth) != 0) {
402 int x = Uni.ToWidthCompat (i);
406 if ((opt & COpt.OrdinalIgnoreCase) != 0)
407 i = textInfo.ToLower ((char) i);
408 if ((opt & COpt.IgnoreCase) != 0)
409 i = textInfo.ToLower ((char) i);
410 if ((opt & COpt.IgnoreKanaType) != 0)
411 i = Uni.ToKanaTypeInsensitive (i);
423 ExtenderType GetExtenderType (int i)
425 // LAMESPEC: Windows expects true for U+3005, but
426 // sometimes it does not represent to repeat just
428 // Windows also expects true for U+3031 and U+3032,
429 // but they should *never* repeat one character.
431 // U+2015 becomes an extender only when it is Japanese
433 return lcid == 16 ? ExtenderType.Conditional :
436 if (i < 0x3005 || i > 0xFF70)
437 return ExtenderType.None;
442 return ExtenderType.Simple;
444 return ExtenderType.Conditional;
447 return ExtenderType.Voiced;
451 return ExtenderType.None;
453 case 0x3005: // LAMESPEC: see above.
454 return ExtenderType.Buggy;
455 case 0x3031: // LAMESPEC: see above.
456 case 0x3032: // LAMESPEC: see above.
459 return ExtenderType.Simple;
462 return ExtenderType.Voiced;
464 return ExtenderType.Conditional;
466 return ExtenderType.None;
470 static byte ToDashTypeValue (ExtenderType ext, COpt opt)
472 if ((opt & COpt.IgnoreNonSpace) != 0) // LAMESPEC: huh, why?
475 case ExtenderType.None:
477 case ExtenderType.Conditional:
484 int FilterExtender (int i, ExtenderType ext, COpt opt)
486 if (ext == ExtenderType.Conditional &&
487 Uni.HasSpecialWeight ((char) i)) {
488 bool half = IsHalfKana ((char) i, opt);
489 bool katakana = !Uni.IsHiragana ((char) i);
490 switch (Level1 (i) & 7) {
492 return half ? 0xFF71 : katakana ? 0x30A2 : 0x3042;
494 return half ? 0xFF72 : katakana ? 0x30A4 : 0x3044;
496 return half ? 0xFF73 : katakana ? 0x30A6 : 0x3046;
498 return half ? 0xFF74 : katakana ? 0x30A8 : 0x3048;
500 return half ? 0xFF75 : katakana ? 0x30AA : 0x304A;
506 static bool IsIgnorable (int i, COpt opt)
508 return Uni.IsIgnorable (i, (byte) (1 +
509 ((opt & COpt.IgnoreSymbols) != 0 ? 2 : 0) +
510 ((opt & COpt.IgnoreNonSpace) != 0 ? 4 : 0)));
516 return i / 8 >= unsafeFlags.Length ? true : (unsafeFlags [i / 8] & (1 << (i % 8))) == 0;
521 public SortKey GetSortKey (string s)
523 return GetSortKey (s, CompareOptions.None);
526 public SortKey GetSortKey (string s, CompareOptions options)
528 return GetSortKey (s, 0, s.Length, options);
531 public SortKey GetSortKey (string s, int start, int length, CompareOptions options)
533 SortKeyBuffer buf = new SortKeyBuffer (lcid);
534 buf.Initialize (options, lcid, s, frenchSort);
535 int end = start + length;
536 GetSortKey (s, start, end, buf, options);
537 return buf.GetResultAndReset ();
540 unsafe void GetSortKey (string s, int start, int end,
541 SortKeyBuffer buf, CompareOptions opt)
543 byte* prevbuf = stackalloc byte [4];
544 ClearBuffer (prevbuf, 4);
545 Context ctx = new Context (opt, null, null, null, null, prevbuf, false);
547 for (int n = start; n < end; n++) {
550 ExtenderType ext = GetExtenderType (i);
551 if (ext != ExtenderType.None) {
552 i = FilterExtender (ctx.PrevCode, ext, opt);
554 FillSortKeyRaw (i, ext, buf, opt);
555 else if (ctx.PrevSortKey != null) {
556 byte* b = ctx.PrevSortKey;
560 b [2] != 1 ? b [2] : Level2 (i, ext),
561 b [3] != 1 ? b [3] : Uni.Level3 (i));
563 // otherwise do nothing.
564 // (if the extender is the first char
565 // in the string, then just ignore.)
569 if (IsIgnorable (i, opt))
571 i = FilterOptions (i, opt);
573 Contraction ct = GetContraction (s, n, end);
575 if (ct.Replacement != null) {
576 GetSortKey (ct.Replacement, 0, ct.Replacement.Length, buf, opt);
578 byte* b = ctx.PrevSortKey;
579 for (int bi = 0; bi < ct.SortKey.Length; bi++)
580 b [bi] = ct.SortKey [bi];
584 b [2] != 1 ? b [2] : Level2 (i, ext),
585 b [3] != 1 ? b [3] : Uni.Level3 (i));
588 n += ct.Source.Length - 1;
591 if (!Uni.IsIgnorableNonSpacing (i))
593 FillSortKeyRaw (i, ExtenderType.None, buf, opt);
598 void FillSortKeyRaw (int i, ExtenderType ext,
599 SortKeyBuffer buf, CompareOptions opt)
601 if (0x3400 <= i && i <= 0x4DB5) {
602 int diff = i - 0x3400;
603 buf.AppendCJKExtension (
604 (byte) (0x10 + diff / 254),
605 (byte) (diff % 254 + 2));
609 UnicodeCategory uc = char.GetUnicodeCategory ((char) i);
611 case UnicodeCategory.PrivateUse:
612 int diff = i - 0xE000;
614 (byte) (0xE5 + diff / 254),
615 (byte) (diff % 254 + 2),
619 case UnicodeCategory.Surrogate:
620 FillSurrogateSortKeyRaw (i, buf);
624 byte level2 = Level2 (i, ext);
625 if (Uni.HasSpecialWeight ((char) i)) {
626 byte level1 = Level1 (i);
632 Uni.IsJapaneseSmallLetter ((char) i),
633 ToDashTypeValue (ext, opt),
634 !Uni.IsHiragana ((char) i),
635 IsHalfKana ((char) i, opt)
637 if ((opt & COpt.IgnoreNonSpace) == 0 && ext == ExtenderType.Voiced)
638 // Append voice weight
639 buf.AppendNormal (1, 1, 1, 0);
649 void FillSurrogateSortKeyRaw (int i, SortKeyBuffer buf)
658 lower = (byte) ((i == 0xD800) ? 0x3E : 0x3F);
659 } else if (0xD840 <= i && i < 0xD880) {
663 } else if (0xDB80 <= i && i < 0xDC00) {
664 diffbase = 0xDB80 - 0x40;
668 diffbase = 0xDC00 - 0xF8 + 2;
672 int diff = i - diffbase;
675 (byte) (segment + diff / 254),
676 (byte) (diff % 254 + 2),
685 public int Compare (string s1, string s2)
687 return Compare (s1, s2, CompareOptions.None);
690 public int Compare (string s1, string s2, CompareOptions options)
692 return Compare (s1, 0, s1.Length, s2, 0, s2.Length, options);
695 private int CompareOrdinal (string s1, int idx1, int len1,
696 string s2, int idx2, int len2)
698 int min = len1 < len2 ? len1 : len2;
699 int end1 = idx1 + min;
700 int end2 = idx2 + min;
701 if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
702 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
703 for (int i1 = idx1, i2 = idx2;
704 i1 < end1 && i2 < end2; i1++, i2++)
705 if (s1 [i1] != s2 [i2])
706 return s1 [i1] - s2 [i2];
707 return len1 == len2 ? 0 :
708 len1 == min ? - 1 : 1;
711 // mostly equivalent to CompareOrdinal, but the return value is
712 // not based on codepoints.
713 private int CompareQuick (string s1, int idx1, int len1,
714 string s2, int idx2, int len2, out bool sourceConsumed,
715 out bool targetConsumed, bool immediateBreakup)
717 sourceConsumed = false;
718 targetConsumed = false;
719 int min = len1 < len2 ? len1 : len2;
720 int end1 = idx1 + min;
721 int end2 = idx2 + min;
722 if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
723 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
724 for (int i1 = idx1, i2 = idx2;
725 i1 < end1 && i2 < end2; i1++, i2++)
726 if (s1 [i1] != s2 [i2]) {
727 if (immediateBreakup)
729 int ret = Category (s1 [i1]) - Category (s2 [i2]);
731 ret = Level1 (s1 [i1]) - Level1 (s2 [i2]);
734 ret = Uni.Level3 (s1 [i1]) - Uni.Level3 (s2 [i2]);
736 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. '{0}' {2} {3} '{1}' {4} {5}", s1, s2, idx1, end1, idx2, end2));
739 sourceConsumed = len1 <= len2;
740 targetConsumed = len1 >= len2;
741 return len1 == len2 ? 0 :
742 len1 == min ? - 1 : 1;
745 private int CompareOrdinalIgnoreCase (string s1, int idx1, int len1,
746 string s2, int idx2, int len2)
748 int min = len1 < len2 ? len1 : len2;
749 int end1 = idx1 + min;
750 int end2 = idx2 + min;
751 if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
752 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
753 TextInfo ti = invariant.textInfo;
754 for (int i1 = idx1, i2 = idx2;
755 i1 < end1 && i2 < end2; i1++, i2++)
756 if (ti.ToLower (s1 [i1]) != ti.ToLower (s2 [i2]))
757 return ti.ToLower (s1 [i1]) - ti.ToLower (s2 [i2]);
758 return len1 == len2 ? 0 :
759 len1 == min ? - 1 : 1;
762 public unsafe int Compare (string s1, int idx1, int len1,
763 string s2, int idx2, int len2, CompareOptions options)
765 // quick equality check
766 if (idx1 == idx2 && len1 == len2 &&
767 Object.ReferenceEquals (s1, s2))
769 if (options == CompareOptions.Ordinal)
770 return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
771 if (options == CompareOptions.OrdinalIgnoreCase)
772 return CompareOrdinalIgnoreCase (s1, idx1, len1, s2, idx2, len2);
774 #if false // stable easy version, depends on GetSortKey().
775 SortKey sk1 = GetSortKey (s1, idx1, len1, options);
776 SortKey sk2 = GetSortKey (s2, idx2, len2, options);
777 byte [] d1 = sk1.KeyData;
778 byte [] d2 = sk2.KeyData;
779 int len = d1.Length > d2.Length ? d2.Length : d1.Length;
780 for (int i = 0; i < len; i++)
781 if (d1 [i] != d2 [i])
782 return d1 [i] < d2 [i] ? -1 : 1;
783 return d1.Length == d2.Length ? 0 : d1.Length < d2.Length ? -1 : 1;
785 byte* sk1 = stackalloc byte [4];
786 byte* sk2 = stackalloc byte [4];
787 ClearBuffer (sk1, 4);
788 ClearBuffer (sk2, 4);
789 Context ctx = new Context (options, null, null, sk1, sk2, null,
790 QuickCheckPossible (s1, idx1, idx1 + len1, s2, idx2, idx2 + len2));
793 int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, out dummy, out dummy2, true, false, ref ctx);
794 return ret == 0 ? 0 : ret < 0 ? -1 : 1;
798 unsafe void ClearBuffer (byte* buffer, int size)
800 for (int i = 0; i < size; i++)
804 bool QuickCheckPossible (string s1, int idx1, int end1,
805 string s2, int idx2, int end2)
808 // looks like it is buggy and inefficient, so just disable it.
811 if (QuickCheckDisabled)
813 // if (s1.Length > 100 || s2.Length > 100)
815 for (int i = idx1; i < end1; i++)
816 if (s1 [i] < 0x20 && (s1 [i] < '\x9' || s1 [i] > '\xD') || s1 [i] >= 0x80 || s1 [i] == '-' || s1 [i] == '\'')
818 for (int i = idx2; i < end2; i++)
819 if (s2 [i] < 0x20 && (s2 [i] < '\x9' || s2 [i] > '\xD') || s2 [i] >= 0x80 || s2 [i] == '-' || s2 [i] == '\'')
825 unsafe int CompareInternal (string s1, int idx1, int len1, string s2,
827 out bool targetConsumed, out bool sourceConsumed,
828 bool skipHeadingExtenders, bool immediateBreakup,
831 COpt opt = ctx.Option;
834 int end1 = idx1 + len1;
835 int end2 = idx2 + len2;
836 targetConsumed = false;
837 sourceConsumed = false;
838 PreviousInfo prev2 = new PreviousInfo (false);
840 if (opt == CompareOptions.None && ctx.QuickCheckPossible)
841 return CompareQuick (s1, idx1, len1, s2, idx2, len2, out sourceConsumed, out targetConsumed, immediateBreakup);
843 // It holds final result that comes from the comparison
844 // at level 2 or lower. Even if Compare() found the
845 // difference at level 2 or lower, it still has to
846 // continue level 1 comparison. FinalResult is used
847 // when there was no further differences.
849 // It holds the comparison level to do. It starts from
850 // 5, and becomes 1 when we do primary-only comparison.
851 int currentLevel = 5;
858 // Skip heading extenders
859 if (skipHeadingExtenders) {
860 for (; idx1 < end1; idx1++)
861 if (GetExtenderType (s1 [idx1]) == ExtenderType.None)
863 for (; idx2 < end2; idx2++)
864 if (GetExtenderType (s2 [idx2]) == ExtenderType.None)
868 ExtenderType ext1 = ExtenderType.None;
869 ExtenderType ext2 = ExtenderType.None;
871 int quickCheckPos1 = idx1;
872 int quickCheckPos2 = idx2;
873 bool stringSort = (opt & COpt.StringSort) != 0;
874 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
875 Escape escape1 = new Escape ();
876 Escape escape2 = new Escape ();
879 for (; idx1 < end1; idx1++)
880 if (!IsIgnorable (s1 [idx1], opt))
882 for (; idx2 < end2; idx2++)
883 if (!IsIgnorable (s2 [idx2], opt))
887 if (escape1.Source == null)
890 start1 = escape1.Start;
891 idx1 = escape1.Index;
893 quickCheckPos1 = escape1.Optional;
894 escape1.Source = null;
898 if (escape2.Source == null)
901 start2 = escape2.Start;
902 idx2 = escape2.Index;
904 quickCheckPos2 = escape2.Optional;
905 escape2.Source = null;
909 // If comparison is unstable, then this part is one of the most doubtful part.
910 // Here we run quick codepoint comparison and run back to "stable" area to
911 // compare characters.
913 // Strictly to say, even the first character
914 // could be compared here, but it messed
915 // backward step, so I just avoided mess.
916 if (quickCheckPos1 < idx1 && quickCheckPos2 < idx2) {
917 while (idx1 < end1 && idx2 < end2 &&
918 s1 [idx1] == s2 [idx2]) {
922 if (idx1 == end1 || idx2 == end2)
923 continue; // check replacement
925 int backwardEnd1 = quickCheckPos1;
926 int backwardEnd2 = quickCheckPos2;
927 quickCheckPos1 = idx1;
928 quickCheckPos2 = idx2;
933 for (;idx1 > backwardEnd1; idx1--)
934 if (Category (s1 [idx1]) != 1)
936 for (;idx2 > backwardEnd2; idx2--)
937 if (Category (s2 [idx2]) != 1)
939 for (;idx1 > backwardEnd1; idx1--)
940 if (IsSafe (s1 [idx1]))
942 for (;idx2 > backwardEnd2; idx2--)
943 if (IsSafe (s2 [idx2]))
952 int i1 = FilterOptions (s1 [idx1], opt);
953 int i2 = FilterOptions (s2 [idx2], opt);
954 bool special1 = false;
955 bool special2 = false;
957 // If current character is an expander, then
958 // repeat the previous character.
959 ext1 = GetExtenderType (i1);
960 if (ext1 != ExtenderType.None) {
961 if (ctx.PrevCode < 0) {
962 if (ctx.PrevSortKey == null) {
967 sk1 = ctx.PrevSortKey;
970 i1 = FilterExtender (ctx.PrevCode, ext1, opt);
972 ext2 = GetExtenderType (i2);
973 if (ext2 != ExtenderType.None) {
974 if (prev2.Code < 0) {
975 if (prev2.SortKey == null) {
983 i2 = FilterExtender (prev2.Code, ext2, opt);
986 byte cat1 = Category (i1);
987 byte cat2 = Category (i2);
989 // Handle special weight characters
991 if (!stringSort && currentLevel == 5) {
992 lv5At1 = escape1.Source != null ?
993 escape1.Index - escape1.Start :
995 // here Windows has a bug that it does
996 // not consider thirtiary weight.
997 lv5Value1 = Level1 (i1) << 8 + Uni.Level3 (i1);
1003 if (!stringSort && currentLevel == 5) {
1004 lv5At2 = escape2.Source != null ?
1005 escape2.Index - escape2.Start :
1007 // here Windows has a bug that it does
1008 // not consider thirtiary weight.
1009 lv5Value2 = Level1 (i2) << 8 + Uni.Level3 (i2);
1014 if (cat1 == 6 || cat2 == 6) {
1015 if (currentLevel == 5) {
1016 if (lv5Value1 == lv5Value2) {
1017 // There is not really difference here.
1018 lv5At1 = lv5At2 = -1;
1019 lv5Value1 = lv5Value2 = 0;
1027 Contraction ct1 = null;
1028 if (ext1 == ExtenderType.None)
1029 ct1 = GetContraction (s1, idx1, end1);
1034 else if (ct1 != null) {
1035 offset1 = ct1.Source.Length;
1036 if (ct1.SortKey != null) {
1038 for (int i = 0; i < ct1.SortKey.Length; i++)
1039 sk1 [i] = ct1.SortKey [i];
1041 ctx.PrevSortKey = sk1;
1043 else if (escape1.Source == null) {
1044 escape1.Source = s1;
1045 escape1.Start = start1;
1046 escape1.Index = cur1 + ct1.Source.Length;
1048 escape1.Optional = quickCheckPos1;
1049 s1 = ct1.Replacement;
1060 sk1 [1] = Level1 (i1);
1061 if (!ignoreNonSpace && currentLevel > 1)
1062 sk1 [2] = Level2 (i1, ext1);
1063 if (currentLevel > 2)
1064 sk1 [3] = Uni.Level3 (i1);
1065 if (currentLevel > 3)
1066 special1 = Uni.HasSpecialWeight ((char) i1);
1071 Contraction ct2 = null;
1072 if (ext2 == ExtenderType.None)
1073 ct2 = GetContraction (s2, idx2, end2);
1077 else if (ct2 != null) {
1078 idx2 += ct2.Source.Length;
1079 if (ct2.SortKey != null) {
1081 for (int i = 0; i < ct2.SortKey.Length; i++)
1082 sk2 [i] = ct2.SortKey [i];
1084 prev2.SortKey = sk2;
1086 else if (escape2.Source == null) {
1087 escape2.Source = s2;
1088 escape2.Start = start2;
1089 escape2.Index = cur2 + ct2.Source.Length;
1091 escape2.Optional = quickCheckPos2;
1092 s2 = ct2.Replacement;
1103 sk2 [1] = Level1 (i2);
1104 if (!ignoreNonSpace && currentLevel > 1)
1105 sk2 [2] = Level2 (i2, ext2);
1106 if (currentLevel > 2)
1107 sk2 [3] = Uni.Level3 (i2);
1108 if (currentLevel > 3)
1109 special2 = Uni.HasSpecialWeight ((char) i2);
1115 // Add offset here so that it does not skip
1116 // idx1 while s2 has a replacement.
1119 if (!ignoreNonSpace) {
1120 // add diacritical marks in s1 here
1121 while (idx1 < end1) {
1122 if (Category (s1 [idx1]) != 1)
1126 sk1 [2] = (byte) (sk1 [2] +
1127 Level2 (s1 [idx1], ExtenderType.None));
1131 // add diacritical marks in s2 here
1132 while (idx2 < end2) {
1133 if (Category (s2 [idx2]) != 1)
1137 sk2 [2] = (byte) (sk2 [2] +
1138 Level2 (s2 [idx2], ExtenderType.None));
1143 int ret = sk1 [0] - sk2 [0];
1144 ret = ret != 0 ? ret : sk1 [1] - sk2 [1];
1147 if (currentLevel == 1)
1149 if (!ignoreNonSpace) {
1150 ret = sk1 [2] - sk2 [2];
1153 if (immediateBreakup)
1154 return -1; // different
1155 currentLevel = frenchSort ? 2 : 1;
1159 if (currentLevel == 2)
1161 ret = sk1 [3] - sk2 [3];
1164 if (immediateBreakup)
1165 return -1; // different
1169 if (currentLevel == 3)
1171 if (special1 != special2) {
1172 if (immediateBreakup)
1173 return -1; // different
1174 finalResult = special1 ? 1 : -1;
1179 ret = CompareFlagPair (
1180 !Uni.IsJapaneseSmallLetter ((char) i1),
1181 !Uni.IsJapaneseSmallLetter ((char) i2));
1182 ret = ret != 0 ? ret :
1183 ToDashTypeValue (ext1, opt) -
1184 ToDashTypeValue (ext2, opt);
1185 ret = ret != 0 ? ret : CompareFlagPair (
1186 Uni.IsHiragana ((char) i1),
1187 Uni.IsHiragana ((char) i2));
1188 ret = ret != 0 ? ret : CompareFlagPair (
1189 !IsHalfKana ((char) i1, opt),
1190 !IsHalfKana ((char) i2, opt));
1192 if (immediateBreakup)
1193 return -1; // different
1201 // If there were only level 3 or lower differences,
1202 // then we still have to find diacritical differences
1204 if (!ignoreNonSpace &&
1205 finalResult != 0 && currentLevel > 2) {
1206 while (idx1 < end1 && idx2 < end2) {
1207 if (!Uni.IsIgnorableNonSpacing (s1 [idx1]))
1209 if (!Uni.IsIgnorableNonSpacing (s2 [idx2]))
1211 finalResult = Level2 (FilterOptions (s1 [idx1], opt), ext1) - Level2 (FilterOptions (s2 [idx2], opt), ext2);
1212 if (finalResult != 0)
1216 // they should work only for the first character
1217 ext1 = ExtenderType.None;
1218 ext2 = ExtenderType.None;
1221 if (currentLevel == 1 && finalResult != 0) {
1222 while (idx1 < end1) {
1223 if (Uni.IsIgnorableNonSpacing (s1 [idx1]))
1228 while (idx2 < end2) {
1229 if (Uni.IsIgnorableNonSpacing (s2 [idx2]))
1235 // we still have to check level 5
1236 if (finalResult == 0) {
1237 if (lv5At1 < 0 && lv5At2 >= 0)
1239 else if (lv5At2 < 0 && lv5At1 >= 0)
1242 finalResult = lv5At1 - lv5At2;
1243 if (finalResult == 0)
1244 finalResult = lv5Value1 - lv5Value2;
1247 if (finalResult == 0) {
1249 targetConsumed = true;
1251 sourceConsumed = true;
1253 return idx1 != end1 ? 1 : idx2 == end2 ? finalResult : -1;
1256 int CompareFlagPair (bool b1, bool b2)
1258 return b1 == b2 ? 0 : b1 ? 1 : -1;
1263 #region IsPrefix() and IsSuffix()
1265 public bool IsPrefix (string src, string target, CompareOptions opt)
1267 return IsPrefix (src, target, 0, src.Length, opt);
1270 public unsafe bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
1272 if (target.Length == 0)
1274 byte* sk1 = stackalloc byte [4];
1275 byte* sk2 = stackalloc byte [4];
1276 ClearBuffer (sk1, 4);
1277 ClearBuffer (sk2, 4);
1278 Context ctx = new Context (opt, null, null, sk1, sk2, null,
1279 QuickCheckPossible (s, start, start + length, target, 0, target.Length));
1280 return IsPrefix (s, target, start, length, true, ref ctx);
1283 unsafe bool IsPrefix (string s, string target, int start, int length, bool skipHeadingExtenders, ref Context ctx)
1285 bool consumed, dummy;
1286 CompareInternal (s, start, length,
1287 target, 0, target.Length,
1288 out consumed, out dummy, skipHeadingExtenders,
1295 public bool IsSuffix (string src, string target, CompareOptions opt)
1297 return IsSuffix (src, target, src.Length - 1, src.Length, opt);
1300 public unsafe bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
1302 if (target.Length == 0)
1304 int last = LastIndexOf (s, target, start, length, opt);
1305 return last >= 0 && Compare (s, last, s.Length - last, target, 0, target.Length, opt) == 0;
1307 // quick check : simple codepoint comparison
1308 if (s.Length >= target.Length) {
1310 int se = start - length;
1311 for (int i = target.Length - 1; si >= se && i >= 0; i--, si--)
1312 if (s [si] != target [i])
1314 if (si == start + target.Length)
1318 PreviousInfo prev = new PreviousInfo (false);
1319 byte* sk1 = stackalloc byte [4];
1320 byte* sk2 = stackalloc byte [4];
1321 ClearBuffer (sk1, 4);
1322 ClearBuffer (sk2, 4);
1323 return IsSuffix (opt, s, target, start, length, ref prev, sk1, sk2);
1327 unsafe bool IsSuffix (string s, string t, int start, int length, ref Context ctx)
1330 COpt opt = ctx.Option;
1332 for (;tstart < t.Length; tstart++)
1333 if (!IsIgnorable (t [tstart], opt))
1335 if (tstart == t.Length)
1336 return true; // as if target is String.Empty.
1339 // This is still not working. If it is not likely to get working, then just remove it.
1341 int send = start - length;
1342 int ti = t.Length - 1;
1345 int sStep = start + 1;
1346 int tStep = t.Length;
1347 bool sourceConsumed, targetConsumed;
1349 for (; send < si; si--)
1350 if (!IsIgnorable (s [si]))
1352 for (; tend < ti; ti--)
1353 if (!IsIgnorable (t [ti]))
1357 for (; send < si; si--)
1358 if (IsSafe (s [si]))
1360 for (; tend < ti; ti--)
1361 if (IsSafe (t [ti]))
1363 Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, length, t, ti, tstart, sStep - si, tStep - ti);
1364 if (CompareInternal (opt, s, si, sStep - si,
1366 out targetConsumed, out sourceConsumed,
1378 // FIXME: it is not efficient for very long target.
1379 bool sourceConsumed, targetConsumed;
1380 int mismatchCount = 0;
1381 for (int i = 0; i < length; i++) {
1382 ctx.ClearPrevInfo ();
1384 int ret = CompareInternal (s, start - i, i + 1,
1385 t, tstart, t.Length - tstart,
1387 // FIXME: could immediately breakup
1388 out sourceConsumed, true, true, ref ctx);
1391 if (!sourceConsumed && targetConsumed)
1393 if (!targetConsumed) {
1395 if (mismatchCount > Uni.MaxExpansionLength)
1396 // The largest length of an
1397 // expansion is 3, so if the
1398 // target was not consumed more
1399 // than 3 times, then the tail
1400 // character does not match.
1410 #region IndexOf() / LastIndexOf()
1414 public int IndexOf (string s, string target, CompareOptions opt)
1416 return IndexOf (s, target, 0, s.Length, opt);
1419 int QuickIndexOf (string s, string target, int start, int length, out bool testWasUnable)
1421 int testedSourcePos = -1, testedTargetPos = -1;
1423 testWasUnable = true;
1424 if (target.Length == 0)
1426 else if (target.Length > length)
1428 testWasUnable = false;
1430 int end = start + length - target.Length + 1;
1431 for (int i = start; i < end; i++) {
1433 for (int j = 0; j < target.Length; j++) {
1434 if (testedTargetPos < j) {
1435 char c = target [j];
1436 if (c == 0 || c >= 0x80) {
1437 testWasUnable = true;
1441 testedTargetPos = j;
1443 if (testedSourcePos < i + j) {
1445 if (c == 0 || c >= 0x80) {
1446 testWasUnable = true;
1450 testedSourcePos = i + j;
1452 if (s [i + j] != target [j]) {
1464 public unsafe int IndexOf (string s, string target, int start, int length, CompareOptions opt)
1466 if (opt == CompareOptions.Ordinal)
1467 return IndexOfOrdinal (s, target, start, length);
1468 if (opt == CompareOptions.OrdinalIgnoreCase)
1469 return IndexOfOrdinalIgnoreCase (s, target, start, length);
1470 if (opt == CompareOptions.None) {
1472 int ret = QuickIndexOf (s, target, start, length, out testWasUnable);
1477 byte* alwaysMatchFlags = stackalloc byte [16];
1478 byte* neverMatchFlags = stackalloc byte [16];
1479 byte* targetSortKey = stackalloc byte [4];
1480 byte* sk1 = stackalloc byte [4];
1481 byte* sk2 = stackalloc byte [4];
1482 ClearBuffer (alwaysMatchFlags, 16);
1483 ClearBuffer (neverMatchFlags, 16);
1484 ClearBuffer (targetSortKey, 4);
1485 ClearBuffer (sk1, 4);
1486 ClearBuffer (sk2, 4);
1487 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1489 return IndexOf (s, target, start, length,
1490 targetSortKey, ref ctx);
1493 // note that it wouldn't be used since ordinal IndexOf()
1494 // should be directed to icall.
1495 int IndexOfOrdinal (string s, string target, int start, int length)
1497 if (target.Length == 0)
1499 else if (target.Length > length)
1502 int end = start + length - target.Length + 1;
1503 for (int i = start; i < end; i++) {
1505 for (int j = 0; j < target.Length; j++) {
1506 if (s [i + j] != target [j]) {
1518 int IndexOfOrdinalIgnoreCase (string s, string target, int start, int length)
1520 if (target.Length == 0)
1522 else if (target.Length > length)
1525 int end = start + length - target.Length + 1;
1526 for (int i = start; i < end; i++) {
1528 for (int j = 0; j < target.Length; j++) {
1529 // I think almost all text has more lower letters than upper ones. Thus with this invariant comparison ToLower() should be faster since it costs less operations.
1530 if (textInfo.ToLower (s [i + j]) != textInfo.ToLower (target [j])) {
1544 public int IndexOf (string s, char target, CompareOptions opt)
1546 return IndexOf (s, target, 0, s.Length, opt);
1549 public unsafe int IndexOf (string s, char target, int start, int length, CompareOptions opt)
1551 if (opt == CompareOptions.Ordinal)
1552 return IndexOfOrdinal (s, target, start, length);
1553 if (opt == CompareOptions.OrdinalIgnoreCase)
1554 return IndexOfOrdinalIgnoreCase (s, target, start, length);
1555 byte* alwaysMatchFlags = stackalloc byte [16];
1556 byte* neverMatchFlags = stackalloc byte [16];
1557 byte* targetSortKey = stackalloc byte [4];
1558 byte* sk1 = stackalloc byte [4];
1559 byte* sk2 = stackalloc byte [4];
1560 ClearBuffer (alwaysMatchFlags, 16);
1561 ClearBuffer (neverMatchFlags, 16);
1562 ClearBuffer (targetSortKey, 4);
1563 ClearBuffer (sk1, 4);
1564 ClearBuffer (sk2, 4);
1565 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1567 // If target is contraction, then use string search.
1568 Contraction ct = GetContraction (target);
1570 if (ct.Replacement != null)
1571 return IndexOf (s, ct.Replacement,
1572 start, length, targetSortKey, ref ctx);
1574 for (int i = 0; i < ct.SortKey.Length; i++)
1575 sk2 [i] = ct.SortKey [i];
1576 return IndexOfSortKey (s, start, length, sk2, char.MinValue, -1, true, ref ctx);
1579 int ti = FilterOptions ((int) target, opt);
1580 targetSortKey [0] = Category (ti);
1581 targetSortKey [1] = Level1 (ti);
1582 if ((opt & COpt.IgnoreNonSpace) == 0)
1584 Level2 (ti, ExtenderType.None);
1585 targetSortKey [3] = Uni.Level3 (ti);
1586 return IndexOfSortKey (s, start, length,
1587 targetSortKey, target, ti,
1588 !Uni.HasSpecialWeight ((char) ti), ref ctx);
1592 // note that it wouldn't be used since ordinal IndexOf()
1593 // should be directed to icall.
1594 int IndexOfOrdinal (string s, char target, int start, int length)
1596 int end = start + length;
1597 for (int i = start; i < end; i++)
1598 if (s [i] == target)
1603 int IndexOfOrdinalIgnoreCase (string s, char target, int start, int length)
1605 int end = start + length;
1606 target = textInfo.ToLower (target);
1607 for (int i = start; i < end; i++)
1608 if (textInfo.ToLower (s [i]) == target)
1613 // Searches target byte[] keydata
1614 unsafe int IndexOfSortKey (string s, int start, int length, byte* sortkey, char target, int ti, bool noLv4, ref Context ctx)
1616 int end = start + length;
1621 if (MatchesForward (s, ref idx, end, ti, sortkey, noLv4, ref ctx))
1627 // Searches string. Search head character (or keydata when
1628 // the head is contraction sortkey) and try IsPrefix().
1629 unsafe int IndexOf (string s, string target, int start, int length, byte* targetSortKey, ref Context ctx)
1631 COpt opt = ctx.Option;
1633 for (; tidx < target.Length; tidx++)
1634 if (!IsIgnorable (target [tidx], opt))
1636 if (tidx == target.Length)
1637 // FIXME: this is likely a hack. A string that is consists of \0 differs from those of other ignorable characters.
1638 return IndexOfOrdinal (target, '\0', 0, target.Length) >= 0 ? IndexOfOrdinal (s, target, start, length) : start;
1639 Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1640 string replace = ct != null ? ct.Replacement : null;
1641 byte* sk = replace == null ? targetSortKey : null;
1643 char tc = char.MinValue;
1645 if (ct != null && sk != null) {
1646 for (int i = 0; i < ct.SortKey.Length; i++)
1647 sk [i] = ct.SortKey [i];
1648 } else if (sk != null) {
1650 ti = FilterOptions (target [tidx], opt);
1651 sk [0] = Category (ti);
1652 sk [1] = Level1 (ti);
1653 if ((opt & COpt.IgnoreNonSpace) == 0)
1654 sk [2] = Level2 (ti, ExtenderType.None);
1655 sk [3] = Uni.Level3 (ti);
1656 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1659 for (tidx++; tidx < target.Length; tidx++) {
1660 if (Category (target [tidx]) != 1)
1664 sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1670 if (replace != null)
1671 idx = IndexOf (s, replace, start, length, targetSortKey, ref ctx);
1673 idx = IndexOfSortKey (s, start, length, sk, tc, ti, noLv4, ref ctx);
1676 length -= idx - start;
1678 if (IsPrefix (s, target, start, length, false, ref ctx))
1680 Contraction cts = GetContraction (s, start, length);
1682 start += cts.Source.Length;
1683 length -= cts.Source.Length;
1688 } while (length > 0);
1693 // There are the same number of LastIndexOf() methods as
1694 // IndexOf() related methods, with the same functionalities
1700 public int LastIndexOf (string s, string target, CompareOptions opt)
1702 return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1705 public unsafe int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
1707 if (opt == CompareOptions.Ordinal)
1708 return LastIndexOfOrdinal (s, target, start, length);
1709 if (opt == CompareOptions.OrdinalIgnoreCase)
1710 return LastIndexOfOrdinalIgnoreCase (s, target, start, length);
1711 byte* alwaysMatchFlags = stackalloc byte [16];
1712 byte* neverMatchFlags = stackalloc byte [16];
1713 byte* targetSortKey = stackalloc byte [4];
1714 byte* sk1 = stackalloc byte [4];
1715 byte* sk2 = stackalloc byte [4];
1716 ClearBuffer (alwaysMatchFlags, 16);
1717 ClearBuffer (neverMatchFlags, 16);
1718 ClearBuffer (targetSortKey, 4);
1719 ClearBuffer (sk1, 4);
1720 ClearBuffer (sk2, 4);
1721 // For some unknown reason CompareQuick() does not work fine w/ LastIndexOf().
1722 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1723 return LastIndexOf (s, target, start, length,
1724 targetSortKey, ref ctx);
1727 int LastIndexOfOrdinal (string s, string target, int start, int length)
1729 if (target.Length == 0)
1731 if (s.Length < target.Length || target.Length > length)
1733 int end = start - length + target.Length -1;
1734 char tail = target [target.Length - 1];
1735 for (int i = start; i > end;) {
1736 if (s [i] != tail) {
1740 int x = i - target.Length + 1;
1742 bool mismatch = false;
1743 for (int j = target.Length - 2; j >= 0; j--)
1744 if (s [x + j] != target [j]) {
1755 int LastIndexOfOrdinalIgnoreCase (string s, string target, int start, int length)
1757 if (target.Length == 0)
1759 if (s.Length < length || target.Length > length)
1761 int end = start - length + target.Length - 1;
1762 char tail = textInfo.ToLower (target [target.Length - 1]);
1763 for (int i = start; i > end;) {
1764 if (textInfo.ToLower (s [i]) != tail) {
1768 int x = i - target.Length + 1;
1770 bool mismatch = false;
1771 for (int j = target.Length - 2; j >= 0; j--)
1772 if (textInfo.ToLower (s [x + j]) != textInfo.ToLower (target [j])) {
1785 public int LastIndexOf (string s, char target, CompareOptions opt)
1787 return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1790 public unsafe int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
1792 if (opt == CompareOptions.Ordinal)
1793 return LastIndexOfOrdinal (s, target, start, length);
1794 if (opt == CompareOptions.OrdinalIgnoreCase)
1795 return LastIndexOfOrdinalIgnoreCase (s, target, start, length);
1796 byte* alwaysMatchFlags = stackalloc byte [16];
1797 byte* neverMatchFlags = stackalloc byte [16];
1798 byte* targetSortKey = stackalloc byte [4];
1799 byte* sk1 = stackalloc byte [4];
1800 byte* sk2 = stackalloc byte [4];
1801 ClearBuffer (alwaysMatchFlags, 16);
1802 ClearBuffer (neverMatchFlags, 16);
1803 ClearBuffer (targetSortKey, 4);
1804 ClearBuffer (sk1, 4);
1805 ClearBuffer (sk2, 4);
1806 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1808 // If target is a replacement contraction, then use
1810 Contraction ct = GetContraction (target);
1812 if (ct.Replacement != null)
1813 return LastIndexOf (s,
1814 ct.Replacement, start, length,
1815 targetSortKey, ref ctx);
1817 for (int bi = 0; bi < ct.SortKey.Length; bi++)
1818 sk2 [bi] = ct.SortKey [bi];
1819 return LastIndexOfSortKey (s, start,
1825 int ti = FilterOptions ((int) target, opt);
1826 targetSortKey [0] = Category (ti);
1827 targetSortKey [1] = Level1 (ti);
1828 if ((opt & COpt.IgnoreNonSpace) == 0)
1829 targetSortKey [2] = Level2 (ti, ExtenderType.None);
1830 targetSortKey [3] = Uni.Level3 (ti);
1831 return LastIndexOfSortKey (s, start, start,
1832 length, targetSortKey,
1833 ti, !Uni.HasSpecialWeight ((char) ti),
1838 int LastIndexOfOrdinal (string s, char target, int start, int length)
1842 int end = start - length;
1843 for (int i = start; i > end; i--)
1844 if (s [i] == target)
1849 int LastIndexOfOrdinalIgnoreCase (string s, char target, int start, int length)
1853 int end = start - length;
1854 char c = textInfo.ToUpper (target);
1855 for (int i = start; i > end; i--)
1856 if (textInfo.ToUpper (s [i]) == c)
1861 // Searches target byte[] keydata
1862 unsafe int LastIndexOfSortKey (string s, int start, int orgStart, int length, byte* sortkey, int ti, bool noLv4, ref Context ctx)
1864 int end = start - length;
1868 if (MatchesBackward (s, ref idx, end, orgStart,
1869 ti, sortkey, noLv4, ref ctx))
1875 // Searches string. Search head character (or keydata when
1876 // the head is contraction sortkey) and try IsPrefix().
1877 unsafe int LastIndexOf (string s, string target, int start, int length, byte* targetSortKey, ref Context ctx)
1879 COpt opt = ctx.Option;
1880 int orgStart = start;
1882 for (; tidx < target.Length; tidx++)
1883 if (!IsIgnorable (target [tidx], opt))
1885 if (tidx == target.Length)
1886 // FIXME: this is likely a hack. A string that is consists of \0 differs from those of other ignorable characters.
1887 return IndexOfOrdinal (target, '\0', 0, target.Length) >= 0 ? LastIndexOfOrdinal (s, target, start, length) : start;
1888 Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1889 string replace = ct != null ? ct.Replacement : null;
1890 byte* sk = replace == null ? targetSortKey : null;
1894 if (ct != null && sk != null) {
1895 for (int i = 0; i < ct.SortKey.Length; i++)
1896 sk [i] = ct.SortKey [i];
1897 } else if (sk != null) {
1898 ti = FilterOptions (target [tidx], opt);
1899 sk [0] = Category (ti);
1900 sk [1] = Level1 (ti);
1901 if ((opt & COpt.IgnoreNonSpace) == 0)
1902 sk [2] = Level2 (ti, ExtenderType.None);
1903 sk [3] = Uni.Level3 (ti);
1904 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1907 for (tidx++; tidx < target.Length; tidx++) {
1908 if (Category (target [tidx]) != 1)
1912 sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1919 if (replace != null)
1920 idx = LastIndexOf (s, replace,
1922 targetSortKey, ref ctx);
1924 idx = LastIndexOfSortKey (s, start, orgStart, length, sk, ti, noLv4, ref ctx);
1927 length -= start - idx;
1930 if (IsPrefix (s, target, idx, orgStart - idx + 1, false, ref ctx)) {
1931 for (;idx < orgStart; idx++)
1932 if (!IsIgnorable (s [idx], opt))
1936 Contraction cts = GetContraction (s, idx, orgStart - idx + 1);
1938 start -= cts.Source.Length;
1939 length -= cts.Source.Length;
1944 } while (length > 0);
1948 unsafe bool MatchesForward (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ref Context ctx)
1951 if (ctx.AlwaysMatchFlags != null && si < 128 && (ctx.AlwaysMatchFlags [si / 8] & (1 << (si % 8))) != 0)
1953 if (ctx.NeverMatchFlags != null &&
1955 (ctx.NeverMatchFlags [si / 8] & (1 << (si % 8))) != 0) {
1959 ExtenderType ext = GetExtenderType (s [idx]);
1960 Contraction ct = null;
1961 if (MatchesForwardCore (s, ref idx, end, ti, sortkey, noLv4, ext, ref ct, ref ctx)) {
1962 if (ctx.AlwaysMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1963 ctx.AlwaysMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1966 if (ctx.NeverMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1967 ctx.NeverMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1971 unsafe bool MatchesForwardCore (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
1973 COpt opt = ctx.Option;
1974 byte* charSortKey = ctx.Buffer1;
1975 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1977 if (ext == ExtenderType.None)
1978 ct = GetContraction (s, idx, end);
1979 else if (ctx.PrevCode < 0) {
1980 if (ctx.PrevSortKey == null) {
1984 charSortKey = ctx.PrevSortKey;
1987 si = FilterExtender (ctx.PrevCode, ext, opt);
1988 // if lv4 exists, it never matches contraction
1990 idx += ct.Source.Length;
1993 if (ct.SortKey != null) {
1994 for (int i = 0; i < 4; i++)
1995 charSortKey [i] = sortkey [i];
1997 ctx.PrevSortKey = charSortKey;
1999 // Here is the core of LAMESPEC
2000 // described at the top of the source.
2002 return MatchesForward (ct.Replacement, ref dummy,
2003 ct.Replacement.Length, ti, sortkey, noLv4, ref ctx);
2007 si = FilterOptions (s [idx], opt);
2009 charSortKey [0] = Category (si);
2010 bool noMatch = false;
2011 if (sortkey [0] == charSortKey [0])
2012 charSortKey [1] = Level1 (si);
2015 if (!ignoreNonSpace && sortkey [1] == charSortKey [1])
2016 charSortKey [2] = Level2 (si, ext);
2017 else if (!ignoreNonSpace)
2020 for (; idx < end; idx++) {
2021 if (Category (s [idx]) != 1)
2026 charSortKey [3] = Uni.Level3 (si);
2027 if (charSortKey [0] != 1)
2030 for (; idx < end; idx++) {
2031 if (Category (s [idx]) != 1)
2035 if (charSortKey [2] == 0)
2036 charSortKey [2] = 2;
2037 charSortKey [2] = (byte) (charSortKey [2]
2038 + Level2 (s [idx], ExtenderType.None));
2041 return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);
2044 unsafe bool MatchesPrimitive (COpt opt, byte* source, int si, ExtenderType ext, byte* target, int ti, bool noLv4)
2046 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
2047 if (source [0] != target [0] ||
2048 source [1] != target [1] ||
2049 (!ignoreNonSpace && source [2] != target [2]) ||
2050 source [3] != target [3])
2052 if (noLv4 && (si < 0 || !Uni.HasSpecialWeight ((char) si)))
2056 // Since target can never be an extender, if the source
2057 // is an expander and it matters, then they never match.
2058 if (!ignoreNonSpace && ext == ExtenderType.Conditional)
2060 if (Uni.IsJapaneseSmallLetter ((char) si) !=
2061 Uni.IsJapaneseSmallLetter ((char) ti) ||
2062 ToDashTypeValue (ext, opt) !=
2063 // FIXME: we will have to specify correct value for target
2064 ToDashTypeValue (ExtenderType.None, opt) ||
2065 !Uni.IsHiragana ((char) si) !=
2066 !Uni.IsHiragana ((char) ti) ||
2067 IsHalfKana ((char) si, opt) !=
2068 IsHalfKana ((char) ti, opt))
2073 unsafe bool MatchesBackward (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ref Context ctx)
2076 if (ctx.AlwaysMatchFlags != null && si < 128 && (ctx.AlwaysMatchFlags [si / 8] & (1 << (si % 8))) != 0)
2078 if (ctx.NeverMatchFlags != null && si < 128 && (ctx.NeverMatchFlags [si / 8] & (1 << (si % 8))) != 0) {
2082 ExtenderType ext = GetExtenderType (s [idx]);
2083 Contraction ct = null;
2084 if (MatchesBackwardCore (s, ref idx, end, orgStart, ti, sortkey, noLv4, ext, ref ct, ref ctx)) {
2085 if (ctx.AlwaysMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
2086 ctx.AlwaysMatchFlags [si / 8] |= (byte) (1 << (si % 8));
2089 if (ctx.NeverMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128) {
2090 ctx.NeverMatchFlags [si / 8] |= (byte) (1 << (si % 8));
2095 unsafe bool MatchesBackwardCore (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
2097 COpt opt = ctx.Option;
2098 byte* charSortKey = ctx.Buffer1;
2099 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
2102 // To handle extenders in source, we need to
2103 // check next _primary_ character.
2104 if (ext != ExtenderType.None) {
2105 byte diacritical = 0;
2106 for (int tmp = idx; ; tmp--) {
2107 if (tmp < 0) // heading extender
2109 if (IsIgnorable (s [tmp], opt))
2111 int tmpi = FilterOptions (s [tmp], opt);
2112 byte category = Category (tmpi);
2113 if (category == 1) {
2114 diacritical = Level2 (tmpi, ExtenderType.None);
2117 si = FilterExtender (tmpi, ext, opt);
2118 charSortKey [0] = category;
2119 charSortKey [1] = Level1 (si);
2120 if (!ignoreNonSpace)
2121 charSortKey [2] = Level2 (si, ext);
2122 charSortKey [3] = Uni.Level3 (si);
2123 if (ext != ExtenderType.Conditional &&
2126 (charSortKey [2] == 0) ?
2127 (byte) (diacritical + 2) :
2133 if (ext == ExtenderType.None)
2134 ct = GetTailContraction (s, idx, end);
2135 // if lv4 exists, it never matches contraction
2137 idx -= ct.Source.Length;
2140 if (ct.SortKey != null) {
2141 for (int i = 0; i < 4; i++)
2142 charSortKey [i] = sortkey [i];
2144 ctx.PrevSortKey = charSortKey;
2146 // Here is the core of LAMESPEC
2147 // described at the top of the source.
2148 int dummy = ct.Replacement.Length - 1;
2149 return 0 <= LastIndexOfSortKey (
2150 ct.Replacement, dummy, dummy,
2151 ct.Replacement.Length, sortkey,
2152 ti, noLv4, ref ctx);
2154 } else if (ext == ExtenderType.None) {
2156 si = FilterOptions (s [idx], opt);
2158 bool noMatch = false;
2159 charSortKey [0] = Category (si);
2160 if (charSortKey [0] == sortkey [0])
2161 charSortKey [1] = Level1 (si);
2164 if (!ignoreNonSpace && charSortKey [1] == sortkey [1])
2165 charSortKey [2] = Level2 (si, ext);
2166 else if (!ignoreNonSpace)
2170 charSortKey [3] = Uni.Level3 (si);
2171 if (charSortKey [0] != 1)
2174 if (ext == ExtenderType.None) {
2175 for (int tmp = cur + 1; tmp < orgStart; tmp++) {
2176 if (Category (s [tmp]) != 1)
2180 if (charSortKey [2] == 0)
2181 charSortKey [2] = 2;
2182 charSortKey [2] = (byte) (charSortKey [2]
2183 + Level2 (s [tmp], ExtenderType.None));
2186 return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);