2 // SimpleCollator.cs : the core collator implementation
5 // Atsushi Enomoto <atsushi@ximian.com>
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 // Here's a summary for supporting contractions, expansions and diacritical
33 // Diacritical mapping is a simple tailoring rule that "remaps" diacritical
34 // weight value from one to another. For now it applies to all range of
35 // characters, but at some stage we might need to limit the remapping ranges.
37 // A Contraction consists of a string (actually char[]) and a sortkey entry
38 // (i.e. byte[]). It indicates that a sequence of characters is interpreted
39 // as a single character that has the mapped sortkey value. There is no
40 // character which goes across "special rules". When the collator encountered
41 // such a sequence of characters, it just returns the sortkey value without
42 // further replacement.
44 // Since it is still likely to happen that a contraction sequence matches
45 // other character than the identical sequence (depending on CompareOptions
46 // and of course, defined sortkey value itself), comparison cannot be done
47 // at source char[] level.
49 // In IndexOf(), the first character in the target string or the target char
50 // itself is turned into sortkey bytes. If the character has a contraction and
51 // that is sortkey map, then it is used instead. If the contraction exists and
52 // that is replacement map, then the first character of the replacement string
53 // is searched instead. IndexOf() always searches only for the top character,
54 // and if it returned negative value, then it returns -1. Otherwise, it then
55 // tries IsPrefix() from that location. If it returns true, then it returns
58 // LAMESPEC: IndexOf() is lame as a whole API. It never matches in the middle
59 // of expansion and there is no proper way to return such indexes within
60 // a single int return value.
62 // For example, try below in .NET:
63 // IndexOf("\u00E6", "a")
64 // IndexOf("\u00E6", "e")
69 using System.Collections;
70 using System.Globalization;
72 using Uni = Mono.Globalization.Unicode.MSCompatUnicodeTable;
73 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
74 using COpt = System.Globalization.CompareOptions;
76 namespace Mono.Globalization.Unicode
78 internal class SimpleCollator
80 // this environment variable is for debugging quick check.
81 static bool QuickCheckDisabled =
82 Environment.internalGetEnvironmentVariable (
83 "MONO_COLLATION_QUICK_CHECK_DISABLED") == "yes";
85 unsafe internal struct Context
87 public Context (CompareOptions opt, byte* alwaysMatchFlags, byte* neverMatchFlags, byte* buffer1, byte* buffer2, byte* prev1, bool quickCheckPossible)
90 AlwaysMatchFlags = alwaysMatchFlags;
91 NeverMatchFlags = neverMatchFlags;
96 QuickCheckPossible = quickCheckPossible;
99 public readonly CompareOptions Option;
100 public readonly byte* NeverMatchFlags;
101 public readonly byte* AlwaysMatchFlags;
102 public byte* Buffer1;
103 public byte* Buffer2;
105 public byte* PrevSortKey;
106 public readonly bool QuickCheckPossible;
108 public void ClearPrevInfo ()
115 unsafe struct PreviousInfo
118 public byte* SortKey;
120 public PreviousInfo (bool dummy)
129 public string Source;
136 static SimpleCollator invariant =
137 new SimpleCollator (CultureInfo.InvariantCulture);
139 readonly TextInfo textInfo; // for ToLower().
140 readonly bool frenchSort;
141 unsafe readonly byte* cjkCatTable;
142 unsafe readonly byte* cjkLv1Table;
143 readonly CodePointIndexer cjkIndexer;
144 unsafe readonly byte* cjkLv2Table;
145 readonly CodePointIndexer cjkLv2Indexer;
147 readonly Contraction [] contractions;
148 readonly Level2Map [] level2Maps;
150 // This flag marks characters as "unsafe", where the character
151 // could be used as part of a contraction (whose length > 1).
152 readonly byte [] unsafeFlags;
154 const int UnsafeFlagLength = 0x300 / 8;
156 // readonly byte [] contractionFlags = new byte [16];
158 // This array is internally used inside IndexOf() to store
159 // "no need to check" ASCII characters.
161 // Now that it should be thread safe, this array is allocated
163 // byte [] neverMatchFlags = new byte [128 / 8];
165 #region .ctor() and split functions
167 public SimpleCollator (CultureInfo culture)
170 textInfo = culture.TextInfo;
173 SetCJKTable (culture, ref cjkIndexer,
174 ref cjkCatTable, ref cjkLv1Table,
175 ref cjkLv2Indexer, ref cjkLv2Table);
178 // Get tailoring info
179 TailoringInfo t = null;
180 for (CultureInfo ci = culture; ci.LCID != 127; ci = ci.Parent) {
181 t = Uni.GetTailoringInfo (ci.LCID);
185 if (t == null) // then use invariant
186 t = Uni.GetTailoringInfo (127);
188 frenchSort = t.FrenchSort;
189 Uni.BuildTailoringTables (culture, t, ref contractions,
191 unsafeFlags = new byte [UnsafeFlagLength];
192 foreach (Contraction c in contractions) {
193 if (c.Source.Length > 1)
194 foreach (char ch in c.Source)
195 unsafeFlags [(int) ch / 8 ]
196 |= (byte) (1 << ((int) ch & 7));
197 // for (int i = 0; i < c.Source.Length; i++) {
198 // int ch = c.Source [i];
201 // contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
205 foreach (Contraction c in invariant.contractions) {
206 if (c.Source.Length > 1)
207 foreach (char ch in c.Source)
208 unsafeFlags [(int) ch / 8 ]
209 |= (byte) (1 << ((int) ch & 7));
210 // for (int i = 0; i < c.Source.Length; i++) {
211 // int ch = c.Source [i];
214 // contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
218 // FIXME: Since tailorings are mostly for latin
219 // (and in some cases Cyrillic) characters, it would
220 // be much better for performance to store "start
221 // indexes" for > 370 (culture-specific letters).
224 // dump tailoring table
225 Console.WriteLine ("******** building table for {0} : contractions - {1} diacritical - {2}",
226 culture.LCID, contractions.Length, level2Maps.Length);
227 foreach (Contraction c in contractions) {
228 foreach (char cc in c.Source)
229 Console.Write ("{0:X4} ", (int) cc);
230 Console.WriteLine (" -> '{0}'", c.Replacement);
235 unsafe private void SetCJKTable (
236 CultureInfo culture, ref CodePointIndexer cjkIndexer,
237 ref byte* catTable, ref byte* lv1Table,
238 ref CodePointIndexer lv2Indexer, ref byte* lv2Table)
240 string name = GetNeutralCulture (culture).Name;
242 Uni.FillCJK (name, ref cjkIndexer, ref catTable,
243 ref lv1Table, ref lv2Indexer, ref lv2Table);
246 static CultureInfo GetNeutralCulture (CultureInfo info)
248 CultureInfo ret = info;
249 while (ret.Parent != null && ret.Parent.LCID != 127)
256 unsafe byte Category (int cp)
258 if (cp < 0x3000 || cjkCatTable == null)
259 return Uni.Category (cp);
260 int idx = cjkIndexer.ToIndex (cp);
261 return idx < 0 ? Uni.Category (cp) : cjkCatTable [idx];
264 unsafe byte Level1 (int cp)
266 if (cp < 0x3000 || cjkLv1Table == null)
267 return Uni.Level1 (cp);
268 int idx = cjkIndexer.ToIndex (cp);
269 return idx < 0 ? Uni.Level1 (cp) : cjkLv1Table [idx];
272 unsafe byte Level2 (int cp, ExtenderType ext)
274 if (ext == ExtenderType.Buggy)
276 else if (ext == ExtenderType.Conditional)
279 if (cp < 0x3000 || cjkLv2Table == null)
280 return Uni.Level2 (cp);
281 int idx = cjkLv2Indexer.ToIndex (cp);
282 byte ret = idx < 0 ? (byte) 0 : cjkLv2Table [idx];
285 ret = Uni.Level2 (cp);
286 if (level2Maps.Length == 0)
288 for (int i = 0; i < level2Maps.Length; i++) {
289 if (level2Maps [i].Source == ret)
290 return level2Maps [i].Replace;
291 else if (level2Maps [i].Source > ret)
297 static bool IsHalfKana (int cp, COpt opt)
299 return (opt & COpt.IgnoreWidth) != 0 ||
300 Uni.IsHalfWidthKana ((char) cp);
303 Contraction GetContraction (string s, int start, int end)
305 // int si = s [start];
306 // if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
308 Contraction c = GetContraction (s, start, end, contractions);
309 if (c != null || lcid == 127)
311 return GetContraction (s, start, end, invariant.contractions);
314 Contraction GetContraction (string s, int start, int end, Contraction [] clist)
316 for (int i = 0; i < clist.Length; i++) {
317 Contraction ct = clist [i];
318 int diff = ct.Source [0] - s [start];
320 return null; // it's already sorted
323 char [] chars = ct.Source;
324 if (end - start < chars.Length)
327 for (int n = 0; n < chars.Length; n++)
328 if (s [start + n] != chars [n]) {
338 Contraction GetTailContraction (string s, int start, int end)
340 // int si = s [end - 1];
341 // if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
343 Contraction c = GetTailContraction (s, start, end, contractions);
344 if (c != null || lcid == 127)
346 return GetTailContraction (s, start, end, invariant.contractions);
349 Contraction GetTailContraction (string s, int start, int end, Contraction [] clist)
351 if (start == end || end < -1 || start >= s.Length || s.Length <= end + 1)
352 throw new SystemException (String.Format ("MONO internal error. Failed to get TailContraction. start = {0} end = {1} string = '{2}'", start, end, s));
353 for (int i = 0; i < clist.Length; i++) {
354 Contraction ct = clist [i];
355 int diff = ct.Source [0] - s [end + 1];
357 return null; // it's already sorted
360 char [] chars = ct.Source;
363 if (chars.Length > start - end)
365 for (int n = 0; n < chars.Length; n++)
366 if (s [start - n] != chars [chars.Length - 1 - n]) {
376 Contraction GetContraction (char c)
378 Contraction ct = GetContraction (c, contractions);
379 if (ct != null || lcid == 127)
381 return GetContraction (c, invariant.contractions);
384 Contraction GetContraction (char c, Contraction [] clist)
386 for (int i = 0; i < clist.Length; i++) {
387 Contraction ct = clist [i];
388 if (ct.Source [0] > c)
389 return null; // it's already sorted
390 if (ct.Source [0] == c && ct.Source.Length == 1)
396 int FilterOptions (int i, COpt opt)
398 if ((opt & COpt.IgnoreWidth) != 0) {
399 int x = Uni.ToWidthCompat (i);
403 if ((opt & COpt.IgnoreCase) != 0)
404 i = textInfo.ToLower ((char) i);
405 if ((opt & COpt.IgnoreKanaType) != 0)
406 i = Uni.ToKanaTypeInsensitive (i);
418 ExtenderType GetExtenderType (int i)
420 // LAMESPEC: Windows expects true for U+3005, but
421 // sometimes it does not represent to repeat just
423 // Windows also expects true for U+3031 and U+3032,
424 // but they should *never* repeat one character.
426 // U+2015 becomes an extender only when it is Japanese
428 return lcid == 16 ? ExtenderType.Conditional :
431 if (i < 0x3005 || i > 0xFF70)
432 return ExtenderType.None;
437 return ExtenderType.Simple;
439 return ExtenderType.Conditional;
442 return ExtenderType.Voiced;
446 return ExtenderType.None;
448 case 0x3005: // LAMESPEC: see above.
449 return ExtenderType.Buggy;
450 case 0x3031: // LAMESPEC: see above.
451 case 0x3032: // LAMESPEC: see above.
454 return ExtenderType.Simple;
457 return ExtenderType.Voiced;
459 return ExtenderType.Conditional;
461 return ExtenderType.None;
465 static byte ToDashTypeValue (ExtenderType ext, COpt opt)
467 if ((opt & COpt.IgnoreNonSpace) != 0) // LAMESPEC: huh, why?
470 case ExtenderType.None:
472 case ExtenderType.Conditional:
479 int FilterExtender (int i, ExtenderType ext, COpt opt)
481 if (ext == ExtenderType.Conditional &&
482 Uni.HasSpecialWeight ((char) i)) {
483 bool half = IsHalfKana ((char) i, opt);
484 bool katakana = !Uni.IsHiragana ((char) i);
485 switch (Level1 (i) & 7) {
487 return half ? 0xFF71 : katakana ? 0x30A2 : 0x3042;
489 return half ? 0xFF72 : katakana ? 0x30A4 : 0x3044;
491 return half ? 0xFF73 : katakana ? 0x30A6 : 0x3046;
493 return half ? 0xFF74 : katakana ? 0x30A8 : 0x3048;
495 return half ? 0xFF75 : katakana ? 0x30AA : 0x304A;
501 static bool IsIgnorable (int i, COpt opt)
503 return Uni.IsIgnorable (i, (byte) (1 +
504 ((opt & COpt.IgnoreSymbols) != 0 ? 2 : 0) +
505 ((opt & COpt.IgnoreNonSpace) != 0 ? 4 : 0)));
511 return i / 8 >= unsafeFlags.Length ? true : (unsafeFlags [i / 8] & (1 << (i % 8))) == 0;
516 public SortKey GetSortKey (string s)
518 return GetSortKey (s, CompareOptions.None);
521 public SortKey GetSortKey (string s, CompareOptions options)
523 return GetSortKey (s, 0, s.Length, options);
526 public SortKey GetSortKey (string s, int start, int length, CompareOptions options)
528 SortKeyBuffer buf = new SortKeyBuffer (lcid);
529 buf.Initialize (options, lcid, s, frenchSort);
530 int end = start + length;
531 GetSortKey (s, start, end, buf, options);
532 return buf.GetResultAndReset ();
535 unsafe void GetSortKey (string s, int start, int end,
536 SortKeyBuffer buf, CompareOptions opt)
538 byte* prevbuf = stackalloc byte [4];
539 ClearBuffer (prevbuf, 4);
540 Context ctx = new Context (opt, null, null, null, null, prevbuf, false);
542 for (int n = start; n < end; n++) {
545 ExtenderType ext = GetExtenderType (i);
546 if (ext != ExtenderType.None) {
547 i = FilterExtender (ctx.PrevCode, ext, opt);
549 FillSortKeyRaw (i, ext, buf, opt);
550 else if (ctx.PrevSortKey != null) {
551 byte* b = ctx.PrevSortKey;
555 b [2] != 1 ? b [2] : Level2 (i, ext),
556 b [3] != 1 ? b [3] : Uni.Level3 (i));
558 // otherwise do nothing.
559 // (if the extender is the first char
560 // in the string, then just ignore.)
564 if (IsIgnorable (i, opt))
566 i = FilterOptions (i, opt);
568 Contraction ct = GetContraction (s, n, end);
570 if (ct.Replacement != null) {
571 GetSortKey (ct.Replacement, 0, ct.Replacement.Length, buf, opt);
573 byte* b = ctx.PrevSortKey;
574 for (int bi = 0; bi < ct.SortKey.Length; bi++)
575 b [bi] = ct.SortKey [bi];
579 b [2] != 1 ? b [2] : Level2 (i, ext),
580 b [3] != 1 ? b [3] : Uni.Level3 (i));
583 n += ct.Source.Length - 1;
586 if (!Uni.IsIgnorableNonSpacing (i))
588 FillSortKeyRaw (i, ExtenderType.None, buf, opt);
593 void FillSortKeyRaw (int i, ExtenderType ext,
594 SortKeyBuffer buf, CompareOptions opt)
596 if (0x3400 <= i && i <= 0x4DB5) {
597 int diff = i - 0x3400;
598 buf.AppendCJKExtension (
599 (byte) (0x10 + diff / 254),
600 (byte) (diff % 254 + 2));
604 UnicodeCategory uc = char.GetUnicodeCategory ((char) i);
606 case UnicodeCategory.PrivateUse:
607 int diff = i - 0xE000;
609 (byte) (0xE5 + diff / 254),
610 (byte) (diff % 254 + 2),
614 case UnicodeCategory.Surrogate:
615 FillSurrogateSortKeyRaw (i, buf);
619 byte level2 = Level2 (i, ext);
620 if (Uni.HasSpecialWeight ((char) i)) {
621 byte level1 = Level1 (i);
627 Uni.IsJapaneseSmallLetter ((char) i),
628 ToDashTypeValue (ext, opt),
629 !Uni.IsHiragana ((char) i),
630 IsHalfKana ((char) i, opt)
632 if ((opt & COpt.IgnoreNonSpace) == 0 && ext == ExtenderType.Voiced)
633 // Append voice weight
634 buf.AppendNormal (1, 1, 1, 0);
644 void FillSurrogateSortKeyRaw (int i, SortKeyBuffer buf)
653 lower = (byte) ((i == 0xD800) ? 0x3E : 0x3F);
654 } else if (0xD840 <= i && i < 0xD880) {
658 } else if (0xDB80 <= i && i < 0xDC00) {
659 diffbase = 0xDB80 - 0x40;
663 diffbase = 0xDC00 - 0xF8 + 2;
667 int diff = i - diffbase;
670 (byte) (segment + diff / 254),
671 (byte) (diff % 254 + 2),
680 public int Compare (string s1, string s2)
682 return Compare (s1, s2, CompareOptions.None);
685 public int Compare (string s1, string s2, CompareOptions options)
687 return Compare (s1, 0, s1.Length, s2, 0, s2.Length, options);
690 private int CompareOrdinal (string s1, int idx1, int len1,
691 string s2, int idx2, int len2)
693 int min = len1 < len2 ? len1 : len2;
694 int end1 = idx1 + min;
695 int end2 = idx2 + min;
696 if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
697 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
698 for (int i1 = idx1, i2 = idx2;
699 i1 < end1 && i2 < end2; i1++, i2++)
700 if (s1 [i1] != s2 [i2])
701 return s1 [i1] - s2 [i2];
702 return len1 == len2 ? 0 :
703 len1 == min ? - 1 : 1;
706 // mostly equivalent to CompareOrdinal, but the return value is
707 // not based on codepoints.
708 private int CompareQuick (string s1, int idx1, int len1,
709 string s2, int idx2, int len2, out bool sourceConsumed,
710 out bool targetConsumed, bool immediateBreakup)
712 sourceConsumed = false;
713 targetConsumed = false;
714 int min = len1 < len2 ? len1 : len2;
715 int end1 = idx1 + min;
716 int end2 = idx2 + min;
717 if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
718 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
719 for (int i1 = idx1, i2 = idx2;
720 i1 < end1 && i2 < end2; i1++, i2++)
721 if (s1 [i1] != s2 [i2]) {
722 if (immediateBreakup)
724 int ret = Category (s1 [i1]) - Category (s2 [i2]);
726 ret = Level1 (s1 [i1]) - Level1 (s2 [i2]);
729 ret = Uni.Level3 (s1 [i1]) - Uni.Level3 (s2 [i2]);
731 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. '{0}' {2} {3} '{1}' {4} {5}", s1, s2, idx1, end1, idx2, end2));
734 sourceConsumed = len1 <= len2;
735 targetConsumed = len1 >= len2;
736 return len1 == len2 ? 0 :
737 len1 == min ? - 1 : 1;
740 private int CompareOrdinalIgnoreCase (string s1, int idx1, int len1,
741 string s2, int idx2, int len2)
743 int min = len1 < len2 ? len1 : len2;
744 int end1 = idx1 + min;
745 int end2 = idx2 + min;
746 if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
747 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
748 TextInfo ti = invariant.textInfo;
749 for (int i1 = idx1, i2 = idx2;
750 i1 < end1 && i2 < end2; i1++, i2++)
751 if (ti.ToLower (s1 [i1]) != ti.ToLower (s2 [i2]))
752 return ti.ToLower (s1 [i1]) - ti.ToLower (s2 [i2]);
753 return len1 == len2 ? 0 :
754 len1 == min ? - 1 : 1;
757 public unsafe int Compare (string s1, int idx1, int len1,
758 string s2, int idx2, int len2, CompareOptions options)
760 // quick equality check
761 if (idx1 == idx2 && len1 == len2 &&
762 Object.ReferenceEquals (s1, s2))
764 // FIXME: this should be done inside Compare() at
766 // int ord = CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
769 if (options == CompareOptions.Ordinal)
770 return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
772 if (options == CompareOptions.OrdinalIgnoreCase)
773 return CompareOrdinalIgnoreCase (s1, idx1, len1, s2, idx2, len2);
776 #if false // stable easy version, depends on GetSortKey().
777 SortKey sk1 = GetSortKey (s1, idx1, len1, options);
778 SortKey sk2 = GetSortKey (s2, idx2, len2, options);
779 byte [] d1 = sk1.KeyData;
780 byte [] d2 = sk2.KeyData;
781 int len = d1.Length > d2.Length ? d2.Length : d1.Length;
782 for (int i = 0; i < len; i++)
783 if (d1 [i] != d2 [i])
784 return d1 [i] < d2 [i] ? -1 : 1;
785 return d1.Length == d2.Length ? 0 : d1.Length < d2.Length ? -1 : 1;
787 byte* sk1 = stackalloc byte [4];
788 byte* sk2 = stackalloc byte [4];
789 ClearBuffer (sk1, 4);
790 ClearBuffer (sk2, 4);
791 Context ctx = new Context (options, null, null, sk1, sk2, null,
792 QuickCheckPossible (s1, idx1, idx1 + len1, s2, idx2, idx2 + len2));
795 int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, out dummy, out dummy2, true, false, ref ctx);
796 return ret == 0 ? 0 : ret < 0 ? -1 : 1;
800 unsafe void ClearBuffer (byte* buffer, int size)
802 for (int i = 0; i < size; i++)
806 bool QuickCheckPossible (string s1, int idx1, int end1,
807 string s2, int idx2, int end2)
809 if (QuickCheckDisabled)
811 // if (s1.Length > 100 || s2.Length > 100)
813 for (int i = idx1; i < end1; i++)
814 if (s1 [i] < 0x20 && (s1 [i] < '\x9' || s1 [i] > '\xD') || s1 [i] >= 0x80 || s1 [i] == '-' || s1 [i] == '\'')
816 for (int i = idx2; i < end2; i++)
817 if (s2 [i] < 0x20 && (s2 [i] < '\x9' || s2 [i] > '\xD') || s2 [i] >= 0x80 || s2 [i] == '-' || s2 [i] == '\'')
822 unsafe int CompareInternal (string s1, int idx1, int len1, string s2,
824 out bool targetConsumed, out bool sourceConsumed,
825 bool skipHeadingExtenders, bool immediateBreakup,
828 COpt opt = ctx.Option;
831 int end1 = idx1 + len1;
832 int end2 = idx2 + len2;
833 targetConsumed = false;
834 sourceConsumed = false;
835 PreviousInfo prev2 = new PreviousInfo (false);
837 if (opt == CompareOptions.None && ctx.QuickCheckPossible)
838 return CompareQuick (s1, idx1, len1, s2, idx2, len2, out sourceConsumed, out targetConsumed, immediateBreakup);
840 // It holds final result that comes from the comparison
841 // at level 2 or lower. Even if Compare() found the
842 // difference at level 2 or lower, it still has to
843 // continue level 1 comparison. FinalResult is used
844 // when there was no further differences.
846 // It holds the comparison level to do. It starts from
847 // 5, and becomes 1 when we do primary-only comparison.
848 int currentLevel = 5;
855 // Skip heading extenders
856 if (skipHeadingExtenders) {
857 for (; idx1 < end1; idx1++)
858 if (GetExtenderType (s1 [idx1]) == ExtenderType.None)
860 for (; idx2 < end2; idx2++)
861 if (GetExtenderType (s2 [idx2]) == ExtenderType.None)
865 ExtenderType ext1 = ExtenderType.None;
866 ExtenderType ext2 = ExtenderType.None;
868 int quickCheckPos1 = idx1;
869 int quickCheckPos2 = idx2;
870 bool stringSort = (opt & COpt.StringSort) != 0;
871 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
872 Escape escape1 = new Escape ();
873 Escape escape2 = new Escape ();
876 for (; idx1 < end1; idx1++)
877 if (!IsIgnorable (s1 [idx1], opt))
879 for (; idx2 < end2; idx2++)
880 if (!IsIgnorable (s2 [idx2], opt))
884 if (escape1.Source == null)
887 start1 = escape1.Start;
888 idx1 = escape1.Index;
890 quickCheckPos1 = escape1.Optional;
891 escape1.Source = null;
895 if (escape2.Source == null)
898 start2 = escape2.Start;
899 idx2 = escape2.Index;
901 quickCheckPos2 = escape2.Optional;
902 escape2.Source = null;
906 // If comparison is unstable, then this part is one of the most doubtful part.
907 // Here we run quick codepoint comparison and run back to "stable" area to
908 // compare characters.
910 // Strictly to say, even the first character
911 // could be compared here, but it messed
912 // backward step, so I just avoided mess.
913 if (quickCheckPos1 < idx1 && quickCheckPos2 < idx2) {
914 while (idx1 < end1 && idx2 < end2 &&
915 s1 [idx1] == s2 [idx2]) {
919 if (idx1 == end1 || idx2 == end2)
920 continue; // check replacement
922 int backwardEnd1 = quickCheckPos1;
923 int backwardEnd2 = quickCheckPos2;
924 quickCheckPos1 = idx1;
925 quickCheckPos2 = idx2;
930 for (;idx1 > backwardEnd1; idx1--)
931 if (Category (s1 [idx1]) != 1)
933 for (;idx2 > backwardEnd2; idx2--)
934 if (Category (s2 [idx2]) != 1)
936 for (;idx1 > backwardEnd1; idx1--)
937 if (IsSafe (s1 [idx1]))
939 for (;idx2 > backwardEnd2; idx2--)
940 if (IsSafe (s2 [idx2]))
949 int i1 = FilterOptions (s1 [idx1], opt);
950 int i2 = FilterOptions (s2 [idx2], opt);
951 bool special1 = false;
952 bool special2 = false;
954 // If current character is an expander, then
955 // repeat the previous character.
956 ext1 = GetExtenderType (i1);
957 if (ext1 != ExtenderType.None) {
958 if (ctx.PrevCode < 0) {
959 if (ctx.PrevSortKey == null) {
964 sk1 = ctx.PrevSortKey;
967 i1 = FilterExtender (ctx.PrevCode, ext1, opt);
969 ext2 = GetExtenderType (i2);
970 if (ext2 != ExtenderType.None) {
971 if (prev2.Code < 0) {
972 if (prev2.SortKey == null) {
980 i2 = FilterExtender (prev2.Code, ext2, opt);
983 byte cat1 = Category (i1);
984 byte cat2 = Category (i2);
986 // Handle special weight characters
987 if (!stringSort && currentLevel > 4) {
989 lv5At1 = escape1.Source != null ?
990 escape1.Index - escape1.Start :
992 // here Windows has a bug that it does
993 // not consider thirtiary weight.
994 lv5Value1 = Level1 (i1) << 8 + Uni.Level3 (i1);
999 lv5At2 = escape2.Source != null ?
1000 escape2.Index - escape2.Start :
1002 // here Windows has a bug that it does
1003 // not consider thirtiary weight.
1004 lv5Value2 = Level1 (i2) << 8 + Uni.Level3 (i2);
1008 if (cat1 == 6 || cat2 == 6) {
1014 Contraction ct1 = null;
1015 if (ext1 == ExtenderType.None)
1016 ct1 = GetContraction (s1, idx1, end1);
1021 else if (ct1 != null) {
1022 offset1 = ct1.Source.Length;
1023 if (ct1.SortKey != null) {
1025 for (int i = 0; i < ct1.SortKey.Length; i++)
1026 sk1 [i] = ct1.SortKey [i];
1028 ctx.PrevSortKey = sk1;
1030 else if (escape1.Source == null) {
1031 escape1.Source = s1;
1032 escape1.Start = start1;
1033 escape1.Index = cur1 + ct1.Source.Length;
1035 escape1.Optional = quickCheckPos1;
1036 s1 = ct1.Replacement;
1047 sk1 [1] = Level1 (i1);
1048 if (!ignoreNonSpace && currentLevel > 1)
1049 sk1 [2] = Level2 (i1, ext1);
1050 if (currentLevel > 2)
1051 sk1 [3] = Uni.Level3 (i1);
1052 if (currentLevel > 3)
1053 special1 = Uni.HasSpecialWeight ((char) i1);
1058 Contraction ct2 = null;
1059 if (ext2 == ExtenderType.None)
1060 ct2 = GetContraction (s2, idx2, end2);
1064 else if (ct2 != null) {
1065 idx2 += ct2.Source.Length;
1066 if (ct2.SortKey != null) {
1068 for (int i = 0; i < ct2.SortKey.Length; i++)
1069 sk2 [i] = ct2.SortKey [i];
1071 prev2.SortKey = sk2;
1073 else if (escape2.Source == null) {
1074 escape2.Source = s2;
1075 escape2.Start = start2;
1076 escape2.Index = cur2 + ct2.Source.Length;
1078 escape2.Optional = quickCheckPos2;
1079 s2 = ct2.Replacement;
1090 sk2 [1] = Level1 (i2);
1091 if (!ignoreNonSpace && currentLevel > 1)
1092 sk2 [2] = Level2 (i2, ext2);
1093 if (currentLevel > 2)
1094 sk2 [3] = Uni.Level3 (i2);
1095 if (currentLevel > 3)
1096 special2 = Uni.HasSpecialWeight ((char) i2);
1102 // Add offset here so that it does not skip
1103 // idx1 while s2 has a replacement.
1106 // add diacritical marks in s1 here
1107 if (!ignoreNonSpace) {
1108 while (idx1 < end1) {
1109 if (Category (s1 [idx1]) != 1)
1113 sk1 [2] = (byte) (sk1 [2] +
1114 Level2 (s1 [idx1], ExtenderType.None));
1118 // add diacritical marks in s2 here
1119 while (idx2 < end2) {
1120 if (Category (s2 [idx2]) != 1)
1124 sk2 [2] = (byte) (sk2 [2] +
1125 Level2 (s2 [idx2], ExtenderType.None));
1130 int ret = sk1 [0] - sk2 [0];
1131 ret = ret != 0 ? ret : sk1 [1] - sk2 [1];
1134 if (currentLevel == 1)
1136 if (!ignoreNonSpace) {
1137 ret = sk1 [2] - sk2 [2];
1140 if (immediateBreakup)
1141 return -1; // different
1142 currentLevel = frenchSort ? 2 : 1;
1146 if (currentLevel == 2)
1148 ret = sk1 [3] - sk2 [3];
1151 if (immediateBreakup)
1152 return -1; // different
1156 if (currentLevel == 3)
1158 if (special1 != special2) {
1159 if (immediateBreakup)
1160 return -1; // different
1161 finalResult = special1 ? 1 : -1;
1166 ret = CompareFlagPair (
1167 !Uni.IsJapaneseSmallLetter ((char) i1),
1168 !Uni.IsJapaneseSmallLetter ((char) i2));
1169 ret = ret != 0 ? ret :
1170 ToDashTypeValue (ext1, opt) -
1171 ToDashTypeValue (ext2, opt);
1172 ret = ret != 0 ? ret : CompareFlagPair (
1173 Uni.IsHiragana ((char) i1),
1174 Uni.IsHiragana ((char) i2));
1175 ret = ret != 0 ? ret : CompareFlagPair (
1176 !IsHalfKana ((char) i1, opt),
1177 !IsHalfKana ((char) i2, opt));
1179 if (immediateBreakup)
1180 return -1; // different
1188 // If there were only level 3 or lower differences,
1189 // then we still have to find diacritical differences
1191 if (!ignoreNonSpace &&
1192 finalResult != 0 && currentLevel > 2) {
1193 while (idx1 < end1 && idx2 < end2) {
1194 if (!Uni.IsIgnorableNonSpacing (s1 [idx1]))
1196 if (!Uni.IsIgnorableNonSpacing (s2 [idx2]))
1198 finalResult = Level2 (FilterOptions (s1 [idx1], opt), ext1) - Level2 (FilterOptions (s2 [idx2], opt), ext2);
1199 if (finalResult != 0)
1203 // they should work only for the first character
1204 ext1 = ExtenderType.None;
1205 ext2 = ExtenderType.None;
1208 if (currentLevel == 1 && finalResult != 0) {
1209 while (idx1 < end1) {
1210 if (Uni.IsIgnorableNonSpacing (s1 [idx1]))
1215 while (idx2 < end2) {
1216 if (Uni.IsIgnorableNonSpacing (s2 [idx2]))
1222 // we still have to handle level 5
1223 if (finalResult == 0) {
1224 finalResult = lv5At1 - lv5At2;
1225 if (finalResult == 0)
1226 finalResult = lv5Value1 - lv5Value2;
1228 if (finalResult == 0) {
1230 targetConsumed = true;
1232 sourceConsumed = true;
1234 return idx1 != end1 ? 1 : idx2 == end2 ? finalResult : -1;
1237 int CompareFlagPair (bool b1, bool b2)
1239 return b1 == b2 ? 0 : b1 ? 1 : -1;
1244 #region IsPrefix() and IsSuffix()
1246 public bool IsPrefix (string src, string target, CompareOptions opt)
1248 return IsPrefix (src, target, 0, src.Length, opt);
1251 public unsafe bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
1253 if (target.Length == 0)
1255 byte* sk1 = stackalloc byte [4];
1256 byte* sk2 = stackalloc byte [4];
1257 ClearBuffer (sk1, 4);
1258 ClearBuffer (sk2, 4);
1259 Context ctx = new Context (opt, null, null, sk1, sk2, null,
1260 QuickCheckPossible (s, start, start + length, target, 0, target.Length));
1261 return IsPrefix (s, target, start, length, true, ref ctx);
1264 unsafe bool IsPrefix (string s, string target, int start, int length, bool skipHeadingExtenders, ref Context ctx)
1266 bool consumed, dummy;
1267 CompareInternal (s, start, length,
1268 target, 0, target.Length,
1269 out consumed, out dummy, skipHeadingExtenders,
1276 public bool IsSuffix (string src, string target, CompareOptions opt)
1278 return IsSuffix (src, target, src.Length - 1, src.Length, opt);
1281 public unsafe bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
1283 if (target.Length == 0)
1285 int last = LastIndexOf (s, target, start, length, opt);
1286 return last >= 0 && Compare (s, last, s.Length - last, target, 0, target.Length, opt) == 0;
1288 // quick check : simple codepoint comparison
1289 if (s.Length >= target.Length) {
1291 int se = start - length;
1292 for (int i = target.Length - 1; si >= se && i >= 0; i--, si--)
1293 if (s [si] != target [i])
1295 if (si == start + target.Length)
1299 PreviousInfo prev = new PreviousInfo (false);
1300 byte* sk1 = stackalloc byte [4];
1301 byte* sk2 = stackalloc byte [4];
1302 ClearBuffer (sk1, 4);
1303 ClearBuffer (sk2, 4);
1304 return IsSuffix (opt, s, target, start, length, ref prev, sk1, sk2);
1308 unsafe bool IsSuffix (string s, string t, int start, int length, ref Context ctx)
1311 COpt opt = ctx.Option;
1313 for (;tstart < t.Length; tstart++)
1314 if (!IsIgnorable (t [tstart], opt))
1316 if (tstart == t.Length)
1317 return true; // as if target is String.Empty.
1320 // This is still not working. If it is not likely to get working, then just remove it.
1322 int send = start - length;
1323 int ti = t.Length - 1;
1326 int sStep = start + 1;
1327 int tStep = t.Length;
1328 bool sourceConsumed, targetConsumed;
1330 for (; send < si; si--)
1331 if (!IsIgnorable (s [si]))
1333 for (; tend < ti; ti--)
1334 if (!IsIgnorable (t [ti]))
1338 for (; send < si; si--)
1339 if (IsSafe (s [si]))
1341 for (; tend < ti; ti--)
1342 if (IsSafe (t [ti]))
1344 Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, length, t, ti, tstart, sStep - si, tStep - ti);
1345 if (CompareInternal (opt, s, si, sStep - si,
1347 out targetConsumed, out sourceConsumed,
1359 // FIXME: it is not efficient for very long target.
1360 bool sourceConsumed, targetConsumed;
1361 int mismatchCount = 0;
1362 for (int i = 0; i < length; i++) {
1363 ctx.ClearPrevInfo ();
1365 int ret = CompareInternal (s, start - i, i + 1,
1366 t, tstart, t.Length - tstart,
1368 // FIXME: could immediately breakup
1369 out sourceConsumed, true, true, ref ctx);
1372 if (!sourceConsumed && targetConsumed)
1374 if (!targetConsumed) {
1376 if (mismatchCount > Uni.MaxExpansionLength)
1377 // The largest length of an
1378 // expansion is 3, so if the
1379 // target was not consumed more
1380 // than 3 times, then the tail
1381 // character does not match.
1391 #region IndexOf() / LastIndexOf()
1393 public int IndexOf (string s, string target, CompareOptions opt)
1395 return IndexOf (s, target, 0, s.Length, opt);
1398 int QuickIndexOf (string s, string target, int start, int length, out bool testWasUnable)
1400 int testedSourcePos = -1, testedTargetPos = -1;
1402 testWasUnable = true;
1403 if (target.Length == 0)
1405 else if (target.Length > length)
1407 testWasUnable = false;
1409 int end = start + length - target.Length + 1;
1410 for (int i = start; i < end; i++) {
1412 for (int j = 0; j < target.Length; j++) {
1413 if (testedTargetPos < j) {
1414 if (target [j] >= 0x80) {
1415 testWasUnable = true;
1419 testedTargetPos = j;
1421 if (testedSourcePos < i + j) {
1422 if (s [i + j] >= 0x80) {
1423 testWasUnable = true;
1427 testedSourcePos = i + j;
1429 if (s [i + j] != target [j]) {
1441 public unsafe int IndexOf (string s, string target, int start, int length, CompareOptions opt)
1443 if (opt == CompareOptions.None) {
1445 int ret = QuickIndexOf (s, target, start, length, out testWasUnable);
1450 byte* alwaysMatchFlags = stackalloc byte [16];
1451 byte* neverMatchFlags = stackalloc byte [16];
1452 byte* targetSortKey = stackalloc byte [4];
1453 byte* sk1 = stackalloc byte [4];
1454 byte* sk2 = stackalloc byte [4];
1455 ClearBuffer (alwaysMatchFlags, 16);
1456 ClearBuffer (neverMatchFlags, 16);
1457 ClearBuffer (targetSortKey, 4);
1458 ClearBuffer (sk1, 4);
1459 ClearBuffer (sk2, 4);
1460 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1462 return IndexOf (s, target, start, length,
1463 targetSortKey, ref ctx);
1466 public int IndexOf (string s, char target, CompareOptions opt)
1468 return IndexOf (s, target, 0, s.Length, opt);
1471 public unsafe int IndexOf (string s, char target, int start, int length, CompareOptions opt)
1473 byte* alwaysMatchFlags = stackalloc byte [16];
1474 byte* neverMatchFlags = stackalloc byte [16];
1475 byte* targetSortKey = stackalloc byte [4];
1476 byte* sk1 = stackalloc byte [4];
1477 byte* sk2 = stackalloc byte [4];
1478 ClearBuffer (alwaysMatchFlags, 16);
1479 ClearBuffer (neverMatchFlags, 16);
1480 ClearBuffer (targetSortKey, 4);
1481 ClearBuffer (sk1, 4);
1482 ClearBuffer (sk2, 4);
1483 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1485 // If target is contraction, then use string search.
1486 Contraction ct = GetContraction (target);
1488 if (ct.Replacement != null)
1489 return IndexOf (s, ct.Replacement,
1490 start, length, targetSortKey, ref ctx);
1492 for (int i = 0; i < ct.SortKey.Length; i++)
1493 sk2 [i] = ct.SortKey [i];
1494 return IndexOfSortKey (s, start, length, sk2, char.MinValue, -1, true, ref ctx);
1497 int ti = FilterOptions ((int) target, opt);
1498 targetSortKey [0] = Category (ti);
1499 targetSortKey [1] = Level1 (ti);
1500 if ((opt & COpt.IgnoreNonSpace) == 0)
1502 Level2 (ti, ExtenderType.None);
1503 targetSortKey [3] = Uni.Level3 (ti);
1504 return IndexOfSortKey (s, start, length,
1505 targetSortKey, target, ti,
1506 !Uni.HasSpecialWeight ((char) ti), ref ctx);
1510 // Searches target byte[] keydata
1511 unsafe int IndexOfSortKey (string s, int start, int length, byte* sortkey, char target, int ti, bool noLv4, ref Context ctx)
1513 int end = start + length;
1518 if (MatchesForward (s, ref idx, end, ti, sortkey, noLv4, ref ctx))
1524 // Searches string. Search head character (or keydata when
1525 // the head is contraction sortkey) and try IsPrefix().
1526 unsafe int IndexOf (string s, string target, int start, int length, byte* targetSortKey, ref Context ctx)
1528 COpt opt = ctx.Option;
1530 for (; tidx < target.Length; tidx++)
1531 if (!IsIgnorable (target [tidx], opt))
1533 if (tidx == target.Length)
1535 Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1536 string replace = ct != null ? ct.Replacement : null;
1537 byte* sk = replace == null ? targetSortKey : null;
1539 char tc = char.MinValue;
1541 if (ct != null && sk != null) {
1542 for (int i = 0; i < ct.SortKey.Length; i++)
1543 sk [i] = ct.SortKey [i];
1544 } else if (sk != null) {
1546 ti = FilterOptions (target [tidx], opt);
1547 sk [0] = Category (ti);
1548 sk [1] = Level1 (ti);
1549 if ((opt & COpt.IgnoreNonSpace) == 0)
1550 sk [2] = Level2 (ti, ExtenderType.None);
1551 sk [3] = Uni.Level3 (ti);
1552 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1555 for (tidx++; tidx < target.Length; tidx++) {
1556 if (Category (target [tidx]) != 1)
1560 sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1566 if (replace != null)
1567 idx = IndexOf (s, replace, start, length, targetSortKey, ref ctx);
1569 idx = IndexOfSortKey (s, start, length, sk, tc, ti, noLv4, ref ctx);
1572 length -= idx - start;
1574 if (IsPrefix (s, target, start, length, false, ref ctx))
1576 Contraction cts = GetContraction (s, start, length);
1578 start += cts.Source.Length;
1579 length -= cts.Source.Length;
1584 } while (length > 0);
1589 // There are the same number of IndexOf() related methods,
1590 // with the same functionalities for each.
1593 public int LastIndexOf (string s, string target, CompareOptions opt)
1595 return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1598 public unsafe int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
1600 byte* alwaysMatchFlags = stackalloc byte [16];
1601 byte* neverMatchFlags = stackalloc byte [16];
1602 byte* targetSortKey = stackalloc byte [4];
1603 byte* sk1 = stackalloc byte [4];
1604 byte* sk2 = stackalloc byte [4];
1605 ClearBuffer (alwaysMatchFlags, 16);
1606 ClearBuffer (neverMatchFlags, 16);
1607 ClearBuffer (targetSortKey, 4);
1608 ClearBuffer (sk1, 4);
1609 ClearBuffer (sk2, 4);
1610 // For some unknown reason CompareQuick() does not work fine w/ LastIndexOf().
1611 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1612 return LastIndexOf (s, target, start, length,
1613 targetSortKey, ref ctx);
1616 public int LastIndexOf (string s, char target, CompareOptions opt)
1618 return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1621 public unsafe int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
1623 byte* alwaysMatchFlags = stackalloc byte [16];
1624 byte* neverMatchFlags = stackalloc byte [16];
1625 byte* targetSortKey = stackalloc byte [4];
1626 byte* sk1 = stackalloc byte [4];
1627 byte* sk2 = stackalloc byte [4];
1628 ClearBuffer (alwaysMatchFlags, 16);
1629 ClearBuffer (neverMatchFlags, 16);
1630 ClearBuffer (targetSortKey, 4);
1631 ClearBuffer (sk1, 4);
1632 ClearBuffer (sk2, 4);
1633 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1635 // If target is a replacement contraction, then use
1637 Contraction ct = GetContraction (target);
1639 if (ct.Replacement != null)
1640 return LastIndexOf (s,
1641 ct.Replacement, start, length,
1642 targetSortKey, ref ctx);
1644 for (int bi = 0; bi < ct.SortKey.Length; bi++)
1645 sk2 [bi] = ct.SortKey [bi];
1646 return LastIndexOfSortKey (s, start,
1652 int ti = FilterOptions ((int) target, opt);
1653 targetSortKey [0] = Category (ti);
1654 targetSortKey [1] = Level1 (ti);
1655 if ((opt & COpt.IgnoreNonSpace) == 0)
1656 targetSortKey [2] = Level2 (ti, ExtenderType.None);
1657 targetSortKey [3] = Uni.Level3 (ti);
1658 return LastIndexOfSortKey (s, start, start,
1659 length, targetSortKey,
1660 ti, !Uni.HasSpecialWeight ((char) ti),
1665 // Searches target byte[] keydata
1666 unsafe int LastIndexOfSortKey (string s, int start, int orgStart, int length, byte* sortkey, int ti, bool noLv4, ref Context ctx)
1668 int end = start - length;
1672 if (MatchesBackward (s, ref idx, end, orgStart,
1673 ti, sortkey, noLv4, ref ctx))
1679 // Searches string. Search head character (or keydata when
1680 // the head is contraction sortkey) and try IsPrefix().
1681 unsafe int LastIndexOf (string s, string target, int start, int length, byte* targetSortKey, ref Context ctx)
1683 COpt opt = ctx.Option;
1684 int orgStart = start;
1686 for (; tidx < target.Length; tidx++)
1687 if (!IsIgnorable (target [tidx], opt))
1689 if (tidx == target.Length)
1691 Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1692 string replace = ct != null ? ct.Replacement : null;
1693 byte* sk = replace == null ? targetSortKey : null;
1697 if (ct != null && sk != null) {
1698 for (int i = 0; i < ct.SortKey.Length; i++)
1699 sk [i] = ct.SortKey [i];
1700 } else if (sk != null) {
1701 ti = FilterOptions (target [tidx], opt);
1702 sk [0] = Category (ti);
1703 sk [1] = Level1 (ti);
1704 if ((opt & COpt.IgnoreNonSpace) == 0)
1705 sk [2] = Level2 (ti, ExtenderType.None);
1706 sk [3] = Uni.Level3 (ti);
1707 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1710 for (tidx++; tidx < target.Length; tidx++) {
1711 if (Category (target [tidx]) != 1)
1715 sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1722 if (replace != null)
1723 idx = LastIndexOf (s, replace,
1725 targetSortKey, ref ctx);
1727 idx = LastIndexOfSortKey (s, start, orgStart, length, sk, ti, noLv4, ref ctx);
1730 length -= start - idx;
1733 if (IsPrefix (s, target, idx, orgStart - idx + 1, false, ref ctx)) {
1734 for (;idx < orgStart; idx++)
1735 if (!IsIgnorable (s [idx], opt))
1739 Contraction cts = GetContraction (s, idx, orgStart - idx + 1);
1741 start -= cts.Source.Length;
1742 length -= cts.Source.Length;
1747 } while (length > 0);
1751 unsafe bool MatchesForward (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ref Context ctx)
1754 if (ctx.AlwaysMatchFlags != null && si < 128 && (ctx.AlwaysMatchFlags [si / 8] & (1 << (si % 8))) != 0)
1756 if (ctx.NeverMatchFlags != null &&
1758 (ctx.NeverMatchFlags [si / 8] & (1 << (si % 8))) != 0) {
1762 ExtenderType ext = GetExtenderType (s [idx]);
1763 Contraction ct = null;
1764 if (MatchesForwardCore (s, ref idx, end, ti, sortkey, noLv4, ext, ref ct, ref ctx)) {
1765 if (ctx.AlwaysMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1766 ctx.AlwaysMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1769 if (ctx.NeverMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1770 ctx.NeverMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1774 unsafe bool MatchesForwardCore (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
1776 COpt opt = ctx.Option;
1777 byte* charSortKey = ctx.Buffer1;
1778 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1780 if (ext == ExtenderType.None)
1781 ct = GetContraction (s, idx, end);
1782 else if (ctx.PrevCode < 0) {
1783 if (ctx.PrevSortKey == null) {
1787 charSortKey = ctx.PrevSortKey;
1790 si = FilterExtender (ctx.PrevCode, ext, opt);
1791 // if lv4 exists, it never matches contraction
1793 idx += ct.Source.Length;
1796 if (ct.SortKey != null) {
1797 for (int i = 0; i < 4; i++)
1798 charSortKey [i] = sortkey [i];
1800 ctx.PrevSortKey = charSortKey;
1802 // Here is the core of LAMESPEC
1803 // described at the top of the source.
1805 return MatchesForward (ct.Replacement, ref dummy,
1806 ct.Replacement.Length, ti, sortkey, noLv4, ref ctx);
1810 si = FilterOptions (s [idx], opt);
1812 charSortKey [0] = Category (si);
1813 bool noMatch = false;
1814 if (sortkey [0] == charSortKey [0])
1815 charSortKey [1] = Level1 (si);
1818 if (!ignoreNonSpace && sortkey [1] == charSortKey [1])
1819 charSortKey [2] = Level2 (si, ext);
1820 else if (!ignoreNonSpace)
1823 for (; idx < end; idx++) {
1824 if (Category (s [idx]) != 1)
1829 charSortKey [3] = Uni.Level3 (si);
1830 if (charSortKey [0] != 1)
1833 for (; idx < end; idx++) {
1834 if (Category (s [idx]) != 1)
1838 if (charSortKey [2] == 0)
1839 charSortKey [2] = 2;
1840 charSortKey [2] = (byte) (charSortKey [2]
1841 + Level2 (s [idx], ExtenderType.None));
1844 return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);
1847 unsafe bool MatchesPrimitive (COpt opt, byte* source, int si, ExtenderType ext, byte* target, int ti, bool noLv4)
1849 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1850 if (source [0] != target [0] ||
1851 source [1] != target [1] ||
1852 (!ignoreNonSpace && source [2] != target [2]) ||
1853 source [3] != target [3])
1855 if (noLv4 && (si < 0 || !Uni.HasSpecialWeight ((char) si)))
1859 // Since target can never be an extender, if the source
1860 // is an expander and it matters, then they never match.
1861 if (!ignoreNonSpace && ext == ExtenderType.Conditional)
1863 if (Uni.IsJapaneseSmallLetter ((char) si) !=
1864 Uni.IsJapaneseSmallLetter ((char) ti) ||
1865 ToDashTypeValue (ext, opt) !=
1866 // FIXME: we will have to specify correct value for target
1867 ToDashTypeValue (ExtenderType.None, opt) ||
1868 !Uni.IsHiragana ((char) si) !=
1869 !Uni.IsHiragana ((char) ti) ||
1870 IsHalfKana ((char) si, opt) !=
1871 IsHalfKana ((char) ti, opt))
1876 unsafe bool MatchesBackward (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ref Context ctx)
1879 if (ctx.AlwaysMatchFlags != null && si < 128 && (ctx.AlwaysMatchFlags [si / 8] & (1 << (si % 8))) != 0)
1881 if (ctx.NeverMatchFlags != null && si < 128 && (ctx.NeverMatchFlags [si / 8] & (1 << (si % 8))) != 0) {
1885 ExtenderType ext = GetExtenderType (s [idx]);
1886 Contraction ct = null;
1887 if (MatchesBackwardCore (s, ref idx, end, orgStart, ti, sortkey, noLv4, ext, ref ct, ref ctx)) {
1888 if (ctx.AlwaysMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1889 ctx.AlwaysMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1892 if (ctx.NeverMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128) {
1893 ctx.NeverMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1898 unsafe bool MatchesBackwardCore (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
1900 COpt opt = ctx.Option;
1901 byte* charSortKey = ctx.Buffer1;
1902 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1905 // To handle extenders in source, we need to
1906 // check next _primary_ character.
1907 if (ext != ExtenderType.None) {
1908 byte diacritical = 0;
1909 for (int tmp = 0; ; tmp--) {
1910 if (tmp < 0) // heading extender
1912 if (IsIgnorable (s [tmp], opt))
1914 int tmpi = FilterOptions (s [tmp], opt);
1915 byte category = Category (tmpi);
1916 if (category == 1) {
1917 diacritical = Level2 (tmpi, ExtenderType.None);
1920 si = FilterExtender (tmpi, ext, opt);
1921 charSortKey [0] = category;
1922 charSortKey [1] = Level1 (si);
1923 if (!ignoreNonSpace)
1924 charSortKey [2] = Level2 (si, ext);
1925 charSortKey [3] = Uni.Level3 (si);
1926 if (ext != ExtenderType.Conditional &&
1929 (charSortKey [2] == 0) ?
1930 (byte) (diacritical + 2) :
1936 if (ext == ExtenderType.None)
1937 ct = GetTailContraction (s, idx, end);
1938 // if lv4 exists, it never matches contraction
1940 idx -= ct.Source.Length;
1943 if (ct.SortKey != null) {
1944 for (int i = 0; i < 4; i++)
1945 charSortKey [i] = sortkey [i];
1947 ctx.PrevSortKey = charSortKey;
1949 // Here is the core of LAMESPEC
1950 // described at the top of the source.
1951 int dummy = ct.Replacement.Length - 1;
1952 return 0 <= LastIndexOfSortKey (
1953 ct.Replacement, dummy, dummy,
1954 ct.Replacement.Length, sortkey,
1955 ti, noLv4, ref ctx);
1957 } else if (ext == ExtenderType.None) {
1959 si = FilterOptions (s [idx], opt);
1961 bool noMatch = false;
1962 charSortKey [0] = Category (si);
1963 if (charSortKey [0] == sortkey [0])
1964 charSortKey [1] = Level1 (si);
1967 if (!ignoreNonSpace && charSortKey [1] == sortkey [1])
1968 charSortKey [2] = Level2 (si, ext);
1969 else if (!ignoreNonSpace)
1973 charSortKey [3] = Uni.Level3 (si);
1974 if (charSortKey [0] != 1)
1977 if (ext == ExtenderType.None) {
1978 for (int tmp = cur + 1; tmp < orgStart; tmp++) {
1979 if (Category (s [tmp]) != 1)
1983 if (charSortKey [2] == 0)
1984 charSortKey [2] = 2;
1985 charSortKey [2] = (byte) (charSortKey [2]
1986 + Level2 (s [tmp], ExtenderType.None));
1989 return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);