2 // SimpleCollator.cs : the core collator implementation
5 // Atsushi Enomoto <atsushi@ximian.com>
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 // Here's a summary for supporting contractions, expansions and diacritical
33 // Diacritical mapping is a simple tailoring rule that "remaps" diacritical
34 // weight value from one to another. For now it applies to all range of
35 // characters, but at some stage we might need to limit the remapping ranges.
37 // A Contraction consists of a string (actually char[]) and a sortkey entry
38 // (i.e. byte[]). It indicates that a sequence of characters is interpreted
39 // as a single character that has the mapped sortkey value. There is no
40 // character which goes across "special rules". When the collator encountered
41 // such a sequence of characters, it just returns the sortkey value without
42 // further replacement.
44 // Since it is still likely to happen that a contraction sequence matches
45 // other character than the identical sequence (depending on CompareOptions
46 // and of course, defined sortkey value itself), comparison cannot be done
47 // at source char[] level.
49 // In IndexOf(), the first character in the target string or the target char
50 // itself is turned into sortkey bytes. If the character has a contraction and
51 // that is sortkey map, then it is used instead. If the contraction exists and
52 // that is replacement map, then the first character of the replacement string
53 // is searched instead. IndexOf() always searches only for the top character,
54 // and if it returned negative value, then it returns -1. Otherwise, it then
55 // tries IsPrefix() from that location. If it returns true, then it returns
58 // LAMESPEC: IndexOf() is lame as a whole API. It never matches in the middle
59 // of expansion and there is no proper way to return such indexes within
60 // a single int return value.
62 // For example, try below in .NET:
63 // IndexOf("\u00E6", "a")
64 // IndexOf("\u00E6", "e")
69 using System.Collections;
70 using System.Globalization;
72 using Uni = Mono.Globalization.Unicode.MSCompatUnicodeTable;
73 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
74 using COpt = System.Globalization.CompareOptions;
76 namespace Mono.Globalization.Unicode
78 internal class SimpleCollator
80 unsafe internal struct Context
82 public Context (CompareOptions opt, byte* alwaysMatchFlags, byte* neverMatchFlags, byte* buffer1, byte* buffer2, byte* prev1)
85 AlwaysMatchFlags = alwaysMatchFlags;
86 NeverMatchFlags = neverMatchFlags;
93 public readonly CompareOptions Option;
94 public readonly byte* NeverMatchFlags;
95 public readonly byte* AlwaysMatchFlags;
99 public byte* PrevSortKey;
101 public void ClearPrevInfo ()
108 unsafe struct PreviousInfo
111 public byte* SortKey;
113 public PreviousInfo (bool dummy)
122 public string Source;
129 static SimpleCollator invariant =
130 new SimpleCollator (CultureInfo.InvariantCulture);
132 readonly TextInfo textInfo; // for ToLower().
133 readonly bool frenchSort;
134 unsafe readonly byte* cjkCatTable;
135 unsafe readonly byte* cjkLv1Table;
136 readonly CodePointIndexer cjkIndexer;
137 unsafe readonly byte* cjkLv2Table;
138 readonly CodePointIndexer cjkLv2Indexer;
140 readonly Contraction [] contractions;
141 readonly Level2Map [] level2Maps;
143 // This flag marks characters as "unsafe", where the character
144 // could be used as part of a contraction (whose length > 1).
145 readonly byte [] unsafeFlags;
147 const int UnsafeFlagLength = 0x300 / 8;
149 // readonly byte [] contractionFlags = new byte [16];
151 // This array is internally used inside IndexOf() to store
152 // "no need to check" ASCII characters.
154 // Now that it should be thread safe, this array is allocated
156 // byte [] neverMatchFlags = new byte [128 / 8];
158 #region .ctor() and split functions
160 public SimpleCollator (CultureInfo culture)
163 textInfo = culture.TextInfo;
166 SetCJKTable (culture, ref cjkIndexer,
167 ref cjkCatTable, ref cjkLv1Table,
168 ref cjkLv2Indexer, ref cjkLv2Table);
171 // Get tailoring info
172 TailoringInfo t = null;
173 for (CultureInfo ci = culture; ci.LCID != 127; ci = ci.Parent) {
174 t = Uni.GetTailoringInfo (ci.LCID);
178 if (t == null) // then use invariant
179 t = Uni.GetTailoringInfo (127);
181 frenchSort = t.FrenchSort;
182 Uni.BuildTailoringTables (culture, t, ref contractions,
184 unsafeFlags = new byte [UnsafeFlagLength];
185 foreach (Contraction c in contractions) {
186 if (c.Source.Length > 1)
187 foreach (char ch in c.Source)
188 unsafeFlags [(int) ch / 8 ]
189 |= (byte) (1 << ((int) ch & 7));
190 // for (int i = 0; i < c.Source.Length; i++) {
191 // int ch = c.Source [i];
194 // contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
198 foreach (Contraction c in invariant.contractions) {
199 if (c.Source.Length > 1)
200 foreach (char ch in c.Source)
201 unsafeFlags [(int) ch / 8 ]
202 |= (byte) (1 << ((int) ch & 7));
203 // for (int i = 0; i < c.Source.Length; i++) {
204 // int ch = c.Source [i];
207 // contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
211 // FIXME: Since tailorings are mostly for latin
212 // (and in some cases Cyrillic) characters, it would
213 // be much better for performance to store "start
214 // indexes" for > 370 (culture-specific letters).
217 // dump tailoring table
218 Console.WriteLine ("******** building table for {0} : contractions - {1} diacritical - {2}",
219 culture.LCID, contractions.Length, level2Maps.Length);
220 foreach (Contraction c in contractions) {
221 foreach (char cc in c.Source)
222 Console.Write ("{0:X4} ", (int) cc);
223 Console.WriteLine (" -> '{0}'", c.Replacement);
228 unsafe private void SetCJKTable (
229 CultureInfo culture, ref CodePointIndexer cjkIndexer,
230 ref byte* catTable, ref byte* lv1Table,
231 ref CodePointIndexer lv2Indexer, ref byte* lv2Table)
233 string name = GetNeutralCulture (culture).Name;
235 Uni.FillCJK (name, ref cjkIndexer, ref catTable,
236 ref lv1Table, ref lv2Indexer, ref lv2Table);
239 static CultureInfo GetNeutralCulture (CultureInfo info)
241 CultureInfo ret = info;
242 while (ret.Parent != null && ret.Parent.LCID != 127)
249 unsafe byte Category (int cp)
251 if (cp < 0x3000 || cjkCatTable == null)
252 return Uni.Category (cp);
253 int idx = cjkIndexer.ToIndex (cp);
254 return idx < 0 ? Uni.Category (cp) : cjkCatTable [idx];
257 unsafe byte Level1 (int cp)
259 if (cp < 0x3000 || cjkLv1Table == null)
260 return Uni.Level1 (cp);
261 int idx = cjkIndexer.ToIndex (cp);
262 return idx < 0 ? Uni.Level1 (cp) : cjkLv1Table [idx];
265 unsafe byte Level2 (int cp, ExtenderType ext)
267 if (ext == ExtenderType.Buggy)
269 else if (ext == ExtenderType.Conditional)
272 if (cp < 0x3000 || cjkLv2Table == null)
273 return Uni.Level2 (cp);
274 int idx = cjkLv2Indexer.ToIndex (cp);
275 byte ret = idx < 0 ? (byte) 0 : cjkLv2Table [idx];
278 ret = Uni.Level2 (cp);
279 if (level2Maps.Length == 0)
281 for (int i = 0; i < level2Maps.Length; i++) {
282 if (level2Maps [i].Source == ret)
283 return level2Maps [i].Replace;
284 else if (level2Maps [i].Source > ret)
290 static bool IsHalfKana (int cp, COpt opt)
292 return (opt & COpt.IgnoreWidth) != 0 ||
293 Uni.IsHalfWidthKana ((char) cp);
296 Contraction GetContraction (string s, int start, int end)
298 // int si = s [start];
299 // if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
301 Contraction c = GetContraction (s, start, end, contractions);
302 if (c != null || lcid == 127)
304 return GetContraction (s, start, end, invariant.contractions);
307 Contraction GetContraction (string s, int start, int end, Contraction [] clist)
309 for (int i = 0; i < clist.Length; i++) {
310 Contraction ct = clist [i];
311 int diff = ct.Source [0] - s [start];
313 return null; // it's already sorted
316 char [] chars = ct.Source;
317 if (end - start < chars.Length)
320 for (int n = 0; n < chars.Length; n++)
321 if (s [start + n] != chars [n]) {
331 Contraction GetTailContraction (string s, int start, int end)
333 // int si = s [end - 1];
334 // if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
336 Contraction c = GetTailContraction (s, start, end, contractions);
337 if (c != null || lcid == 127)
339 return GetTailContraction (s, start, end, invariant.contractions);
342 Contraction GetTailContraction (string s, int start, int end, Contraction [] clist)
344 if (start == end || end < -1 || start >= s.Length || s.Length <= end + 1)
345 throw new SystemException (String.Format ("MONO internal error. Failed to get TailContraction. start = {0} end = {1} string = '{2}'", start, end, s));
346 for (int i = 0; i < clist.Length; i++) {
347 Contraction ct = clist [i];
348 int diff = ct.Source [0] - s [end + 1];
350 return null; // it's already sorted
353 char [] chars = ct.Source;
356 if (chars.Length > start - end)
358 for (int n = 0; n < chars.Length; n++)
359 if (s [start - n] != chars [chars.Length - 1 - n]) {
369 Contraction GetContraction (char c)
371 Contraction ct = GetContraction (c, contractions);
372 if (ct != null || lcid == 127)
374 return GetContraction (c, invariant.contractions);
377 Contraction GetContraction (char c, Contraction [] clist)
379 for (int i = 0; i < clist.Length; i++) {
380 Contraction ct = clist [i];
381 if (ct.Source [0] > c)
382 return null; // it's already sorted
383 if (ct.Source [0] == c && ct.Source.Length == 1)
389 int FilterOptions (int i, COpt opt)
391 if ((opt & COpt.IgnoreWidth) != 0) {
392 int x = Uni.ToWidthCompat (i);
396 if ((opt & COpt.IgnoreCase) != 0)
397 i = textInfo.ToLower ((char) i);
398 if ((opt & COpt.IgnoreKanaType) != 0)
399 i = Uni.ToKanaTypeInsensitive (i);
411 ExtenderType GetExtenderType (int i)
413 // LAMESPEC: Windows expects true for U+3005, but
414 // sometimes it does not represent to repeat just
416 // Windows also expects true for U+3031 and U+3032,
417 // but they should *never* repeat one character.
419 // U+2015 becomes an extender only when it is Japanese
421 return lcid == 16 ? ExtenderType.Conditional :
424 if (i < 0x3005 || i > 0xFF70)
425 return ExtenderType.None;
430 return ExtenderType.Simple;
432 return ExtenderType.Conditional;
435 return ExtenderType.Voiced;
439 return ExtenderType.None;
441 case 0x3005: // LAMESPEC: see above.
442 return ExtenderType.Buggy;
443 case 0x3031: // LAMESPEC: see above.
444 case 0x3032: // LAMESPEC: see above.
447 return ExtenderType.Simple;
450 return ExtenderType.Voiced;
452 return ExtenderType.Conditional;
454 return ExtenderType.None;
458 static byte ToDashTypeValue (ExtenderType ext, COpt opt)
460 if ((opt & COpt.IgnoreNonSpace) != 0) // LAMESPEC: huh, why?
463 case ExtenderType.None:
465 case ExtenderType.Conditional:
472 int FilterExtender (int i, ExtenderType ext, COpt opt)
474 if (ext == ExtenderType.Conditional &&
475 Uni.HasSpecialWeight ((char) i)) {
476 bool half = IsHalfKana ((char) i, opt);
477 bool katakana = !Uni.IsHiragana ((char) i);
478 switch (Level1 (i) & 7) {
480 return half ? 0xFF71 : katakana ? 0x30A2 : 0x3042;
482 return half ? 0xFF72 : katakana ? 0x30A4 : 0x3044;
484 return half ? 0xFF73 : katakana ? 0x30A6 : 0x3046;
486 return half ? 0xFF74 : katakana ? 0x30A8 : 0x3048;
488 return half ? 0xFF75 : katakana ? 0x30AA : 0x304A;
494 static bool IsIgnorable (int i, COpt opt)
496 return Uni.IsIgnorable (i, (byte) (1 +
497 ((opt & COpt.IgnoreSymbols) != 0 ? 2 : 0) +
498 ((opt & COpt.IgnoreNonSpace) != 0 ? 4 : 0)));
504 return i / 8 >= unsafeFlags.Length ? true : (unsafeFlags [i / 8] & (1 << (i % 8))) == 0;
509 public SortKey GetSortKey (string s)
511 return GetSortKey (s, CompareOptions.None);
514 public SortKey GetSortKey (string s, CompareOptions options)
516 return GetSortKey (s, 0, s.Length, options);
519 public SortKey GetSortKey (string s, int start, int length, CompareOptions options)
521 SortKeyBuffer buf = new SortKeyBuffer (lcid);
522 buf.Initialize (options, lcid, s, frenchSort);
523 int end = start + length;
524 GetSortKey (s, start, end, buf, options);
525 return buf.GetResultAndReset ();
528 unsafe void GetSortKey (string s, int start, int end,
529 SortKeyBuffer buf, CompareOptions opt)
531 byte* prevbuf = stackalloc byte [4];
532 ClearBuffer (prevbuf, 4);
533 Context ctx = new Context (opt, null, null, null, null, prevbuf);
535 for (int n = start; n < end; n++) {
538 ExtenderType ext = GetExtenderType (i);
539 if (ext != ExtenderType.None) {
540 i = FilterExtender (ctx.PrevCode, ext, opt);
542 FillSortKeyRaw (i, ext, buf, opt);
543 else if (ctx.PrevSortKey != null) {
544 byte* b = ctx.PrevSortKey;
548 b [2] != 1 ? b [2] : Level2 (i, ext),
549 b [3] != 1 ? b [3] : Uni.Level3 (i));
551 // otherwise do nothing.
552 // (if the extender is the first char
553 // in the string, then just ignore.)
557 if (IsIgnorable (i, opt))
559 i = FilterOptions (i, opt);
561 Contraction ct = GetContraction (s, n, end);
563 if (ct.Replacement != null) {
564 GetSortKey (ct.Replacement, 0, ct.Replacement.Length, buf, opt);
566 byte* b = ctx.PrevSortKey;
567 for (int bi = 0; bi < ct.SortKey.Length; bi++)
568 b [bi] = ct.SortKey [bi];
572 b [2] != 1 ? b [2] : Level2 (i, ext),
573 b [3] != 1 ? b [3] : Uni.Level3 (i));
576 n += ct.Source.Length - 1;
579 if (!Uni.IsIgnorableNonSpacing (i))
581 FillSortKeyRaw (i, ExtenderType.None, buf, opt);
586 void FillSortKeyRaw (int i, ExtenderType ext,
587 SortKeyBuffer buf, CompareOptions opt)
589 if (0x3400 <= i && i <= 0x4DB5) {
590 int diff = i - 0x3400;
591 buf.AppendCJKExtension (
592 (byte) (0x10 + diff / 254),
593 (byte) (diff % 254 + 2));
597 UnicodeCategory uc = char.GetUnicodeCategory ((char) i);
599 case UnicodeCategory.PrivateUse:
600 int diff = i - 0xE000;
602 (byte) (0xE5 + diff / 254),
603 (byte) (diff % 254 + 2),
607 case UnicodeCategory.Surrogate:
608 FillSurrogateSortKeyRaw (i, buf);
612 byte level2 = Level2 (i, ext);
613 if (Uni.HasSpecialWeight ((char) i)) {
614 byte level1 = Level1 (i);
620 Uni.IsJapaneseSmallLetter ((char) i),
621 ToDashTypeValue (ext, opt),
622 !Uni.IsHiragana ((char) i),
623 IsHalfKana ((char) i, opt)
625 if ((opt & COpt.IgnoreNonSpace) == 0 && ext == ExtenderType.Voiced)
626 // Append voice weight
627 buf.AppendNormal (1, 1, 1, 0);
637 void FillSurrogateSortKeyRaw (int i, SortKeyBuffer buf)
646 lower = (byte) ((i == 0xD800) ? 0x3E : 0x3F);
647 } else if (0xD840 <= i && i < 0xD880) {
651 } else if (0xDB80 <= i && i < 0xDC00) {
652 diffbase = 0xDB80 - 0x40;
656 diffbase = 0xDC00 - 0xF8 + 2;
660 int diff = i - diffbase;
663 (byte) (segment + diff / 254),
664 (byte) (diff % 254 + 2),
673 public int Compare (string s1, string s2)
675 return Compare (s1, s2, CompareOptions.None);
678 public int Compare (string s1, string s2, CompareOptions options)
680 return Compare (s1, 0, s1.Length, s2, 0, s2.Length, options);
683 private int CompareOrdinal (string s1, int idx1, int len1,
684 string s2, int idx2, int len2)
686 int min = len1 < len2 ? len1 : len2;
687 int end1 = idx1 + min;
688 int end2 = idx2 + min;
689 if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
690 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
691 for (int i1 = idx1, i2 = idx2;
692 i1 < end1 && i2 < end2; i1++, i2++)
693 if (s1 [i1] != s2 [i2])
694 return s1 [i1] - s2 [i2];
695 return len1 == len2 ? 0 :
696 len1 == min ? - 1 : 1;
699 private int CompareOrdinalIgnoreCase (string s1, int idx1, int len1,
700 string s2, int idx2, int len2)
702 int min = len1 < len2 ? len1 : len2;
703 int end1 = idx1 + min;
704 int end2 = idx2 + min;
705 if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
706 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
707 TextInfo ti = invariant.textInfo;
708 for (int i1 = idx1, i2 = idx2;
709 i1 < end1 && i2 < end2; i1++, i2++)
710 if (ti.ToLower (s1 [i1]) != ti.ToLower (s2 [i2]))
711 return ti.ToLower (s1 [i1]) - ti.ToLower (s2 [i2]);
712 return len1 == len2 ? 0 :
713 len1 == min ? - 1 : 1;
716 public unsafe int Compare (string s1, int idx1, int len1,
717 string s2, int idx2, int len2, CompareOptions options)
719 // quick equality check
720 if (idx1 == idx2 && len1 == len2 &&
721 Object.ReferenceEquals (s1, s2))
723 // FIXME: this should be done inside Compare() at
725 // int ord = CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
728 if (options == CompareOptions.Ordinal)
729 return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
731 if (options == CompareOptions.OrdinalIgnoreCase)
732 return CompareOrdinalIgnoreCase (s1, idx1, len1, s2, idx2, len2);
735 #if false // stable easy version, depends on GetSortKey().
736 SortKey sk1 = GetSortKey (s1, idx1, len1, options);
737 SortKey sk2 = GetSortKey (s2, idx2, len2, options);
738 byte [] d1 = sk1.KeyData;
739 byte [] d2 = sk2.KeyData;
740 int len = d1.Length > d2.Length ? d2.Length : d1.Length;
741 for (int i = 0; i < len; i++)
742 if (d1 [i] != d2 [i])
743 return d1 [i] < d2 [i] ? -1 : 1;
744 return d1.Length == d2.Length ? 0 : d1.Length < d2.Length ? -1 : 1;
746 byte* sk1 = stackalloc byte [4];
747 byte* sk2 = stackalloc byte [4];
748 ClearBuffer (sk1, 4);
749 ClearBuffer (sk2, 4);
750 Context ctx = new Context (options, null, null, sk1, sk2, null);
753 int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, out dummy, out dummy2, true, false, ref ctx);
754 return ret == 0 ? 0 : ret < 0 ? -1 : 1;
758 unsafe void ClearBuffer (byte* buffer, int size)
760 for (int i = 0; i < size; i++)
764 unsafe int CompareInternal (string s1, int idx1, int len1, string s2,
766 out bool targetConsumed, out bool sourceConsumed,
767 bool skipHeadingExtenders, bool immediatelyBreakup,
770 COpt opt = ctx.Option;
773 int end1 = idx1 + len1;
774 int end2 = idx2 + len2;
775 targetConsumed = false;
776 sourceConsumed = false;
777 PreviousInfo prev2 = new PreviousInfo (false);
779 // It holds final result that comes from the comparison
780 // at level 2 or lower. Even if Compare() found the
781 // difference at level 2 or lower, it still has to
782 // continue level 1 comparison. FinalResult is used
783 // when there was no further differences.
785 // It holds the comparison level to do. It starts from
786 // 5, and becomes 1 when we do primary-only comparison.
787 int currentLevel = 5;
794 // Skip heading extenders
795 if (skipHeadingExtenders) {
796 for (; idx1 < end1; idx1++)
797 if (GetExtenderType (s1 [idx1]) == ExtenderType.None)
799 for (; idx2 < end2; idx2++)
800 if (GetExtenderType (s2 [idx2]) == ExtenderType.None)
804 ExtenderType ext1 = ExtenderType.None;
805 ExtenderType ext2 = ExtenderType.None;
807 int quickCheckPos1 = idx1;
808 int quickCheckPos2 = idx2;
809 bool stringSort = (opt & COpt.StringSort) != 0;
810 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
811 Escape escape1 = new Escape ();
812 Escape escape2 = new Escape ();
815 for (; idx1 < end1; idx1++)
816 if (!IsIgnorable (s1 [idx1], opt))
818 for (; idx2 < end2; idx2++)
819 if (!IsIgnorable (s2 [idx2], opt))
823 if (escape1.Source == null)
826 start1 = escape1.Start;
827 idx1 = escape1.Index;
829 quickCheckPos1 = escape1.Optional;
830 escape1.Source = null;
834 if (escape2.Source == null)
837 start2 = escape2.Start;
838 idx2 = escape2.Index;
840 quickCheckPos2 = escape2.Optional;
841 escape2.Source = null;
845 // If comparison is unstable, then this part is one of the most doubtful part.
846 // Here we run quick codepoint comparison and run back to "stable" area to
847 // compare characters.
849 // Strictly to say, even the first character
850 // could be compared here, but it messed
851 // backward step, so I just avoided mess.
852 if (quickCheckPos1 < idx1 && quickCheckPos2 < idx2) {
853 while (idx1 < end1 && idx2 < end2 &&
854 s1 [idx1] == s2 [idx2]) {
858 if (idx1 == end1 || idx2 == end2)
859 continue; // check replacement
861 int backwardEnd1 = quickCheckPos1;
862 int backwardEnd2 = quickCheckPos2;
863 quickCheckPos1 = idx1;
864 quickCheckPos2 = idx2;
869 for (;idx1 > backwardEnd1; idx1--)
870 if (Category (s1 [idx1]) != 1)
872 for (;idx2 > backwardEnd2; idx2--)
873 if (Category (s2 [idx2]) != 1)
875 for (;idx1 > backwardEnd1; idx1--)
876 if (IsSafe (s1 [idx1]))
878 for (;idx2 > backwardEnd2; idx2--)
879 if (IsSafe (s2 [idx2]))
888 int i1 = FilterOptions (s1 [idx1], opt);
889 int i2 = FilterOptions (s2 [idx2], opt);
890 bool special1 = false;
891 bool special2 = false;
893 // If current character is an expander, then
894 // repeat the previous character.
895 ext1 = GetExtenderType (i1);
896 if (ext1 != ExtenderType.None) {
897 if (ctx.PrevCode < 0) {
898 if (ctx.PrevSortKey == null) {
903 sk1 = ctx.PrevSortKey;
906 i1 = FilterExtender (ctx.PrevCode, ext1, opt);
908 ext2 = GetExtenderType (i2);
909 if (ext2 != ExtenderType.None) {
910 if (prev2.Code < 0) {
911 if (prev2.SortKey == null) {
919 i2 = FilterExtender (prev2.Code, ext2, opt);
922 byte cat1 = Category (i1);
923 byte cat2 = Category (i2);
925 // Handle special weight characters
926 if (!stringSort && currentLevel > 4) {
928 lv5At1 = escape1.Source != null ?
929 escape1.Index - escape1.Start :
931 // here Windows has a bug that it does
932 // not consider thirtiary weight.
933 lv5Value1 = Level1 (i1) << 8 + Uni.Level3 (i1);
938 lv5At2 = escape2.Source != null ?
939 escape2.Index - escape2.Start :
941 // here Windows has a bug that it does
942 // not consider thirtiary weight.
943 lv5Value2 = Level1 (i2) << 8 + Uni.Level3 (i2);
947 if (cat1 == 6 || cat2 == 6) {
953 Contraction ct1 = null;
954 if (ext1 == ExtenderType.None)
955 ct1 = GetContraction (s1, idx1, end1);
960 else if (ct1 != null) {
961 offset1 = ct1.Source.Length;
962 if (ct1.SortKey != null) {
964 for (int i = 0; i < ct1.SortKey.Length; i++)
965 sk1 [i] = ct1.SortKey [i];
967 ctx.PrevSortKey = sk1;
969 else if (escape1.Source == null) {
971 escape1.Start = start1;
972 escape1.Index = cur1 + ct1.Source.Length;
974 escape1.Optional = quickCheckPos1;
975 s1 = ct1.Replacement;
986 sk1 [1] = Level1 (i1);
987 if (!ignoreNonSpace && currentLevel > 1)
988 sk1 [2] = Level2 (i1, ext1);
989 if (currentLevel > 2)
990 sk1 [3] = Uni.Level3 (i1);
991 if (currentLevel > 3)
992 special1 = Uni.HasSpecialWeight ((char) i1);
997 Contraction ct2 = null;
998 if (ext2 == ExtenderType.None)
999 ct2 = GetContraction (s2, idx2, end2);
1003 else if (ct2 != null) {
1004 idx2 += ct2.Source.Length;
1005 if (ct2.SortKey != null) {
1007 for (int i = 0; i < ct2.SortKey.Length; i++)
1008 sk2 [i] = ct2.SortKey [i];
1010 prev2.SortKey = sk2;
1012 else if (escape2.Source == null) {
1013 escape2.Source = s2;
1014 escape2.Start = start2;
1015 escape2.Index = cur2 + ct2.Source.Length;
1017 escape2.Optional = quickCheckPos2;
1018 s2 = ct2.Replacement;
1029 sk2 [1] = Level1 (i2);
1030 if (!ignoreNonSpace && currentLevel > 1)
1031 sk2 [2] = Level2 (i2, ext2);
1032 if (currentLevel > 2)
1033 sk2 [3] = Uni.Level3 (i2);
1034 if (currentLevel > 3)
1035 special2 = Uni.HasSpecialWeight ((char) i2);
1041 // Add offset here so that it does not skip
1042 // idx1 while s2 has a replacement.
1045 // add diacritical marks in s1 here
1046 if (!ignoreNonSpace) {
1047 while (idx1 < end1) {
1048 if (Category (s1 [idx1]) != 1)
1052 sk1 [2] = (byte) (sk1 [2] +
1053 Level2 (s1 [idx1], ExtenderType.None));
1057 // add diacritical marks in s2 here
1058 while (idx2 < end2) {
1059 if (Category (s2 [idx2]) != 1)
1063 sk2 [2] = (byte) (sk2 [2] +
1064 Level2 (s2 [idx2], ExtenderType.None));
1069 int ret = sk1 [0] - sk2 [0];
1070 ret = ret != 0 ? ret : sk1 [1] - sk2 [1];
1073 if (currentLevel == 1)
1075 if (!ignoreNonSpace) {
1076 ret = sk1 [2] - sk2 [2];
1079 if (immediatelyBreakup)
1080 return -1; // different
1081 currentLevel = frenchSort ? 2 : 1;
1085 if (currentLevel == 2)
1087 ret = sk1 [3] - sk2 [3];
1090 if (immediatelyBreakup)
1091 return -1; // different
1095 if (currentLevel == 3)
1097 if (special1 != special2) {
1098 if (immediatelyBreakup)
1099 return -1; // different
1100 finalResult = special1 ? 1 : -1;
1105 ret = CompareFlagPair (
1106 !Uni.IsJapaneseSmallLetter ((char) i1),
1107 !Uni.IsJapaneseSmallLetter ((char) i2));
1108 ret = ret != 0 ? ret :
1109 ToDashTypeValue (ext1, opt) -
1110 ToDashTypeValue (ext2, opt);
1111 ret = ret != 0 ? ret : CompareFlagPair (
1112 Uni.IsHiragana ((char) i1),
1113 Uni.IsHiragana ((char) i2));
1114 ret = ret != 0 ? ret : CompareFlagPair (
1115 !IsHalfKana ((char) i1, opt),
1116 !IsHalfKana ((char) i2, opt));
1118 if (immediatelyBreakup)
1119 return -1; // different
1127 // If there were only level 3 or lower differences,
1128 // then we still have to find diacritical differences
1130 if (!ignoreNonSpace &&
1131 finalResult != 0 && currentLevel > 2) {
1132 while (idx1 < end1 && idx2 < end2) {
1133 if (!Uni.IsIgnorableNonSpacing (s1 [idx1]))
1135 if (!Uni.IsIgnorableNonSpacing (s2 [idx2]))
1137 finalResult = Level2 (FilterOptions (s1 [idx1], opt), ext1) - Level2 (FilterOptions (s2 [idx2], opt), ext2);
1138 if (finalResult != 0)
1142 // they should work only for the first character
1143 ext1 = ExtenderType.None;
1144 ext2 = ExtenderType.None;
1147 if (currentLevel == 1 && finalResult != 0) {
1148 while (idx1 < end1) {
1149 if (Uni.IsIgnorableNonSpacing (s1 [idx1]))
1154 while (idx2 < end2) {
1155 if (Uni.IsIgnorableNonSpacing (s2 [idx2]))
1161 // we still have to handle level 5
1162 if (finalResult == 0) {
1163 finalResult = lv5At1 - lv5At2;
1164 if (finalResult == 0)
1165 finalResult = lv5Value1 - lv5Value2;
1167 if (finalResult == 0) {
1169 targetConsumed = true;
1171 sourceConsumed = true;
1173 return idx1 != end1 ? 1 : idx2 == end2 ? finalResult : -1;
1176 int CompareFlagPair (bool b1, bool b2)
1178 return b1 == b2 ? 0 : b1 ? 1 : -1;
1183 #region IsPrefix() and IsSuffix()
1185 public bool IsPrefix (string src, string target, CompareOptions opt)
1187 return IsPrefix (src, target, 0, src.Length, opt);
1190 public unsafe bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
1192 if (target.Length == 0)
1194 byte* sk1 = stackalloc byte [4];
1195 byte* sk2 = stackalloc byte [4];
1196 ClearBuffer (sk1, 4);
1197 ClearBuffer (sk2, 4);
1198 Context ctx = new Context (opt, null, null, sk1, sk2, null);
1199 return IsPrefix (s, target, start, length, true, ref ctx);
1202 unsafe bool IsPrefix (string s, string target, int start, int length, bool skipHeadingExtenders, ref Context ctx)
1204 bool consumed, dummy;
1205 CompareInternal (s, start, length,
1206 target, 0, target.Length,
1207 out consumed, out dummy, skipHeadingExtenders,
1214 public bool IsSuffix (string src, string target, CompareOptions opt)
1216 return IsSuffix (src, target, src.Length - 1, src.Length, opt);
1219 public unsafe bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
1221 if (target.Length == 0)
1223 int last = LastIndexOf (s, target, start, length, opt);
1224 return last >= 0 && Compare (s, last, s.Length - last, target, 0, target.Length, opt) == 0;
1226 // quick check : simple codepoint comparison
1227 if (s.Length >= target.Length) {
1229 int se = start - length;
1230 for (int i = target.Length - 1; si >= se && i >= 0; i--, si--)
1231 if (s [si] != target [i])
1233 if (si == start + target.Length)
1237 PreviousInfo prev = new PreviousInfo (false);
1238 byte* sk1 = stackalloc byte [4];
1239 byte* sk2 = stackalloc byte [4];
1240 ClearBuffer (sk1, 4);
1241 ClearBuffer (sk2, 4);
1242 return IsSuffix (opt, s, target, start, length, ref prev, sk1, sk2);
1246 unsafe bool IsSuffix (string s, string t, int start, int length, ref Context ctx)
1249 COpt opt = ctx.Option;
1251 for (;tstart < t.Length; tstart++)
1252 if (!IsIgnorable (t [tstart], opt))
1254 if (tstart == t.Length)
1255 return true; // as if target is String.Empty.
1258 // This is still not working. If it is not likely to get working, then just remove it.
1260 int send = start - length;
1261 int ti = t.Length - 1;
1264 int sStep = start + 1;
1265 int tStep = t.Length;
1266 bool sourceConsumed, targetConsumed;
1268 for (; send < si; si--)
1269 if (!IsIgnorable (s [si]))
1271 for (; tend < ti; ti--)
1272 if (!IsIgnorable (t [ti]))
1276 for (; send < si; si--)
1277 if (IsSafe (s [si]))
1279 for (; tend < ti; ti--)
1280 if (IsSafe (t [ti]))
1282 Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, length, t, ti, tstart, sStep - si, tStep - ti);
1283 if (CompareInternal (opt, s, si, sStep - si,
1285 out targetConsumed, out sourceConsumed,
1297 // FIXME: it is not efficient for very long target.
1298 bool sourceConsumed, targetConsumed;
1299 int mismatchCount = 0;
1300 for (int i = 0; i < length; i++) {
1301 ctx.ClearPrevInfo ();
1303 int ret = CompareInternal (s, start - i, i + 1,
1304 t, tstart, t.Length - tstart,
1306 // FIXME: could immediately breakup
1307 out sourceConsumed, true, true, ref ctx);
1310 if (!sourceConsumed && targetConsumed)
1312 if (!targetConsumed) {
1314 if (mismatchCount > Uni.MaxExpansionLength)
1315 // The largest length of an
1316 // expansion is 3, so if the
1317 // target was not consumed more
1318 // than 3 times, then the tail
1319 // character does not match.
1329 #region IndexOf() / LastIndexOf()
1331 public int IndexOf (string s, string target, CompareOptions opt)
1333 return IndexOf (s, target, 0, s.Length, opt);
1336 public unsafe int IndexOf (string s, string target, int start, int length, CompareOptions opt)
1338 byte* alwaysMatchFlags = stackalloc byte [16];
1339 byte* neverMatchFlags = stackalloc byte [16];
1340 byte* targetSortKey = stackalloc byte [4];
1341 byte* sk1 = stackalloc byte [4];
1342 byte* sk2 = stackalloc byte [4];
1343 ClearBuffer (alwaysMatchFlags, 16);
1344 ClearBuffer (neverMatchFlags, 16);
1345 ClearBuffer (targetSortKey, 4);
1346 ClearBuffer (sk1, 4);
1347 ClearBuffer (sk2, 4);
1348 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null);
1350 return IndexOf (s, target, start, length,
1351 targetSortKey, ref ctx);
1354 public int IndexOf (string s, char target, CompareOptions opt)
1356 return IndexOf (s, target, 0, s.Length, opt);
1359 public unsafe int IndexOf (string s, char target, int start, int length, CompareOptions opt)
1361 byte* alwaysMatchFlags = stackalloc byte [16];
1362 byte* neverMatchFlags = stackalloc byte [16];
1363 byte* targetSortKey = stackalloc byte [4];
1364 byte* sk1 = stackalloc byte [4];
1365 byte* sk2 = stackalloc byte [4];
1366 ClearBuffer (alwaysMatchFlags, 16);
1367 ClearBuffer (neverMatchFlags, 16);
1368 ClearBuffer (targetSortKey, 4);
1369 ClearBuffer (sk1, 4);
1370 ClearBuffer (sk2, 4);
1371 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null);
1373 // If target is contraction, then use string search.
1374 Contraction ct = GetContraction (target);
1376 if (ct.Replacement != null)
1377 return IndexOf (s, ct.Replacement,
1378 start, length, targetSortKey, ref ctx);
1380 for (int i = 0; i < ct.SortKey.Length; i++)
1381 sk2 [i] = ct.SortKey [i];
1382 return IndexOfSortKey (s, start, length, sk2, char.MinValue, -1, true, ref ctx);
1385 int ti = FilterOptions ((int) target, opt);
1386 targetSortKey [0] = Category (ti);
1387 targetSortKey [1] = Level1 (ti);
1388 if ((opt & COpt.IgnoreNonSpace) == 0)
1390 Level2 (ti, ExtenderType.None);
1391 targetSortKey [3] = Uni.Level3 (ti);
1392 return IndexOfSortKey (s, start, length,
1393 targetSortKey, target, ti,
1394 !Uni.HasSpecialWeight ((char) ti), ref ctx);
1398 // Searches target byte[] keydata
1399 unsafe int IndexOfSortKey (string s, int start, int length, byte* sortkey, char target, int ti, bool noLv4, ref Context ctx)
1401 int end = start + length;
1406 if (MatchesForward (s, ref idx, end, ti, sortkey, noLv4, ref ctx))
1412 // Searches string. Search head character (or keydata when
1413 // the head is contraction sortkey) and try IsPrefix().
1414 unsafe int IndexOf (string s, string target, int start, int length, byte* targetSortKey, ref Context ctx)
1416 COpt opt = ctx.Option;
1418 for (; tidx < target.Length; tidx++)
1419 if (!IsIgnorable (target [tidx], opt))
1421 if (tidx == target.Length)
1423 Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1424 string replace = ct != null ? ct.Replacement : null;
1425 byte* sk = replace == null ? targetSortKey : null;
1427 char tc = char.MinValue;
1429 if (ct != null && sk != null) {
1430 for (int i = 0; i < ct.SortKey.Length; i++)
1431 sk [i] = ct.SortKey [i];
1432 } else if (sk != null) {
1434 ti = FilterOptions (target [tidx], opt);
1435 sk [0] = Category (ti);
1436 sk [1] = Level1 (ti);
1437 if ((opt & COpt.IgnoreNonSpace) == 0)
1438 sk [2] = Level2 (ti, ExtenderType.None);
1439 sk [3] = Uni.Level3 (ti);
1440 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1443 for (tidx++; tidx < target.Length; tidx++) {
1444 if (Category (target [tidx]) != 1)
1448 sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1454 if (replace != null)
1455 idx = IndexOf (s, replace, start, length, targetSortKey, ref ctx);
1457 idx = IndexOfSortKey (s, start, length, sk, tc, ti, noLv4, ref ctx);
1460 length -= idx - start;
1462 if (IsPrefix (s, target, start, length, false, ref ctx))
1464 Contraction cts = GetContraction (s, start, length);
1466 start += cts.Source.Length;
1467 length -= cts.Source.Length;
1472 } while (length > 0);
1477 // There are the same number of IndexOf() related methods,
1478 // with the same functionalities for each.
1481 public int LastIndexOf (string s, string target, CompareOptions opt)
1483 return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1486 public unsafe int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
1488 byte* alwaysMatchFlags = stackalloc byte [16];
1489 byte* neverMatchFlags = stackalloc byte [16];
1490 byte* targetSortKey = stackalloc byte [4];
1491 byte* sk1 = stackalloc byte [4];
1492 byte* sk2 = stackalloc byte [4];
1493 ClearBuffer (alwaysMatchFlags, 16);
1494 ClearBuffer (neverMatchFlags, 16);
1495 ClearBuffer (targetSortKey, 4);
1496 ClearBuffer (sk1, 4);
1497 ClearBuffer (sk2, 4);
1498 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null);
1499 return LastIndexOf (s, target, start, length,
1500 targetSortKey, ref ctx);
1503 public int LastIndexOf (string s, char target, CompareOptions opt)
1505 return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1508 public unsafe int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
1510 byte* alwaysMatchFlags = stackalloc byte [16];
1511 byte* neverMatchFlags = stackalloc byte [16];
1512 byte* targetSortKey = stackalloc byte [4];
1513 byte* sk1 = stackalloc byte [4];
1514 byte* sk2 = stackalloc byte [4];
1515 ClearBuffer (alwaysMatchFlags, 16);
1516 ClearBuffer (neverMatchFlags, 16);
1517 ClearBuffer (targetSortKey, 4);
1518 ClearBuffer (sk1, 4);
1519 ClearBuffer (sk2, 4);
1520 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null);
1522 // If target is a replacement contraction, then use
1524 Contraction ct = GetContraction (target);
1526 if (ct.Replacement != null)
1527 return LastIndexOf (s,
1528 ct.Replacement, start, length,
1529 targetSortKey, ref ctx);
1531 for (int bi = 0; bi < ct.SortKey.Length; bi++)
1532 sk2 [bi] = ct.SortKey [bi];
1533 return LastIndexOfSortKey (s, start,
1539 int ti = FilterOptions ((int) target, opt);
1540 targetSortKey [0] = Category (ti);
1541 targetSortKey [1] = Level1 (ti);
1542 if ((opt & COpt.IgnoreNonSpace) == 0)
1543 targetSortKey [2] = Level2 (ti, ExtenderType.None);
1544 targetSortKey [3] = Uni.Level3 (ti);
1545 return LastIndexOfSortKey (s, start, start,
1546 length, targetSortKey,
1547 ti, !Uni.HasSpecialWeight ((char) ti),
1552 // Searches target byte[] keydata
1553 unsafe int LastIndexOfSortKey (string s, int start, int orgStart, int length, byte* sortkey, int ti, bool noLv4, ref Context ctx)
1555 int end = start - length;
1559 if (MatchesBackward (s, ref idx, end, orgStart,
1560 ti, sortkey, noLv4, ref ctx))
1566 // Searches string. Search head character (or keydata when
1567 // the head is contraction sortkey) and try IsPrefix().
1568 unsafe int LastIndexOf (string s, string target, int start, int length, byte* targetSortKey, ref Context ctx)
1570 COpt opt = ctx.Option;
1571 int orgStart = start;
1573 for (; tidx < target.Length; tidx++)
1574 if (!IsIgnorable (target [tidx], opt))
1576 if (tidx == target.Length)
1578 Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1579 string replace = ct != null ? ct.Replacement : null;
1580 byte* sk = replace == null ? targetSortKey : null;
1584 if (ct != null && sk != null) {
1585 for (int i = 0; i < ct.SortKey.Length; i++)
1586 sk [i] = ct.SortKey [i];
1587 } else if (sk != null) {
1588 ti = FilterOptions (target [tidx], opt);
1589 sk [0] = Category (ti);
1590 sk [1] = Level1 (ti);
1591 if ((opt & COpt.IgnoreNonSpace) == 0)
1592 sk [2] = Level2 (ti, ExtenderType.None);
1593 sk [3] = Uni.Level3 (ti);
1594 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1597 for (tidx++; tidx < target.Length; tidx++) {
1598 if (Category (target [tidx]) != 1)
1602 sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1609 if (replace != null)
1610 idx = LastIndexOf (s, replace,
1612 targetSortKey, ref ctx);
1614 idx = LastIndexOfSortKey (s, start, orgStart, length, sk, ti, noLv4, ref ctx);
1617 length -= start - idx;
1620 if (IsPrefix (s, target, idx, orgStart - idx + 1, false, ref ctx)) {
1621 for (;idx < orgStart; idx++)
1622 if (!IsIgnorable (s [idx], opt))
1626 Contraction cts = GetContraction (s, idx, orgStart - idx + 1);
1628 start -= cts.Source.Length;
1629 length -= cts.Source.Length;
1634 } while (length > 0);
1638 unsafe bool MatchesForward (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ref Context ctx)
1640 COpt opt = ctx.Option;
1642 if (ctx.AlwaysMatchFlags != null && si < 128 && (ctx.AlwaysMatchFlags [si / 8] & (1 << (si % 8))) != 0)
1644 if (ctx.NeverMatchFlags != null &&
1646 (ctx.NeverMatchFlags [si / 8] & (1 << (si % 8))) != 0) {
1650 ExtenderType ext = GetExtenderType (s [idx]);
1651 Contraction ct = null;
1652 if (MatchesForwardCore (s, ref idx, end, ti, sortkey, noLv4, ext, ref ct, ref ctx)) {
1653 if (ctx.AlwaysMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1654 ctx.AlwaysMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1657 if (ctx.NeverMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1658 ctx.NeverMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1662 unsafe bool MatchesForwardCore (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
1664 COpt opt = ctx.Option;
1665 byte* charSortKey = ctx.Buffer1;
1666 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1668 if (ext == ExtenderType.None)
1669 ct = GetContraction (s, idx, end);
1670 else if (ctx.PrevCode < 0) {
1671 if (ctx.PrevSortKey == null) {
1675 charSortKey = ctx.PrevSortKey;
1678 si = FilterExtender (ctx.PrevCode, ext, opt);
1679 // if lv4 exists, it never matches contraction
1681 idx += ct.Source.Length;
1684 if (ct.SortKey != null) {
1685 for (int i = 0; i < 4; i++)
1686 charSortKey [i] = sortkey [i];
1688 ctx.PrevSortKey = charSortKey;
1690 // Here is the core of LAMESPEC
1691 // described at the top of the source.
1693 return MatchesForward (ct.Replacement, ref dummy,
1694 ct.Replacement.Length, ti, sortkey, noLv4, ref ctx);
1698 si = FilterOptions (s [idx], opt);
1700 charSortKey [0] = Category (si);
1701 bool noMatch = false;
1702 if (sortkey [0] == charSortKey [0])
1703 charSortKey [1] = Level1 (si);
1706 if (!ignoreNonSpace && sortkey [1] == charSortKey [1])
1707 charSortKey [2] = Level2 (si, ext);
1708 else if (!ignoreNonSpace)
1711 for (; idx < end; idx++) {
1712 if (Category (s [idx]) != 1)
1717 charSortKey [3] = Uni.Level3 (si);
1718 if (charSortKey [0] != 1)
1721 for (; idx < end; idx++) {
1722 if (Category (s [idx]) != 1)
1726 if (charSortKey [2] == 0)
1727 charSortKey [2] = 2;
1728 charSortKey [2] = (byte) (charSortKey [2]
1729 + Level2 (s [idx], ExtenderType.None));
1732 return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);
1735 unsafe bool MatchesPrimitive (COpt opt, byte* source, int si, ExtenderType ext, byte* target, int ti, bool noLv4)
1737 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1738 if (source [0] != target [0] ||
1739 source [1] != target [1] ||
1740 (!ignoreNonSpace && source [2] != target [2]) ||
1741 source [3] != target [3])
1743 if (noLv4 && (si < 0 || !Uni.HasSpecialWeight ((char) si)))
1747 // Since target can never be an extender, if the source
1748 // is an expander and it matters, then they never match.
1749 if (!ignoreNonSpace && ext == ExtenderType.Conditional)
1751 if (Uni.IsJapaneseSmallLetter ((char) si) !=
1752 Uni.IsJapaneseSmallLetter ((char) ti) ||
1753 ToDashTypeValue (ext, opt) !=
1754 // FIXME: we will have to specify correct value for target
1755 ToDashTypeValue (ExtenderType.None, opt) ||
1756 !Uni.IsHiragana ((char) si) !=
1757 !Uni.IsHiragana ((char) ti) ||
1758 IsHalfKana ((char) si, opt) !=
1759 IsHalfKana ((char) ti, opt))
1764 unsafe bool MatchesBackward (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ref Context ctx)
1767 if (ctx.AlwaysMatchFlags != null && si < 128 && (ctx.AlwaysMatchFlags [si / 8] & (1 << (si % 8))) != 0)
1769 if (ctx.NeverMatchFlags != null && si < 128 && (ctx.NeverMatchFlags [si / 8] & (1 << (si % 8))) != 0) {
1773 ExtenderType ext = GetExtenderType (s [idx]);
1774 Contraction ct = null;
1775 if (MatchesBackwardCore (s, ref idx, end, orgStart, ti, sortkey, noLv4, ext, ref ct, ref ctx)) {
1776 if (ctx.AlwaysMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1777 ctx.AlwaysMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1780 if (ctx.NeverMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128) {
1781 ctx.NeverMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1786 unsafe bool MatchesBackwardCore (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
1788 COpt opt = ctx.Option;
1789 byte* charSortKey = ctx.Buffer1;
1790 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1793 // To handle extenders in source, we need to
1794 // check next _primary_ character.
1795 if (ext != ExtenderType.None) {
1796 byte diacritical = 0;
1797 for (int tmp = 0; ; tmp--) {
1798 if (tmp < 0) // heading extender
1800 if (IsIgnorable (s [tmp], opt))
1802 int tmpi = FilterOptions (s [tmp], opt);
1803 byte category = Category (tmpi);
1804 if (category == 1) {
1805 diacritical = Level2 (tmpi, ExtenderType.None);
1808 si = FilterExtender (tmpi, ext, opt);
1809 charSortKey [0] = category;
1810 charSortKey [1] = Level1 (si);
1811 if (!ignoreNonSpace)
1812 charSortKey [2] = Level2 (si, ext);
1813 charSortKey [3] = Uni.Level3 (si);
1814 if (ext != ExtenderType.Conditional &&
1817 (charSortKey [2] == 0) ?
1818 (byte) (diacritical + 2) :
1824 if (ext == ExtenderType.None)
1825 ct = GetTailContraction (s, idx, end);
1826 // if lv4 exists, it never matches contraction
1828 idx -= ct.Source.Length;
1831 if (ct.SortKey != null) {
1832 for (int i = 0; i < 4; i++)
1833 charSortKey [i] = sortkey [i];
1835 ctx.PrevSortKey = charSortKey;
1837 // Here is the core of LAMESPEC
1838 // described at the top of the source.
1839 int dummy = ct.Replacement.Length - 1;
1840 return 0 <= LastIndexOfSortKey (
1841 ct.Replacement, dummy, dummy,
1842 ct.Replacement.Length, sortkey,
1843 ti, noLv4, ref ctx);
1845 } else if (ext == ExtenderType.None) {
1847 si = FilterOptions (s [idx], opt);
1849 bool noMatch = false;
1850 charSortKey [0] = Category (si);
1851 if (charSortKey [0] == sortkey [0])
1852 charSortKey [1] = Level1 (si);
1855 if (!ignoreNonSpace && charSortKey [1] == sortkey [1])
1856 charSortKey [2] = Level2 (si, ext);
1857 else if (!ignoreNonSpace)
1861 charSortKey [3] = Uni.Level3 (si);
1862 if (charSortKey [0] != 1)
1865 if (ext == ExtenderType.None) {
1866 for (int tmp = cur + 1; tmp < orgStart; tmp++) {
1867 if (Category (s [tmp]) != 1)
1871 if (charSortKey [2] == 0)
1872 charSortKey [2] = 2;
1873 charSortKey [2] = (byte) (charSortKey [2]
1874 + Level2 (s [tmp], ExtenderType.None));
1877 return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);