2 // SimpleCollator.cs : the core collator implementation
5 // Atsushi Enomoto <atsushi@ximian.com>
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 // Here's a summary for supporting contractions, expansions and diacritical
33 // Diacritical mapping is a simple tailoring rule that "remaps" diacritical
34 // weight value from one to another. For now it applies to all range of
35 // characters, but at some stage we might need to limit the remapping ranges.
37 // A Contraction consists of a string (actually char[]) and a sortkey entry
38 // (i.e. byte[]). It indicates that a sequence of characters is interpreted
39 // as a single character that has the mapped sortkey value. There is no
40 // character which goes across "special rules". When the collator encountered
41 // such a sequence of characters, it just returns the sortkey value without
42 // further replacement.
44 // Since it is still likely to happen that a contraction sequence matches
45 // other character than the identical sequence (depending on CompareOptions
46 // and of course, defined sortkey value itself), comparison cannot be done
47 // at source char[] level.
49 // In IndexOf(), the first character in the target string or the target char
50 // itself is turned into sortkey bytes. If the character has a contraction and
51 // that is sortkey map, then it is used instead. If the contraction exists and
52 // that is replacement map, then the first character of the replacement string
53 // is searched instead. IndexOf() always searches only for the top character,
54 // and if it returned negative value, then it returns -1. Otherwise, it then
55 // tries IsPrefix() from that location. If it returns true, then it returns
58 // LAMESPEC: IndexOf() is lame as a whole API. It never matches in the middle
59 // of expansion and there is no proper way to return such indexes within
60 // a single int return value.
62 // For example, try below in .NET:
63 // IndexOf("\u00E6", "a")
64 // IndexOf("\u00E6", "e")
69 using System.Collections;
70 using System.Globalization;
72 using Uni = Mono.Globalization.Unicode.MSCompatUnicodeTable;
73 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
74 using COpt = System.Globalization.CompareOptions;
76 namespace Mono.Globalization.Unicode
78 internal class SimpleCollator
83 public byte [] SortKey;
85 public PreviousInfo (bool dummy)
101 static SimpleCollator invariant =
102 new SimpleCollator (CultureInfo.InvariantCulture);
104 readonly TextInfo textInfo; // for ToLower().
105 readonly bool frenchSort;
106 unsafe readonly byte* cjkCatTable;
107 unsafe readonly byte* cjkLv1Table;
108 readonly CodePointIndexer cjkIndexer;
109 unsafe readonly byte* cjkLv2Table;
110 readonly CodePointIndexer cjkLv2Indexer;
112 readonly Contraction [] contractions;
113 readonly Level2Map [] level2Maps;
115 // This flag marks characters as "unsafe", where the character
116 // could be used as part of a contraction (whose length > 1).
117 readonly byte [] unsafeFlags;
119 const int UnsafeFlagLength = 0x300 / 8;
121 // readonly byte [] contractionFlags = new byte [16];
123 // This array is internally used inside IndexOf() to store
124 // "no need to check" ASCII characters.
126 // Now that it should be thread safe, this array is allocated
128 // byte [] checkedFlags = new byte [128 / 8];
130 #region .ctor() and split functions
132 public SimpleCollator (CultureInfo culture)
135 textInfo = culture.TextInfo;
138 SetCJKTable (culture, ref cjkIndexer,
139 ref cjkCatTable, ref cjkLv1Table,
140 ref cjkLv2Indexer, ref cjkLv2Table);
143 // Get tailoring info
144 TailoringInfo t = null;
145 for (CultureInfo ci = culture; ci.LCID != 127; ci = ci.Parent) {
146 t = Uni.GetTailoringInfo (ci.LCID);
150 if (t == null) // then use invariant
151 t = Uni.GetTailoringInfo (127);
153 frenchSort = t.FrenchSort;
154 Uni.BuildTailoringTables (culture, t, ref contractions,
156 unsafeFlags = new byte [UnsafeFlagLength];
157 foreach (Contraction c in contractions) {
158 if (c.Source.Length > 1)
159 foreach (char ch in c.Source)
160 unsafeFlags [(int) ch / 8 ]
161 |= (byte) (1 << ((int) ch & 7));
162 // for (int i = 0; i < c.Source.Length; i++) {
163 // int ch = c.Source [i];
166 // contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
170 foreach (Contraction c in invariant.contractions) {
171 if (c.Source.Length > 1)
172 foreach (char ch in c.Source)
173 unsafeFlags [(int) ch / 8 ]
174 |= (byte) (1 << ((int) ch & 7));
175 // for (int i = 0; i < c.Source.Length; i++) {
176 // int ch = c.Source [i];
179 // contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
183 // FIXME: Since tailorings are mostly for latin
184 // (and in some cases Cyrillic) characters, it would
185 // be much better for performance to store "start
186 // indexes" for > 370 (culture-specific letters).
189 // dump tailoring table
190 Console.WriteLine ("******** building table for {0} : c - {1} d - {2}",
191 culture.LCID, contractions.Length, level2Maps.Length);
192 foreach (Contraction c in contractions) {
193 foreach (char cc in c.Source)
194 Console.Write ("{0:X4} ", (int) cc);
195 Console.WriteLine (" -> '{0}'", c.Replacement);
200 unsafe private void SetCJKTable (
201 CultureInfo culture, ref CodePointIndexer cjkIndexer,
202 ref byte* catTable, ref byte* lv1Table,
203 ref CodePointIndexer lv2Indexer, ref byte* lv2Table)
205 string name = GetNeutralCulture (culture).Name;
207 Uni.FillCJK (name, ref cjkIndexer, ref catTable,
208 ref lv1Table, ref lv2Indexer, ref lv2Table);
211 static CultureInfo GetNeutralCulture (CultureInfo info)
213 CultureInfo ret = info;
214 while (ret.Parent != null && ret.Parent.LCID != 127)
221 unsafe byte Category (int cp)
223 if (cp < 0x3000 || cjkCatTable == null)
224 return Uni.Category (cp);
225 int idx = cjkIndexer.ToIndex (cp);
226 return idx < 0 ? Uni.Category (cp) : cjkCatTable [idx];
229 unsafe byte Level1 (int cp)
231 if (cp < 0x3000 || cjkLv1Table == null)
232 return Uni.Level1 (cp);
233 int idx = cjkIndexer.ToIndex (cp);
234 return idx < 0 ? Uni.Level1 (cp) : cjkLv1Table [idx];
237 unsafe byte Level2 (int cp, ExtenderType ext)
239 if (ext == ExtenderType.Buggy)
241 else if (ext == ExtenderType.Conditional)
244 if (cp < 0x3000 || cjkLv2Table == null)
245 return Uni.Level2 (cp);
246 int idx = cjkLv2Indexer.ToIndex (cp);
247 byte ret = idx < 0 ? (byte) 0 : cjkLv2Table [idx];
250 ret = Uni.Level2 (cp);
251 if (level2Maps.Length == 0)
253 for (int i = 0; i < level2Maps.Length; i++) {
254 if (level2Maps [i].Source == ret)
255 return level2Maps [i].Replace;
256 else if (level2Maps [i].Source > ret)
262 static bool IsHalfKana (int cp, COpt opt)
264 return (opt & COpt.IgnoreWidth) != 0 ||
265 Uni.IsHalfWidthKana ((char) cp);
268 Contraction GetContraction (string s, int start, int end)
270 // int si = s [start];
271 // if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
273 Contraction c = GetContraction (s, start, end, contractions);
274 if (c != null || lcid == 127)
276 return GetContraction (s, start, end, invariant.contractions);
279 Contraction GetContraction (string s, int start, int end, Contraction [] clist)
281 for (int i = 0; i < clist.Length; i++) {
282 Contraction ct = clist [i];
283 int diff = ct.Source [0] - s [start];
285 return null; // it's already sorted
288 char [] chars = ct.Source;
289 if (end - start < chars.Length)
292 for (int n = 0; n < chars.Length; n++)
293 if (s [start + n] != chars [n]) {
303 Contraction GetTailContraction (string s, int start, int end)
305 // int si = s [end - 1];
306 // if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
308 Contraction c = GetTailContraction (s, start, end, contractions);
309 if (c != null || lcid == 127)
311 return GetTailContraction (s, start, end, invariant.contractions);
314 Contraction GetTailContraction (string s, int start, int end, Contraction [] clist)
316 for (int i = 0; i < clist.Length; i++) {
317 Contraction ct = clist [i];
318 int diff = ct.Source [0] - s [end];
320 return null; // it's already sorted
323 char [] chars = ct.Source;
324 if (start - end + 1 < chars.Length)
327 int offset = start - chars.Length + 1;
328 for (int n = 0; n < chars.Length; n++)
329 if (s [offset + n] != chars [n]) {
339 Contraction GetContraction (char c)
341 Contraction ct = GetContraction (c, contractions);
342 if (ct != null || lcid == 127)
344 return GetContraction (c, invariant.contractions);
347 Contraction GetContraction (char c, Contraction [] clist)
349 for (int i = 0; i < clist.Length; i++) {
350 Contraction ct = clist [i];
351 if (ct.Source [0] > c)
352 return null; // it's already sorted
353 if (ct.Source [0] == c && ct.Source.Length == 1)
359 int FilterOptions (int i, COpt opt)
361 if ((opt & COpt.IgnoreWidth) != 0) {
362 int x = Uni.ToWidthCompat (i);
366 if ((opt & COpt.IgnoreCase) != 0)
367 i = textInfo.ToLower ((char) i);
368 if ((opt & COpt.IgnoreKanaType) != 0)
369 i = Uni.ToKanaTypeInsensitive (i);
381 ExtenderType GetExtenderType (int i)
383 // LAMESPEC: Windows expects true for U+3005, but
384 // sometimes it does not represent to repeat just
386 // Windows also expects true for U+3031 and U+3032,
387 // but they should *never* repeat one character.
389 // U+2015 becomes an extender only when it is Japanese
391 return lcid == 16 ? ExtenderType.Conditional :
394 if (i < 0x3005 || i > 0xFF70)
395 return ExtenderType.None;
400 return ExtenderType.Simple;
402 return ExtenderType.Conditional;
405 return ExtenderType.Voiced;
409 return ExtenderType.None;
411 case 0x3005: // LAMESPEC: see above.
412 return ExtenderType.Buggy;
413 case 0x3031: // LAMESPEC: see above.
414 case 0x3032: // LAMESPEC: see above.
417 return ExtenderType.Simple;
420 return ExtenderType.Voiced;
422 return ExtenderType.Conditional;
424 return ExtenderType.None;
428 static byte ToDashTypeValue (ExtenderType ext, COpt opt)
430 if ((opt & COpt.IgnoreNonSpace) != 0) // LAMESPEC: huh, why?
433 case ExtenderType.None:
435 case ExtenderType.Conditional:
442 int FilterExtender (int i, ExtenderType ext, COpt opt)
444 if (ext == ExtenderType.Conditional &&
445 Uni.HasSpecialWeight ((char) i)) {
446 bool half = IsHalfKana ((char) i, opt);
447 bool katakana = !Uni.IsHiragana ((char) i);
448 switch (Level1 (i) & 7) {
450 return half ? 0xFF71 : katakana ? 0x30A2 : 0x3042;
452 return half ? 0xFF72 : katakana ? 0x30A4 : 0x3044;
454 return half ? 0xFF73 : katakana ? 0x30A6 : 0x3046;
456 return half ? 0xFF74 : katakana ? 0x30A8 : 0x3048;
458 return half ? 0xFF75 : katakana ? 0x30AA : 0x304A;
464 static bool IsIgnorable (int i, COpt opt)
466 return Uni.IsIgnorable (i, (byte) (1 +
467 ((opt & COpt.IgnoreSymbols) != 0 ? 2 : 0) +
468 ((opt & COpt.IgnoreNonSpace) != 0 ? 4 : 0)));
474 return i / 8 >= unsafeFlags.Length ? true : (unsafeFlags [i / 8] & (1 << (i % 8))) == 0;
479 public SortKey GetSortKey (string s)
481 return GetSortKey (s, CompareOptions.None);
484 public SortKey GetSortKey (string s, CompareOptions options)
486 return GetSortKey (s, 0, s.Length, options);
489 public SortKey GetSortKey (string s, int start, int length, CompareOptions options)
491 SortKeyBuffer buf = new SortKeyBuffer (lcid);
492 buf.Initialize (options, lcid, s, frenchSort);
493 int end = start + length;
494 GetSortKey (s, start, end, buf, options);
495 return buf.GetResultAndReset ();
498 void GetSortKey (string s, int start, int end,
499 SortKeyBuffer buf, CompareOptions opt)
501 PreviousInfo prev = new PreviousInfo (false);
503 for (int n = start; n < end; n++) {
506 ExtenderType ext = GetExtenderType (i);
507 if (ext != ExtenderType.None) {
508 i = FilterExtender (prev.Code, ext, opt);
510 FillSortKeyRaw (i, ext, buf, opt);
511 else if (prev.SortKey != null) {
512 byte [] b = prev.SortKey;
516 b [2] != 1 ? b [2] : Level2 (i, ext),
517 b [3] != 1 ? b [3] : Uni.Level3 (i));
519 // otherwise do nothing.
520 // (if the extender is the first char
521 // in the string, then just ignore.)
525 if (IsIgnorable (i, opt))
527 i = FilterOptions (i, opt);
529 Contraction ct = GetContraction (s, n, end);
531 if (ct.Replacement != null) {
532 GetSortKey (ct.Replacement, 0, ct.Replacement.Length, buf, opt);
534 byte [] b = ct.SortKey;
538 b [2] != 1 ? b [2] : Level2 (i, ext),
539 b [3] != 1 ? b [3] : Uni.Level3 (i));
543 n += ct.Source.Length - 1;
546 if (!Uni.IsIgnorableNonSpacing (i))
548 FillSortKeyRaw (i, ExtenderType.None, buf, opt);
553 void FillSortKeyRaw (int i, ExtenderType ext,
554 SortKeyBuffer buf, CompareOptions opt)
556 if (0x3400 <= i && i <= 0x4DB5) {
557 int diff = i - 0x3400;
558 buf.AppendCJKExtension (
559 (byte) (0x10 + diff / 254),
560 (byte) (diff % 254 + 2));
564 UnicodeCategory uc = char.GetUnicodeCategory ((char) i);
566 case UnicodeCategory.PrivateUse:
567 int diff = i - 0xE000;
569 (byte) (0xE5 + diff / 254),
570 (byte) (diff % 254 + 2),
574 case UnicodeCategory.Surrogate:
575 FillSurrogateSortKeyRaw (i, buf);
579 byte level2 = Level2 (i, ext);
580 if (Uni.HasSpecialWeight ((char) i)) {
581 byte level1 = Level1 (i);
587 Uni.IsJapaneseSmallLetter ((char) i),
588 ToDashTypeValue (ext, opt),
589 !Uni.IsHiragana ((char) i),
590 IsHalfKana ((char) i, opt)
592 if ((opt & COpt.IgnoreNonSpace) == 0 && ext == ExtenderType.Voiced)
593 // Append voice weight
594 buf.AppendNormal (1, 1, 1, 0);
604 void FillSurrogateSortKeyRaw (int i, SortKeyBuffer buf)
613 lower = (byte) ((i == 0xD800) ? 0x3E : 0x3F);
614 } else if (0xD840 <= i && i < 0xD880) {
618 } else if (0xDB80 <= i && i < 0xDC00) {
619 diffbase = 0xDB80 - 0x40;
623 diffbase = 0xDC00 - 0xF8 + 2;
627 int diff = i - diffbase;
630 (byte) (segment + diff / 254),
631 (byte) (diff % 254 + 2),
640 public int Compare (string s1, string s2)
642 return Compare (s1, s2, CompareOptions.None);
645 public int Compare (string s1, string s2, CompareOptions options)
647 return Compare (s1, 0, s1.Length, s2, 0, s2.Length, options);
650 private int CompareOrdinal (string s1, int idx1, int len1,
651 string s2, int idx2, int len2)
653 int min = len1 < len2 ? len1 : len2;
654 int end1 = idx1 + min;
655 int end2 = idx2 + min;
656 if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
657 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
658 for (int i1 = idx1, i2 = idx2;
659 i1 < end1 && i2 < end2; i1++, i2++)
660 if (s1 [i1] != s2 [i2])
661 return s1 [i1] - s2 [i2];
662 return len1 == len2 ? 0 :
663 len1 == min ? - 1 : 1;
666 public int Compare (string s1, int idx1, int len1,
667 string s2, int idx2, int len2, CompareOptions options)
669 // quick equality check
670 if (idx1 == idx2 && len1 == len2 &&
671 Object.ReferenceEquals (s1, s2))
673 // FIXME: this should be done inside Compare() at
675 // int ord = CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
678 if (options == CompareOptions.Ordinal)
679 return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
681 #if false // stable easy version, depends on GetSortKey().
682 SortKey sk1 = GetSortKey (s1, idx1, len1, options);
683 SortKey sk2 = GetSortKey (s2, idx2, len2, options);
684 byte [] d1 = sk1.KeyData;
685 byte [] d2 = sk2.KeyData;
686 int len = d1.Length > d2.Length ? d2.Length : d1.Length;
687 for (int i = 0; i < len; i++)
688 if (d1 [i] != d2 [i])
689 return d1 [i] < d2 [i] ? -1 : 1;
690 return d1.Length == d2.Length ? 0 : d1.Length < d2.Length ? -1 : 1;
692 PreviousInfo prev1 = new PreviousInfo (false);
693 byte [] sk1 = new byte [4];
694 byte [] sk2 = new byte [4];
696 int ret = CompareInternal (options, s1, idx1, len1, s2, idx2, len2, out dummy, out dummy2, true, ref prev1, sk1, sk2);
697 return ret == 0 ? 0 : ret < 0 ? -1 : 1;
701 int CompareInternal (COpt opt, string s1, int idx1, int len1, string s2,
703 out bool targetConsumed, out bool sourceConsumed,
704 bool skipHeadingExtenders, ref PreviousInfo prev1,
705 byte [] charSortKey, byte [] charSortKey2)
709 int end1 = idx1 + len1;
710 int end2 = idx2 + len2;
711 targetConsumed = false;
712 sourceConsumed = false;
713 PreviousInfo prev2 = new PreviousInfo (false);
715 // It holds final result that comes from the comparison
716 // at level 2 or lower. Even if Compare() found the
717 // difference at level 2 or lower, it still has to
718 // continue level 1 comparison. FinalResult is used
719 // when there was no further differences.
721 // It holds the comparison level to do. It starts from
722 // 5, and becomes 1 when we do primary-only comparison.
723 int currentLevel = 5;
730 // Skip heading extenders
731 if (skipHeadingExtenders) {
732 for (; idx1 < end1; idx1++)
733 if (GetExtenderType (s1 [idx1]) == ExtenderType.None)
735 for (; idx2 < end2; idx2++)
736 if (GetExtenderType (s2 [idx2]) == ExtenderType.None)
740 ExtenderType ext1 = ExtenderType.None;
741 ExtenderType ext2 = ExtenderType.None;
743 int quickCheckPos1 = idx1;
744 int quickCheckPos2 = idx2;
745 bool stringSort = (opt & COpt.StringSort) != 0;
746 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
747 Escape escape1 = new Escape ();
748 Escape escape2 = new Escape ();
751 for (; idx1 < end1; idx1++)
752 if (!IsIgnorable (s1 [idx1], opt))
754 for (; idx2 < end2; idx2++)
755 if (!IsIgnorable (s2 [idx2], opt))
759 if (escape1.Source == null)
762 start1 = escape1.Start;
763 idx1 = escape1.Index;
765 quickCheckPos1 = escape1.Optional;
766 escape1.Source = null;
770 if (escape2.Source == null)
773 start2 = escape2.Start;
774 idx2 = escape2.Index;
776 quickCheckPos2 = escape2.Optional;
777 escape2.Source = null;
781 // If comparison is unstable, then this part is one of the most doubtful part.
782 // Here we run quick codepoint comparison and run back to "stable" area to
783 // compare characters.
785 // Strictly to say, even the first character
786 // could be compared here, but it messed
787 // backward step, so I just avoided mess.
788 if (quickCheckPos1 < idx1 && quickCheckPos2 < idx2) {
789 while (idx1 < end1 && idx2 < end2 &&
790 s1 [idx1] == s2 [idx2]) {
794 if (idx1 == end1 || idx2 == end2)
795 continue; // check replacement
797 int backwardEnd1 = quickCheckPos1;
798 int backwardEnd2 = quickCheckPos2;
799 quickCheckPos1 = idx1;
800 quickCheckPos2 = idx2;
805 for (;idx1 > backwardEnd1; idx1--)
806 if (Category (s1 [idx1]) != 1)
808 for (;idx2 > backwardEnd2; idx2--)
809 if (Category (s2 [idx2]) != 1)
811 for (;idx1 > backwardEnd1; idx1--)
812 if (IsSafe (s1 [idx1]))
814 for (;idx2 > backwardEnd2; idx2--)
815 if (IsSafe (s2 [idx2]))
824 int i1 = FilterOptions (s1 [idx1], opt);
825 int i2 = FilterOptions (s2 [idx2], opt);
826 bool special1 = false;
827 bool special2 = false;
829 // If current character is an expander, then
830 // repeat the previous character.
831 ext1 = GetExtenderType (i1);
832 if (ext1 != ExtenderType.None) {
833 if (prev1.Code < 0) {
834 if (prev1.SortKey == null) {
842 i1 = FilterExtender (prev1.Code, ext1, opt);
844 ext2 = GetExtenderType (i2);
845 if (ext2 != ExtenderType.None) {
846 if (prev2.Code < 0) {
847 if (prev2.SortKey == null) {
855 i2 = FilterExtender (prev2.Code, ext2, opt);
858 byte cat1 = Category (i1);
859 byte cat2 = Category (i2);
861 // Handle special weight characters
862 if (!stringSort && currentLevel > 4) {
864 lv5At1 = escape1.Source != null ?
865 escape1.Index - escape1.Start :
867 // here Windows has a bug that it does
868 // not consider thirtiary weight.
869 lv5Value1 = Level1 (i1) << 8 + Uni.Level3 (i1);
874 lv5At2 = escape2.Source != null ?
875 escape2.Index - escape2.Start :
877 // here Windows has a bug that it does
878 // not consider thirtiary weight.
879 lv5Value2 = Level1 (i2) << 8 + Uni.Level3 (i2);
883 if (cat1 == 6 || cat2 == 6) {
889 Contraction ct1 = null;
890 if (ext1 == ExtenderType.None)
891 ct1 = GetContraction (s1, idx1, end1);
896 else if (ct1 != null) {
897 offset1 = ct1.Source.Length;
898 if (ct1.SortKey != null) {
900 for (int i = 0; i < ct1.SortKey.Length; i++)
901 sk1 [i] = ct1.SortKey [i];
905 else if (escape1.Source == null) {
907 escape1.Start = start1;
908 escape1.Index = cur1 + ct1.Source.Length;
910 escape1.Optional = quickCheckPos1;
911 s1 = ct1.Replacement;
922 sk1 [1] = Level1 (i1);
923 if (!ignoreNonSpace && currentLevel > 1)
924 sk1 [2] = Level2 (i1, ext1);
925 if (currentLevel > 2)
926 sk1 [3] = Uni.Level3 (i1);
927 if (currentLevel > 3)
928 special1 = Uni.HasSpecialWeight ((char) i1);
933 Contraction ct2 = null;
934 if (ext2 == ExtenderType.None)
935 ct2 = GetContraction (s2, idx2, end2);
939 else if (ct2 != null) {
940 idx2 += ct2.Source.Length;
941 if (ct2.SortKey != null) {
943 for (int i = 0; i < ct2.SortKey.Length; i++)
944 sk2 [i] = ct2.SortKey [i];
948 else if (escape2.Source == null) {
950 escape2.Start = start2;
951 escape2.Index = cur2 + ct2.Source.Length;
953 escape2.Optional = quickCheckPos2;
954 s2 = ct2.Replacement;
965 sk2 [1] = Level1 (i2);
966 if (!ignoreNonSpace && currentLevel > 1)
967 sk2 [2] = Level2 (i2, ext2);
968 if (currentLevel > 2)
969 sk2 [3] = Uni.Level3 (i2);
970 if (currentLevel > 3)
971 special2 = Uni.HasSpecialWeight ((char) i2);
977 // Add offset here so that it does not skip
978 // idx1 while s2 has a replacement.
981 // add diacritical marks in s1 here
982 if (!ignoreNonSpace) {
983 while (idx1 < end1) {
984 if (Category (s1 [idx1]) != 1)
988 sk1 [2] = (byte) (sk1 [2] +
989 Level2 (s1 [idx1], ExtenderType.None));
993 // add diacritical marks in s2 here
994 while (idx2 < end2) {
995 if (Category (s2 [idx2]) != 1)
999 sk2 [2] = (byte) (sk2 [2] +
1000 Level2 (s2 [idx2], ExtenderType.None));
1005 int ret = sk1 [0] - sk2 [0];
1006 ret = ret != 0 ? ret : sk1 [1] - sk2 [1];
1009 if (currentLevel == 1)
1011 if (!ignoreNonSpace) {
1012 ret = sk1 [2] - sk2 [2];
1015 currentLevel = frenchSort ? 2 : 1;
1019 if (currentLevel == 2)
1021 ret = sk1 [3] - sk2 [3];
1027 if (currentLevel == 3)
1029 if (special1 != special2) {
1030 finalResult = special1 ? 1 : -1;
1035 ret = CompareFlagPair (
1036 !Uni.IsJapaneseSmallLetter ((char) i1),
1037 !Uni.IsJapaneseSmallLetter ((char) i2));
1038 ret = ret != 0 ? ret :
1039 ToDashTypeValue (ext1, opt) -
1040 ToDashTypeValue (ext2, opt);
1041 ret = ret != 0 ? ret : CompareFlagPair (
1042 Uni.IsHiragana ((char) i1),
1043 Uni.IsHiragana ((char) i2));
1044 ret = ret != 0 ? ret : CompareFlagPair (
1045 !IsHalfKana ((char) i1, opt),
1046 !IsHalfKana ((char) i2, opt));
1055 // If there were only level 3 or lower differences,
1056 // then we still have to find diacritical differences
1058 if (!ignoreNonSpace &&
1059 finalResult != 0 && currentLevel > 2) {
1060 while (idx1 < end1 && idx2 < end2) {
1061 if (!Uni.IsIgnorableNonSpacing (s1 [idx1]))
1063 if (!Uni.IsIgnorableNonSpacing (s2 [idx2]))
1065 finalResult = Level2 (FilterOptions (s1 [idx1], opt), ext1) - Level2 (FilterOptions (s2 [idx2], opt), ext2);
1066 if (finalResult != 0)
1070 // they should work only for the first character
1071 ext1 = ExtenderType.None;
1072 ext2 = ExtenderType.None;
1075 if (currentLevel == 1 && finalResult != 0) {
1077 if (Uni.IsIgnorableNonSpacing (s1 [idx1]))
1080 if (Uni.IsIgnorableNonSpacing (s2 [idx2]))
1083 // we still have to handle level 5
1084 if (finalResult == 0) {
1085 finalResult = lv5At1 - lv5At2;
1086 if (finalResult == 0)
1087 finalResult = lv5Value1 - lv5Value2;
1089 if (finalResult == 0) {
1091 targetConsumed = true;
1093 sourceConsumed = true;
1095 return idx1 != end1 ? 1 : idx2 == end2 ? finalResult : -1;
1098 int CompareFlagPair (bool b1, bool b2)
1100 return b1 == b2 ? 0 : b1 ? 1 : -1;
1105 #region IsPrefix() and IsSuffix()
1107 public bool IsPrefix (string src, string target, CompareOptions opt)
1109 return IsPrefix (src, target, 0, src.Length, opt);
1112 public bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
1114 if (target.Length == 0)
1116 PreviousInfo prev = new PreviousInfo (false);
1117 byte [] sk1 = new byte [4];
1118 byte [] sk2 = new byte [4];
1119 return IsPrefix (opt, s, target, start, length, true, ref prev, sk1, sk2);
1122 bool IsPrefix (COpt opt, string s, string target, int start, int length, bool skipHeadingExtenders, ref PreviousInfo prev, byte [] sk1, byte [] sk2)
1124 bool consumed, dummy;
1125 CompareInternal (opt, s, start, length,
1126 target, 0, target.Length,
1127 out consumed, out dummy, skipHeadingExtenders,
1128 ref prev, sk1, sk2);
1134 public bool IsSuffix (string src, string target, CompareOptions opt)
1136 return IsSuffix (src, target, src.Length - 1, src.Length, opt);
1139 public bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
1141 if (target.Length == 0)
1143 int last = LastIndexOf (s, target, start, length, opt);
1144 return last >= 0 && Compare (s, last, s.Length - last, target, 0, target.Length, opt) == 0;
1146 // quick check : simple codepoint comparison
1147 if (s.Length >= target.Length) {
1149 int se = start - length;
1150 for (int i = target.Length - 1; si >= se && i >= 0; i--, si--)
1151 if (s [si] != target [i])
1153 if (si == start + target.Length)
1157 PreviousInfo prev = new PreviousInfo (false);
1158 byte [] sk1 = new byte [4];
1159 byte [] sk2 = new byte [4];
1160 return IsSuffix (opt, s, target, start, length, ref prev, sk1, sk2);
1164 bool IsSuffix (COpt opt, string s, string t, int start, int length, ref PreviousInfo prev, byte [] sk1, byte [] sk2)
1167 for (;tstart < t.Length; tstart++)
1168 if (!IsIgnorable (t [tstart], opt))
1170 if (tstart == t.Length)
1171 return true; // as if target is String.Empty.
1174 // This is still not working. If it is not likely to get working, then just remove it.
1176 int send = start - length;
1177 int ti = t.Length - 1;
1180 int sStep = start + 1;
1181 int tStep = t.Length;
1182 bool sourceConsumed, targetConsumed;
1184 for (; send < si; si--)
1185 if (!IsIgnorable (s [si]))
1187 for (; tend < ti; ti--)
1188 if (!IsIgnorable (t [ti]))
1192 for (; send < si; si--)
1193 if (IsSafe (s [si]))
1195 for (; tend < ti; ti--)
1196 if (IsSafe (t [ti]))
1198 Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, length, t, ti, tstart, sStep - si, tStep - ti);
1199 if (CompareInternal (opt, s, si, sStep - si,
1201 out targetConsumed, out sourceConsumed,
1213 // FIXME: it is not efficient for very long target.
1214 bool sourceConsumed, targetConsumed;
1215 int mismatchCount = 0;
1216 for (int i = 0; i < length; i++) {
1217 prev = new PreviousInfo (false); // prev.Reset();
1219 int ret = CompareInternal (opt, s, start - i, i + 1,
1220 t, tstart, t.Length - tstart,
1222 out sourceConsumed, true, ref prev,
1226 if (!sourceConsumed && targetConsumed)
1228 if (!targetConsumed) {
1230 if (mismatchCount > Uni.MaxExpansionLength)
1231 // The largest length of an
1232 // expansion is 3, so if the
1233 // target was not consumed more
1234 // than 3 times, then the tail
1235 // character does not match.
1245 #region IndexOf() / LastIndexOf()
1247 public int IndexOf (string s, string target, CompareOptions opt)
1249 return IndexOf (s, target, 0, s.Length, opt);
1252 public int IndexOf (string s, string target, int start, int length, CompareOptions opt)
1254 PreviousInfo prev = new PreviousInfo (false);
1255 byte [] checkedFlags = s.Length > 50 ? new byte [16] : null;
1256 byte [] targetSortKey = new byte [4];
1257 byte [] sk1 = new byte [4];
1258 byte [] sk2 = new byte [4];
1260 return IndexOf (opt, s, target, start, length,
1261 checkedFlags, targetSortKey, ref prev, sk1, sk2);
1264 public int IndexOf (string s, char target, CompareOptions opt)
1266 return IndexOf (s, target, 0, s.Length, opt);
1269 public int IndexOf (string s, char target, int start, int length, CompareOptions opt)
1271 PreviousInfo prev = new PreviousInfo (false);
1272 byte [] sk1 = new byte [4];
1273 byte [] sk2 = new byte [4];
1274 byte [] checkedFlags = s.Length > 50 ? new byte [16] : null;
1275 byte [] targetSortKey = new byte [4];
1277 // If target is contraction, then use string search.
1278 Contraction ct = GetContraction (target);
1280 if (ct.Replacement != null)
1281 return IndexOf (opt, s, ct.Replacement,
1282 start, length, checkedFlags, targetSortKey, ref prev, sk1, sk2);
1284 return IndexOfSortKey (opt, s, start, length, ct.SortKey, char.MinValue, -1, true, checkedFlags, ref prev, sk1);
1286 int ti = FilterOptions ((int) target, opt);
1287 targetSortKey [0] = Category (ti);
1288 targetSortKey [1] = Level1 (ti);
1289 if ((opt & COpt.IgnoreNonSpace) == 0)
1291 Level2 (ti, ExtenderType.None);
1292 targetSortKey [3] = Uni.Level3 (ti);
1293 return IndexOfSortKey (opt, s, start, length,
1294 targetSortKey, target, ti,
1295 !Uni.HasSpecialWeight ((char) ti), checkedFlags, ref prev, sk1);
1299 // Searches target byte[] keydata
1300 int IndexOfSortKey (COpt opt, string s, int start, int length, byte [] sortkey, char target, int ti, bool noLv4, byte [] checkedFlags, ref PreviousInfo prev, byte [] sk)
1302 int end = start + length;
1307 if (MatchesForward (opt, s, ref idx, end, ti, sortkey, noLv4, checkedFlags, ref prev, sk))
1313 // Searches string. Search head character (or keydata when
1314 // the head is contraction sortkey) and try IsPrefix().
1315 int IndexOf (COpt opt, string s, string target, int start, int length, byte [] checkedFlags, byte [] targetSortKey, ref PreviousInfo prev, byte [] sk1, byte [] sk2)
1318 for (; tidx < target.Length; tidx++)
1319 if (!IsIgnorable (target [tidx], opt))
1321 if (tidx == target.Length)
1323 Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1324 string replace = ct != null ? ct.Replacement : null;
1325 byte [] sk = replace == null ? targetSortKey : null;
1327 char tc = char.MinValue;
1329 if (ct != null && sk != null) {
1330 for (int i = 0; i < ct.SortKey.Length; i++)
1331 sk [i] = ct.SortKey [i];
1332 } else if (sk != null) {
1334 ti = FilterOptions (target [tidx], opt);
1335 sk [0] = Category (ti);
1336 sk [1] = Level1 (ti);
1337 if ((opt & COpt.IgnoreNonSpace) == 0)
1338 sk [2] = Level2 (ti, ExtenderType.None);
1339 sk [3] = Uni.Level3 (ti);
1340 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1343 for (tidx++; tidx < target.Length; tidx++) {
1344 if (Category (target [tidx]) != 1)
1348 sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1354 if (replace != null)
1355 idx = IndexOf (opt, s, replace, start, length, checkedFlags, targetSortKey, ref prev, sk1, sk2);
1357 idx = IndexOfSortKey (opt, s, start, length, sk, tc, ti, noLv4, checkedFlags, ref prev, sk1);
1360 length -= idx - start;
1362 if (IsPrefix (opt, s, target, start, length, false, ref prev, sk1, sk2))
1364 Contraction cts = GetContraction (s, start, length);
1366 start += cts.Source.Length;
1367 length -= cts.Source.Length;
1372 } while (length > 0);
1377 // There are the same number of IndexOf() related methods,
1378 // with the same functionalities for each.
1381 public int LastIndexOf (string s, string target, CompareOptions opt)
1383 return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1386 public int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
1388 PreviousInfo prev = new PreviousInfo (false);
1389 byte [] sk1 = new byte [4];
1390 byte [] sk2 = new byte [4];
1391 byte [] checkedFlags = s.Length > 50 ? new byte [16] : null;
1392 byte [] targetSortKey = new byte [4];
1393 return LastIndexOf (opt, s, target, start, length,
1394 checkedFlags, targetSortKey, ref prev, sk1, sk2);
1397 public int LastIndexOf (string s, char target, CompareOptions opt)
1399 return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1402 public int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
1404 PreviousInfo prev = new PreviousInfo (false);
1405 byte [] sk1 = new byte [4];
1406 byte [] sk2 = new byte [4];
1407 byte [] checkedFlags = s.Length > 50 ? new byte [16] : null;
1408 byte [] targetSortKey = new byte [4];
1410 // If target is contraction, then use string search.
1411 Contraction ct = GetContraction (target);
1413 if (ct.Replacement != null)
1414 return LastIndexOf (opt, s,
1415 ct.Replacement, start, length,
1416 checkedFlags, targetSortKey, ref prev, sk1, sk2);
1418 return LastIndexOfSortKey (opt, s, start,
1419 start, length, ct.SortKey,
1420 char.MinValue, -1, true,
1421 checkedFlags, ref prev, sk1);
1424 int ti = FilterOptions ((int) target, opt);
1425 targetSortKey [0] = Category (ti);
1426 targetSortKey [1] = Level1 (ti);
1427 if ((opt & COpt.IgnoreNonSpace) == 0)
1428 targetSortKey [2] = Level2 (ti, ExtenderType.None);
1429 targetSortKey [3] = Uni.Level3 (ti);
1430 return LastIndexOfSortKey (opt, s, start, start,
1431 length, targetSortKey, target,
1432 ti, !Uni.HasSpecialWeight ((char) ti),
1433 checkedFlags, ref prev, sk1);
1437 // Searches target byte[] keydata
1438 int LastIndexOfSortKey (COpt opt, string s, int start, int orgStart, int length, byte [] sortkey, char target, int ti, bool noLv4, byte [] checkedFlags, ref PreviousInfo prev, byte [] sk)
1440 int end = start - length;
1444 if (MatchesBackward (opt, s, ref idx, end, orgStart,
1445 ti, sortkey, noLv4, checkedFlags, ref prev, sk))
1451 // Searches string. Search head character (or keydata when
1452 // the head is contraction sortkey) and try IsPrefix().
1453 int LastIndexOf (COpt opt, string s, string target, int start, int length, byte [] checkedFlags, byte [] targetSortKey, ref PreviousInfo prev, byte [] sk1, byte [] sk2)
1455 int orgStart = start;
1457 for (; tidx < target.Length; tidx++)
1458 if (!IsIgnorable (target [tidx], opt))
1460 if (tidx == target.Length)
1462 Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1463 string replace = ct != null ? ct.Replacement : null;
1464 byte [] sk = replace == null ? targetSortKey : null;
1467 char tc = char.MinValue;
1469 if (ct != null && sk != null) {
1470 for (int i = 0; i < ct.SortKey.Length; i++)
1471 sk [i] = ct.SortKey [i];
1472 } else if (sk != null) {
1474 ti = FilterOptions (target [tidx], opt);
1475 sk [0] = Category (ti);
1476 sk [1] = Level1 (ti);
1477 if ((opt & COpt.IgnoreNonSpace) == 0)
1478 sk [2] = Level2 (ti, ExtenderType.None);
1479 sk [3] = Uni.Level3 (ti);
1480 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1483 for (tidx++; tidx < target.Length; tidx++) {
1484 if (Category (target [tidx]) != 1)
1488 sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1495 if (replace != null)
1496 idx = LastIndexOf (opt, s, replace,
1497 start, length, checkedFlags,
1498 targetSortKey, ref prev, sk1, sk2);
1500 idx = LastIndexOfSortKey (opt, s, start, orgStart, length, sk, tc, ti, noLv4, checkedFlags, ref prev, sk1);
1503 length -= start - idx;
1506 if (IsPrefix (opt, s, target, idx, orgStart - idx + 1, false, ref prev, sk1, sk2)) {
1507 for (;idx < orgStart; idx++)
1508 if (!IsIgnorable (s [idx], opt))
1512 Contraction cts = GetContraction (s, idx, orgStart - idx + 1);
1514 start -= cts.Source.Length;
1515 length -= cts.Source.Length;
1520 } while (length > 0);
1524 private bool MatchesForward (COpt opt, string s, ref int idx, int end, int ti, byte [] sortkey, bool noLv4, byte [] checkedFlags, ref PreviousInfo prev, byte [] sk)
1527 if (checkedFlags != null && si < 128 && (checkedFlags [si / 8] & (1 << (si % 8))) != 0) {
1531 ExtenderType ext = GetExtenderType (s [idx]);
1532 Contraction ct = null;
1533 if (MatchesForwardCore (opt, s, ref idx, end, ti, sortkey, noLv4, ext, ref ct, checkedFlags, ref prev, sk))
1535 if (checkedFlags != null && ct == null && ext == ExtenderType.None && si < 128) {
1536 checkedFlags [si / 8] |= (byte) (1 << (si % 8));
1541 private bool MatchesForwardCore (COpt opt, string s, ref int idx, int end, int ti, byte [] sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, byte [] checkedFlags, ref PreviousInfo prev, byte [] charSortKey)
1543 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1545 if (ext == ExtenderType.None)
1546 ct = GetContraction (s, idx, end);
1547 else if (prev.Code < 0) {
1548 if (prev.SortKey == null) {
1552 charSortKey = prev.SortKey;
1555 si = FilterExtender (prev.Code, ext, opt);
1556 // if lv4 exists, it never matches contraction
1558 idx += ct.Source.Length;
1561 if (ct.SortKey != null) {
1562 for (int i = 0; i < sortkey.Length; i++)
1563 charSortKey [i] = sortkey [i];
1565 prev.SortKey = charSortKey;
1567 // Here is the core of LAMESPEC
1568 // described at the top of the source.
1570 return MatchesForward (opt, ct.Replacement, ref dummy,
1571 ct.Replacement.Length, ti, sortkey, noLv4, checkedFlags, ref prev, charSortKey);
1575 si = FilterOptions (s [idx], opt);
1577 charSortKey [0] = Category (si);
1578 bool noMatch = false;
1579 if (sortkey [0] == charSortKey [0])
1580 charSortKey [1] = Level1 (si);
1583 if (!ignoreNonSpace && sortkey [1] == charSortKey [1])
1584 charSortKey [2] = Level2 (si, ext);
1585 else if (!ignoreNonSpace)
1588 for (; idx < end; idx++) {
1589 if (Category (s [idx]) != 1)
1594 charSortKey [3] = Uni.Level3 (si);
1595 if (charSortKey [0] != 1)
1598 for (; idx < end; idx++) {
1599 if (Category (s [idx]) != 1)
1603 if (charSortKey [2] == 0)
1604 charSortKey [2] = 2;
1605 charSortKey [2] = (byte) (charSortKey [2]
1606 + Level2 (s [idx], ExtenderType.None));
1609 return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);
1612 private bool MatchesPrimitive (COpt opt, byte [] source, int si, ExtenderType ext, byte [] target, int ti, bool noLv4)
1614 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1615 if (source [0] != target [0] ||
1616 source [1] != target [1] ||
1617 (!ignoreNonSpace && source [2] != target [2]) ||
1618 source [3] != target [3])
1620 if (noLv4 && (si < 0 || !Uni.HasSpecialWeight ((char) si)))
1624 // Since target can never be an extender, if the source
1625 // is an expander and it matters, then they never match.
1626 if (!ignoreNonSpace && ext == ExtenderType.Conditional)
1628 if (Uni.IsJapaneseSmallLetter ((char) si) !=
1629 Uni.IsJapaneseSmallLetter ((char) ti) ||
1630 ToDashTypeValue (ext, opt) !=
1631 // FIXME: we will have to specify correct value for target
1632 ToDashTypeValue (ExtenderType.None, opt) ||
1633 !Uni.IsHiragana ((char) si) !=
1634 !Uni.IsHiragana ((char) ti) ||
1635 IsHalfKana ((char) si, opt) !=
1636 IsHalfKana ((char) ti, opt))
1641 private bool MatchesBackward (COpt opt, string s, ref int idx, int end, int orgStart, int ti, byte [] sortkey, bool noLv4, byte [] checkedFlags, ref PreviousInfo prev, byte [] sk)
1644 if (checkedFlags != null && si < 128 && (checkedFlags [si / 8] & (1 << (si % 8))) != 0) {
1648 ExtenderType ext = GetExtenderType (s [idx]);
1649 Contraction ct = null;
1650 if (MatchesBackwardCore (opt, s, ref idx, end, orgStart, ti, sortkey, noLv4, ext, ref ct, checkedFlags, ref prev, sk))
1652 if (checkedFlags != null && ct == null && ext == ExtenderType.None && si < 128) {
1653 checkedFlags [si / 8] |= (byte) (1 << (si % 8));
1658 private bool MatchesBackwardCore (COpt opt, string s, ref int idx, int end, int orgStart, int ti, byte [] sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, byte [] checkedFlags, ref PreviousInfo prev, byte [] charSortKey)
1660 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1663 // To handle extenders in source, we need to
1664 // check next _primary_ character.
1665 if (ext != ExtenderType.None) {
1666 byte diacritical = 0;
1667 for (int tmp = 0; ; tmp--) {
1668 if (tmp < 0) // heading extender
1670 if (IsIgnorable (s [tmp], opt))
1672 int tmpi = FilterOptions (s [tmp], opt);
1673 byte category = Category (tmpi);
1674 if (category == 1) {
1675 diacritical = Level2 (tmpi, ExtenderType.None);
1678 si = FilterExtender (tmpi, ext, opt);
1679 charSortKey [0] = category;
1680 charSortKey [1] = Level1 (si);
1681 if (!ignoreNonSpace)
1682 charSortKey [2] = Level2 (si, ext);
1683 charSortKey [3] = Uni.Level3 (si);
1684 if (ext != ExtenderType.Conditional &&
1687 (charSortKey [2] == 0) ?
1688 (byte) (diacritical + 2) :
1694 if (ext == ExtenderType.None)
1695 ct = GetContraction (s, idx, end);
1696 // if lv4 exists, it never matches contraction
1698 idx -= ct.Source.Length;
1701 if (ct.SortKey != null) {
1702 for (int i = 0; i < sortkey.Length; i++)
1703 charSortKey [i] = sortkey [i];
1705 prev.SortKey = charSortKey;
1707 // Here is the core of LAMESPEC
1708 // described at the top of the source.
1709 int dummy = ct.Replacement.Length - 1;
1710 return MatchesBackward (opt,
1711 ct.Replacement, ref dummy,
1712 dummy, -1, ti, sortkey, noLv4,
1713 checkedFlags, ref prev, charSortKey);
1715 } else if (ext == ExtenderType.None) {
1717 si = FilterOptions (s [idx], opt);
1719 bool noMatch = false;
1720 charSortKey [0] = Category (si);
1721 if (charSortKey [0] == sortkey [0])
1722 charSortKey [1] = Level1 (si);
1725 if (!ignoreNonSpace && charSortKey [1] == sortkey [1])
1726 charSortKey [2] = Level2 (si, ext);
1727 else if (!ignoreNonSpace)
1731 charSortKey [3] = Uni.Level3 (si);
1732 if (charSortKey [0] != 1)
1735 if (ext == ExtenderType.None) {
1736 for (int tmp = cur + 1; tmp < orgStart; tmp++) {
1737 if (Category (s [tmp]) != 1)
1741 if (charSortKey [2] == 0)
1742 charSortKey [2] = 2;
1743 charSortKey [2] = (byte) (charSortKey [2]
1744 + Level2 (s [tmp], ExtenderType.None));
1747 return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);