internal class SimpleCollator
{
// this environment variable is for debugging quick check.
+#pragma warning disable 169, 414
static bool QuickCheckDisabled =
Environment.internalGetEnvironmentVariable (
"MONO_COLLATION_QUICK_CHECK_DISABLED") == "yes";
+#pragma warning restore 169, 414
unsafe internal struct Context
{
- public Context (CompareOptions opt, byte* alwaysMatchFlags, byte* neverMatchFlags, byte* buffer1, byte* buffer2, byte* prev1, bool quickCheckPossible)
+ public Context (CompareOptions opt, byte* alwaysMatchFlags, byte* neverMatchFlags, byte* buffer1, byte* buffer2, byte* prev1/*, bool quickCheckPossible*/)
{
Option = opt;
AlwaysMatchFlags = alwaysMatchFlags;
Buffer2 = buffer2;
PrevSortKey = prev1;
PrevCode = -1;
- QuickCheckPossible = quickCheckPossible;
+// QuickCheckPossible = quickCheckPossible;
}
public readonly CompareOptions Option;
public byte* Buffer2;
public int PrevCode;
public byte* PrevSortKey;
- public readonly bool QuickCheckPossible;
+// public readonly bool QuickCheckPossible;
public void ClearPrevInfo ()
{
new SimpleCollator (CultureInfo.InvariantCulture);
readonly TextInfo textInfo; // for ToLower().
- readonly bool frenchSort;
- unsafe readonly byte* cjkCatTable;
- unsafe readonly byte* cjkLv1Table;
readonly CodePointIndexer cjkIndexer;
- unsafe readonly byte* cjkLv2Table;
- readonly CodePointIndexer cjkLv2Indexer;
- readonly int lcid;
readonly Contraction [] contractions;
readonly Level2Map [] level2Maps;
-
// This flag marks characters as "unsafe", where the character
// could be used as part of a contraction (whose length > 1).
readonly byte [] unsafeFlags;
+ unsafe readonly byte* cjkCatTable;
+ unsafe readonly byte* cjkLv1Table;
+ unsafe readonly byte* cjkLv2Table;
+ readonly CodePointIndexer cjkLv2Indexer;
+ readonly int lcid;
+ readonly bool frenchSort;
+
+
const int UnsafeFlagLength = 0x300 / 8;
// readonly byte [] contractionFlags = new byte [16];
throw new SystemException (String.Format ("MONO internal error. Failed to get TailContraction. start = {0} end = {1} string = '{2}'", start, end, s));
for (int i = 0; i < clist.Length; i++) {
Contraction ct = clist [i];
- int diff = ct.Source [0] - s [end + 1];
- if (diff > 0)
- return null; // it's already sorted
- else if (diff < 0)
- continue;
char [] chars = ct.Source;
-
- bool match = true;
if (chars.Length > start - end)
continue;
- for (int n = 0; n < chars.Length; n++)
- if (s [start - n] != chars [chars.Length - 1 - n]) {
+ if (chars [chars.Length - 1] != s [start])
+ continue;
+
+ bool match = true;
+ for (int n = 0, spos = start - chars.Length + 1;
+ n < chars.Length;
+ n++, spos++) {
+ if (s [spos] != chars [n]) {
match = false;
break;
}
+ }
if (match)
return ct;
}
if (x != 0)
i = x;
}
+ if ((opt & COpt.OrdinalIgnoreCase) != 0)
+ i = textInfo.ToLower ((char) i);
if ((opt & COpt.IgnoreCase) != 0)
i = textInfo.ToLower ((char) i);
if ((opt & COpt.IgnoreKanaType) != 0)
{
byte* prevbuf = stackalloc byte [4];
ClearBuffer (prevbuf, 4);
- Context ctx = new Context (opt, null, null, null, null, prevbuf, false);
+ Context ctx = new Context (opt, null, null, null, null, prevbuf);
for (int n = start; n < end; n++) {
int i = s [n];
public int Compare (string s1, string s2)
{
- return Compare (s1, s2, CompareOptions.None);
+ return Compare (s1, 0, s1.Length, s2, 0, s2.Length, CompareOptions.None);
}
-
- public int Compare (string s1, string s2, CompareOptions options)
- {
- return Compare (s1, 0, s1.Length, s2, 0, s2.Length, options);
- }
-
+/*
private int CompareOrdinal (string s1, int idx1, int len1,
string s2, int idx2, int len2)
{
return len1 == len2 ? 0 :
len1 == min ? - 1 : 1;
}
-
- public unsafe int Compare (string s1, int idx1, int len1,
+*/
+ internal unsafe int Compare (string s1, int idx1, int len1,
string s2, int idx2, int len2, CompareOptions options)
{
- // quick equality check
- if (idx1 == idx2 && len1 == len2 &&
- Object.ReferenceEquals (s1, s2))
- return 0;
- // FIXME: this should be done inside Compare() at
- // any time.
-// int ord = CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
-// if (ord == 0)
-// return 0;
- if (options == CompareOptions.Ordinal)
- return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
-#if NET_2_0
- if (options == CompareOptions.OrdinalIgnoreCase)
- return CompareOrdinalIgnoreCase (s1, idx1, len1, s2, idx2, len2);
-#endif
-
#if false // stable easy version, depends on GetSortKey().
SortKey sk1 = GetSortKey (s1, idx1, len1, options);
SortKey sk2 = GetSortKey (s2, idx2, len2, options);
byte* sk2 = stackalloc byte [4];
ClearBuffer (sk1, 4);
ClearBuffer (sk2, 4);
- Context ctx = new Context (options, null, null, sk1, sk2, null,
- QuickCheckPossible (s1, idx1, idx1 + len1, s2, idx2, idx2 + len2));
+ Context ctx = new Context (options, null, null, sk1, sk2, null);
+ // QuickCheckPossible (s1, idx1, idx1 + len1, s2, idx2, idx2 + len2));
bool dummy, dummy2;
int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, out dummy, out dummy2, true, false, ref ctx);
buffer [i] = 0;
}
+/*
bool QuickCheckPossible (string s1, int idx1, int end1,
string s2, int idx2, int end2)
{
+#if true
+ // looks like it is buggy and inefficient, so just disable it.
+ return false;
+#else
if (QuickCheckDisabled)
return false;
// if (s1.Length > 100 || s2.Length > 100)
if (s2 [i] < 0x20 && (s2 [i] < '\x9' || s2 [i] > '\xD') || s2 [i] >= 0x80 || s2 [i] == '-' || s2 [i] == '\'')
return false;
return true;
+#endif
}
+*/
unsafe int CompareInternal (string s1, int idx1, int len1, string s2,
int idx2, int len2,
sourceConsumed = false;
PreviousInfo prev2 = new PreviousInfo (false);
- if (opt == CompareOptions.None && ctx.QuickCheckPossible)
- return CompareQuick (s1, idx1, len1, s2, idx2, len2, out sourceConsumed, out targetConsumed, immediateBreakup);
+// if (opt == CompareOptions.None && ctx.QuickCheckPossible)
+// return CompareQuick (s1, idx1, len1, s2, idx2, len2, out sourceConsumed, out targetConsumed, immediateBreakup);
// It holds final result that comes from the comparison
// at level 2 or lower. Even if Compare() found the
byte cat2 = Category (i2);
// Handle special weight characters
- if (!stringSort && currentLevel > 4) {
- if (cat1 == 6) {
+ if (cat1 == 6) {
+ if (!stringSort && currentLevel == 5) {
lv5At1 = escape1.Source != null ?
escape1.Index - escape1.Start :
cur1 - start1;
// here Windows has a bug that it does
// not consider thirtiary weight.
lv5Value1 = Level1 (i1) << 8 + Uni.Level3 (i1);
- ctx.PrevCode = i1;
- idx1++;
}
- if (cat2 == 6) {
+ ctx.PrevCode = i1;
+ idx1++;
+ }
+ if (cat2 == 6) {
+ if (!stringSort && currentLevel == 5) {
lv5At2 = escape2.Source != null ?
escape2.Index - escape2.Start :
cur2 - start2;
// here Windows has a bug that it does
// not consider thirtiary weight.
lv5Value2 = Level1 (i2) << 8 + Uni.Level3 (i2);
- prev2.Code = i2;
- idx2++;
}
- if (cat1 == 6 || cat2 == 6) {
- currentLevel = 4;
- continue;
+ prev2.Code = i2;
+ idx2++;
+ }
+ if (cat1 == 6 || cat2 == 6) {
+ if (currentLevel == 5) {
+ if (lv5Value1 == lv5Value2) {
+ // There is not really difference here.
+ lv5At1 = lv5At2 = -1;
+ lv5Value1 = lv5Value2 = 0;
+ }
+ else
+ currentLevel = 4;
}
+ continue;
}
Contraction ct1 = null;
// idx1 while s2 has a replacement.
idx1 += offset1;
- // add diacritical marks in s1 here
if (!ignoreNonSpace) {
+ // add diacritical marks in s1 here
while (idx1 < end1) {
if (Category (s1 [idx1]) != 1)
break;
break;
}
}
- // we still have to handle level 5
+ // we still have to check level 5
if (finalResult == 0) {
- finalResult = lv5At1 - lv5At2;
- if (finalResult == 0)
- finalResult = lv5Value1 - lv5Value2;
+ if (lv5At1 < 0 && lv5At2 >= 0)
+ finalResult = -1;
+ else if (lv5At2 < 0 && lv5At1 >= 0)
+ finalResult = 1;
+ else {
+ finalResult = lv5At1 - lv5At2;
+ if (finalResult == 0)
+ finalResult = lv5Value1 - lv5Value2;
+ }
}
if (finalResult == 0) {
if (idx2 == end2)
byte* sk2 = stackalloc byte [4];
ClearBuffer (sk1, 4);
ClearBuffer (sk2, 4);
- Context ctx = new Context (opt, null, null, sk1, sk2, null,
- QuickCheckPossible (s, start, start + length, target, 0, target.Length));
+ Context ctx = new Context (opt, null, null, sk1, sk2, null);
+ //QuickCheckPossible (s, start, start + length, target, 0, target.Length));
return IsPrefix (s, target, start, length, true, ref ctx);
}
return IsSuffix (opt, s, target, start, length, ref prev, sk1, sk2);
*/
}
-
+/*
unsafe bool IsSuffix (string s, string t, int start, int length, ref Context ctx)
{
int tstart = 0;
return false;
#endif
}
-
+*/
#endregion
#region IndexOf() / LastIndexOf()
+ // string
+
public int IndexOf (string s, string target, CompareOptions opt)
{
return IndexOf (s, target, 0, s.Length, opt);
bool no = false;
for (int j = 0; j < target.Length; j++) {
if (testedTargetPos < j) {
- if (target [j] >= 0x80) {
+ char c = target [j];
+ if (c == 0 || c >= 0x80) {
testWasUnable = true;
return -1;
}
testedTargetPos = j;
}
if (testedSourcePos < i + j) {
- if (s [i + j] >= 0x80) {
+ char c = s [i + j];
+ if (c == 0 || c >= 0x80) {
testWasUnable = true;
return -1;
}
public unsafe int IndexOf (string s, string target, int start, int length, CompareOptions opt)
{
+ if (opt == CompareOptions.Ordinal)
+ throw new NotSupportedException ("Should not be reached");
+ if (opt == CompareOptions.OrdinalIgnoreCase)
+ throw new NotSupportedException ("Should not be reached");
if (opt == CompareOptions.None) {
bool testWasUnable;
int ret = QuickIndexOf (s, target, start, length, out testWasUnable);
ClearBuffer (targetSortKey, 4);
ClearBuffer (sk1, 4);
ClearBuffer (sk2, 4);
- Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
+ Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null);
return IndexOf (s, target, start, length,
targetSortKey, ref ctx);
}
+ // note that it wouldn't be used since ordinal IndexOf()
+ // should be directed to icall.
+ int IndexOfOrdinal (string s, string target, int start, int length)
+ {
+ if (target.Length == 0)
+ return 0;
+ else if (target.Length > length)
+ return -1;
+
+ int end = start + length - target.Length + 1;
+ for (int i = start; i < end; i++) {
+ bool no = false;
+ for (int j = 0; j < target.Length; j++) {
+ if (s [i + j] != target [j]) {
+ no = true;
+ break;
+ }
+ }
+ if (no)
+ continue;
+ return i;
+ }
+ return -1;
+ }
+
+ // char
+
public int IndexOf (string s, char target, CompareOptions opt)
{
return IndexOf (s, target, 0, s.Length, opt);
public unsafe int IndexOf (string s, char target, int start, int length, CompareOptions opt)
{
+ if (opt == CompareOptions.Ordinal)
+ throw new NotSupportedException ("Should not be reached");
+ if (opt == CompareOptions.OrdinalIgnoreCase)
+ throw new NotSupportedException ("Should not be reached");
byte* alwaysMatchFlags = stackalloc byte [16];
byte* neverMatchFlags = stackalloc byte [16];
byte* targetSortKey = stackalloc byte [4];
ClearBuffer (targetSortKey, 4);
ClearBuffer (sk1, 4);
ClearBuffer (sk2, 4);
- Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
+ Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null);
// If target is contraction, then use string search.
Contraction ct = GetContraction (target);
}
}
+ // note that it wouldn't be used since ordinal IndexOf()
+ // should be directed to icall.
+ int IndexOfOrdinal (string s, char target, int start, int length)
+ {
+ int end = start + length;
+ for (int i = start; i < end; i++)
+ if (s [i] == target)
+ return i;
+ return -1;
+ }
+
// Searches target byte[] keydata
unsafe int IndexOfSortKey (string s, int start, int length, byte* sortkey, char target, int ti, bool noLv4, ref Context ctx)
{
if (!IsIgnorable (target [tidx], opt))
break;
if (tidx == target.Length)
- return start;
+ // FIXME: this is likely a hack. A string that is consists of \0 differs from those of other ignorable characters.
+ return IndexOfOrdinal (target, '\0', 0, target.Length) >= 0 ? IndexOfOrdinal (s, target, start, length) : start;
Contraction ct = GetContraction (target, tidx, target.Length - tidx);
string replace = ct != null ? ct.Replacement : null;
byte* sk = replace == null ? targetSortKey : null;
}
//
- // There are the same number of IndexOf() related methods,
- // with the same functionalities for each.
+ // There are the same number of LastIndexOf() methods as
+ // IndexOf() related methods, with the same functionalities
+ // for each.
//
+ // string
+
public int LastIndexOf (string s, string target, CompareOptions opt)
{
return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
public unsafe int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
{
+ if (opt == CompareOptions.Ordinal)
+ return LastIndexOfOrdinal (s, target, start, length);
+ if (opt == CompareOptions.OrdinalIgnoreCase)
+ throw new NotSupportedException ("Should not be reached");
byte* alwaysMatchFlags = stackalloc byte [16];
byte* neverMatchFlags = stackalloc byte [16];
byte* targetSortKey = stackalloc byte [4];
ClearBuffer (sk1, 4);
ClearBuffer (sk2, 4);
// For some unknown reason CompareQuick() does not work fine w/ LastIndexOf().
- Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
+ Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null);
return LastIndexOf (s, target, start, length,
targetSortKey, ref ctx);
}
+ int LastIndexOfOrdinal (string s, string target, int start, int length)
+ {
+ if (target.Length == 0)
+ return start;
+ if (s.Length < target.Length || target.Length > length)
+ return -1;
+ int end = start - length + target.Length -1;
+ char tail = target [target.Length - 1];
+ for (int i = start; i > end;) {
+ if (s [i] != tail) {
+ i--;
+ continue;
+ }
+ int x = i - target.Length + 1;
+ i--;
+ bool mismatch = false;
+ for (int j = target.Length - 2; j >= 0; j--)
+ if (s [x + j] != target [j]) {
+ mismatch = true;
+ break;
+ }
+ if (mismatch)
+ continue;
+ return x;
+ }
+ return -1;
+ }
+
+ // char
+
public int LastIndexOf (string s, char target, CompareOptions opt)
{
return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
public unsafe int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
{
+ if (opt == CompareOptions.Ordinal)
+ throw new NotSupportedException ();
+ if (opt == CompareOptions.OrdinalIgnoreCase)
+ throw new NotSupportedException ();
byte* alwaysMatchFlags = stackalloc byte [16];
byte* neverMatchFlags = stackalloc byte [16];
byte* targetSortKey = stackalloc byte [4];
ClearBuffer (targetSortKey, 4);
ClearBuffer (sk1, 4);
ClearBuffer (sk2, 4);
- Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
+ Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null);
// If target is a replacement contraction, then use
// string search.
if (!IsIgnorable (target [tidx], opt))
break;
if (tidx == target.Length)
- return start;
+ // FIXME: this is likely a hack. A string that is consists of \0 differs from those of other ignorable characters.
+ return IndexOfOrdinal (target, '\0', 0, target.Length) >= 0 ? LastIndexOfOrdinal (s, target, start, length) : start;
Contraction ct = GetContraction (target, tidx, target.Length - tidx);
string replace = ct != null ? ct.Replacement : null;
byte* sk = replace == null ? targetSortKey : null;
// check next _primary_ character.
if (ext != ExtenderType.None) {
byte diacritical = 0;
- for (int tmp = 0; ; tmp--) {
+ for (int tmp = idx; ; tmp--) {
if (tmp < 0) // heading extender
return false;
if (IsIgnorable (s [tmp], opt))