//
-// SimpleCollator.cs
+// SimpleCollator.cs : the core collator implementation
//
-// This class will demonstrate CompareInfo functionality that will just work.
+// Author:
+// Atsushi Enomoto <atsushi@ximian.com>
+//
+// Copyright (C) 2005 Novell, Inc (http://www.novell.com)
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the
+// "Software"), to deal in the Software without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to
+// the following conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
//
using Uni = Mono.Globalization.Unicode.MSCompatUnicodeTable;
using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
+using COpt = System.Globalization.CompareOptions;
namespace Mono.Globalization.Unicode
{
internal class SimpleCollator
{
+ // Debugging aid: setting this environment variable to "yes" disables the quick check fast path.
+ static bool QuickCheckDisabled =
+ Environment.GetEnvironmentVariable (
+ "MONO_COLLATION_QUICK_CHECK_DISABLED") == "yes";
+
+		// Per-operation state that the comparison and search helpers receive
+		// by reference. It carries what used to live in mutable instance
+		// fields (expanded options, scratch sort key buffers, "previous
+		// character" tracking).
+		internal unsafe struct Context
+		{
+			public Context (CompareOptions opt, byte* alwaysMatchFlags, byte* neverMatchFlags, byte* buffer1, byte* buffer2, byte* prev1, bool quickCheckPossible)
+			{
+				this.Option = opt;
+				this.QuickCheckPossible = quickCheckPossible;
+				this.AlwaysMatchFlags = alwaysMatchFlags;
+				this.NeverMatchFlags = neverMatchFlags;
+				this.Buffer1 = buffer1;
+				this.Buffer2 = buffer2;
+				// No previous code point yet; the previous sort key
+				// slot is whatever the caller supplied (may be null).
+				this.PrevCode = -1;
+				this.PrevSortKey = prev1;
+			}
+
+			// Options in effect for the whole operation.
+			public readonly CompareOptions Option;
+			// Caller-owned flag buffers. The search entry points pass
+			// 16 zeroed bytes each; the other entry points pass null.
+			public readonly byte* NeverMatchFlags;
+			public readonly byte* AlwaysMatchFlags;
+			// Caller-owned scratch buffers; CompareInternal() assembles
+			// the current sort key of the first string in Buffer1 and of
+			// the second string in Buffer2.
+			public byte* Buffer1;
+			public byte* Buffer2;
+			// Previously processed code (-1 when there is none) and
+			// previously produced sort key (null when there is none).
+			public int PrevCode;
+			public byte* PrevSortKey;
+			// Result of QuickCheckPossible() computed by the entry point.
+			public readonly bool QuickCheckPossible;
+
+			// Forgets the previous code and the previous sort key.
+			public void ClearPrevInfo ()
+			{
+				this.PrevSortKey = null;
+				this.PrevCode = -1;
+			}
+		}
+
+		// Previous code / previous sort key pair. CompareInternal() keeps
+		// one of these as a local for the second string, while the first
+		// string uses the equivalent members of Context.
+		unsafe struct PreviousInfo
+		{
+			// -1 means "no previous code".
+			public int Code;
+			// null means "no previous sort key".
+			public byte* SortKey;
+
+			// The argument is never read; it exists only because a struct
+			// cannot declare a parameterless constructor here.
+			public PreviousInfo (bool dummy)
+			{
+				this.SortKey = null;
+				this.Code = -1;
+			}
+		}
+
+ struct Escape // Now a struct created as a local inside CompareInternal() instead of a shared class instance. NOTE(review): it appears to record a saved scan position while a contraction replacement is processed - confirm against the code that fills it.
+ {
+ public string Source; // null means "unused"; CompareInternal() stores the input string here
+ public int Index; // NOTE(review): meaning not visible from here - presumably the saved index; confirm
+ public int Start; // NOTE(review): presumably the saved range start; confirm
+ public int End; // NOTE(review): presumably the saved range end; confirm
+ public int Optional; // NOTE(review): meaning not visible from here; confirm
+ }
+
static SimpleCollator invariant =
new SimpleCollator (CultureInfo.InvariantCulture);
- SortKeyBuffer buf;
- // CompareOptions expanded.
- bool ignoreNonSpace; // used in IndexOf()
- bool ignoreSymbols;
- bool ignoreWidth;
- bool ignoreCase;
- bool ignoreKanaType;
- TextInfo textInfo; // for ToLower().
- bool frenchSort;
+ readonly TextInfo textInfo; // for ToLower().
+ readonly bool frenchSort;
unsafe readonly byte* cjkCatTable;
unsafe readonly byte* cjkLv1Table;
readonly CodePointIndexer cjkIndexer;
// This flag marks characters as "unsafe", where the character
// could be used as part of a contraction (whose length > 1).
- readonly bool [] unsafeFlags;
+ readonly byte [] unsafeFlags;
- const int UnsafeFlagLength = 0x300;
+ const int UnsafeFlagLength = 0x300 / 8;
- // temporary sortkey buffer for index search/comparison
- byte [] charSortKey = new byte [4];
- byte [] charSortKey2 = new byte [4];
- byte [] charSortKeyIndexTarget = new byte [4];
+// readonly byte [] contractionFlags = new byte [16];
+
+ // This array is internally used inside IndexOf() to store
+ // "no need to check" ASCII characters.
+ //
+ // Because the collator must now be thread safe, the array is
+ // allocated on every call rather than kept as an instance field.
+// byte [] neverMatchFlags = new byte [128 / 8];
#region .ctor() and split functions
{
lcid = culture.LCID;
textInfo = culture.TextInfo;
- buf = new SortKeyBuffer (culture.LCID);
unsafe {
SetCJKTable (culture, ref cjkIndexer,
frenchSort = t.FrenchSort;
Uni.BuildTailoringTables (culture, t, ref contractions,
ref level2Maps);
- unsafeFlags = new bool [UnsafeFlagLength];
- foreach (Contraction c in contractions)
+ unsafeFlags = new byte [UnsafeFlagLength];
+ foreach (Contraction c in contractions) {
if (c.Source.Length > 1)
foreach (char ch in c.Source)
- unsafeFlags [(int) ch] = true;
+ unsafeFlags [(int) ch / 8 ]
+ |= (byte) (1 << ((int) ch & 7));
+// for (int i = 0; i < c.Source.Length; i++) {
+// int ch = c.Source [i];
+// if (ch > 127)
+// continue;
+// contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
+// }
+ }
+ if (lcid != 127)
+ foreach (Contraction c in invariant.contractions) {
+ if (c.Source.Length > 1)
+ foreach (char ch in c.Source)
+ unsafeFlags [(int) ch / 8 ]
+ |= (byte) (1 << ((int) ch & 7));
+// for (int i = 0; i < c.Source.Length; i++) {
+// int ch = c.Source [i];
+// if (ch > 127)
+// continue;
+// contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
+// }
+ }
// FIXME: Since tailorings are mostly for latin
// (and in some cases Cyrillic) characters, it would
/*
// dump tailoring table
-Console.WriteLine ("******** building table for {0} : c - {1} d - {2}",
+Console.WriteLine ("******** building table for {0} : contractions - {1} diacritical - {2}",
culture.LCID, contractions.Length, level2Maps.Length);
foreach (Contraction c in contractions) {
foreach (char cc in c.Source)
return ret;
}
- bool IsHalfKana (int cp)
- {
- return ignoreWidth || Uni.IsHalfWidthKana ((char) cp);
- }
-
- void SetOptions (CompareOptions options)
+		// Tells whether cp is to be treated as half-width kana. When
+		// IgnoreWidth is requested every code point is reported as
+		// half-width; otherwise the Unicode table decides.
+		static bool IsHalfKana (int cp, COpt opt)
{
- this.ignoreNonSpace = (options & CompareOptions.IgnoreNonSpace) != 0;
- this.ignoreSymbols = (options & CompareOptions.IgnoreSymbols) != 0;
- this.ignoreWidth = (options & CompareOptions.IgnoreWidth) != 0;
- this.ignoreCase = (options & CompareOptions.IgnoreCase) != 0;
- this.ignoreKanaType = (options & CompareOptions.IgnoreKanaType) != 0;
- previousChar = previousChar2 = -1;
- previousSortKey = previousSortKey2 = null;
- escape1.Source = escape2.Source = null;
+			if ((opt & COpt.IgnoreWidth) != 0)
+				return true;
+			return Uni.IsHalfWidthKana ((char) cp);
}
Contraction GetContraction (string s, int start, int end)
{
+// int si = s [start];
+// if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
+// return null;
Contraction c = GetContraction (s, start, end, contractions);
if (c != null || lcid == 127)
return c;
Contraction GetTailContraction (string s, int start, int end)
{
+// int si = s [end - 1];
+// if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
+// return null;
Contraction c = GetTailContraction (s, start, end, contractions);
if (c != null || lcid == 127)
return c;
Contraction GetTailContraction (string s, int start, int end, Contraction [] clist)
{
+ if (start == end || end < -1 || start >= s.Length || s.Length <= end + 1)
+ throw new SystemException (String.Format ("MONO internal error. Failed to get TailContraction. start = {0} end = {1} string = '{2}'", start, end, s));
for (int i = 0; i < clist.Length; i++) {
Contraction ct = clist [i];
- int diff = ct.Source [0] - s [end];
+ int diff = ct.Source [0] - s [end + 1];
if (diff > 0)
return null; // it's already sorted
else if (diff < 0)
continue;
char [] chars = ct.Source;
- if (start - end + 1 < chars.Length)
- continue;
+
bool match = true;
- int offset = start - chars.Length + 1;
+ if (chars.Length > start - end)
+ continue;
for (int n = 0; n < chars.Length; n++)
- if (s [offset + n] != chars [n]) {
+ if (s [start - n] != chars [chars.Length - 1 - n]) {
match = false;
break;
}
return null;
}
- int FilterOptions (int i)
+ int FilterOptions (int i, COpt opt) // Applies the width, case and kana-type folding requested by opt to code point i and returns the folded value.
{
- if (ignoreWidth) {
+ if ((opt & COpt.IgnoreWidth) != 0) { // a 0 result from ToWidthCompat is treated as "no mapping" and leaves i untouched
int x = Uni.ToWidthCompat (i);
if (x != 0)
i = x;
}
- if (ignoreCase)
+ if ((opt & COpt.IgnoreCase) != 0) // lower-cases through textInfo, the TextInfo captured in the constructor
i = textInfo.ToLower ((char) i);
- if (ignoreKanaType)
+ if ((opt & COpt.IgnoreKanaType) != 0) // folds through Uni.ToKanaTypeInsensitive
i = Uni.ToKanaTypeInsensitive (i);
return i;
}
- int previousChar = -1;
- byte [] previousSortKey = null;
- int previousChar2 = -1;
- byte [] previousSortKey2 = null;
-
enum ExtenderType {
None,
Simple,
}
}
- byte ToDashTypeValue (ExtenderType ext)
+ static byte ToDashTypeValue (ExtenderType ext, COpt opt)
{
- if (ignoreNonSpace) // LAMESPEC: huh, why?
+ if ((opt & COpt.IgnoreNonSpace) != 0) // LAMESPEC: huh, why?
return 3;
switch (ext) {
case ExtenderType.None:
}
}
- int FilterExtender (int i, ExtenderType ext)
+ int FilterExtender (int i, ExtenderType ext, COpt opt)
{
if (ext == ExtenderType.Conditional &&
Uni.HasSpecialWeight ((char) i)) {
- bool half = IsHalfKana ((char) i);
+ bool half = IsHalfKana ((char) i, opt);
bool katakana = !Uni.IsHiragana ((char) i);
switch (Level1 (i) & 7) {
case 2:
return i;
}
- bool IsIgnorable (int i)
+		// Tells whether code point i is skipped under the given options.
+		// The second argument of Uni.IsIgnorable() is a bit mask: 1 is
+		// always set, 2 is added for IgnoreSymbols and 4 for
+		// IgnoreNonSpace (presumably matching the three separate table
+		// tests this call replaces - confirm against Uni.IsIgnorable).
+		static bool IsIgnorable (int i, COpt opt)
{
- return Uni.IsIgnorable (i) ||
- ignoreSymbols && Uni.IsIgnorableSymbol (i) ||
- ignoreNonSpace && Uni.IsIgnorableNonSpacing (i);
+			int flag = 1;
+			if ((opt & COpt.IgnoreSymbols) != 0)
+				flag += 2;
+			if ((opt & COpt.IgnoreNonSpace) != 0)
+				flag += 4;
+			return Uni.IsIgnorable (i, (byte) flag);
}
+		// Returns true when i cannot be part of a multi-character
+		// contraction. unsafeFlags is a bit array (8 code points per
+		// byte); code points beyond its range are always safe.
bool IsSafe (int i)
{
- return i >= unsafeFlags.Length ? true : !unsafeFlags [i];
+			int slot = i / 8;
+			if (slot >= unsafeFlags.Length)
+				return true;
+			return (unsafeFlags [slot] & (1 << (i % 8))) == 0;
}
#region GetSortKey()
public SortKey GetSortKey (string s, int start, int length, CompareOptions options)
{
- SetOptions (options);
-
+ SortKeyBuffer buf = new SortKeyBuffer (lcid); // allocated per call; the shared instance buffer was removed
buf.Initialize (options, lcid, s, frenchSort);
int end = start + length;
- GetSortKey (s, start, end);
+ GetSortKey (s, start, end, buf, options); // buffer and options are passed explicitly instead of through instance state
return buf.GetResultAndReset ();
}
- void GetSortKey (string s, int start, int end)
+ unsafe void GetSortKey (string s, int start, int end,
+ SortKeyBuffer buf, CompareOptions opt)
{
+ byte* prevbuf = stackalloc byte [4];
+ ClearBuffer (prevbuf, 4);
+ Context ctx = new Context (opt, null, null, null, null, prevbuf, false);
+
for (int n = start; n < end; n++) {
int i = s [n];
ExtenderType ext = GetExtenderType (i);
if (ext != ExtenderType.None) {
- i = FilterExtender (previousChar, ext);
+ i = FilterExtender (ctx.PrevCode, ext, opt);
if (i >= 0)
- FillSortKeyRaw (i, ext);
- else if (previousSortKey != null) {
- byte [] b = previousSortKey;
+ FillSortKeyRaw (i, ext, buf, opt);
+ else if (ctx.PrevSortKey != null) {
+ byte* b = ctx.PrevSortKey;
buf.AppendNormal (
b [0],
b [1],
continue;
}
- if (IsIgnorable (i))
+ if (IsIgnorable (i, opt))
continue;
- i = FilterOptions (i);
+ i = FilterOptions (i, opt);
Contraction ct = GetContraction (s, n, end);
if (ct != null) {
if (ct.Replacement != null) {
- GetSortKey (ct.Replacement, 0, ct.Replacement.Length);
+ GetSortKey (ct.Replacement, 0, ct.Replacement.Length, buf, opt);
} else {
- byte [] b = ct.SortKey;
+ byte* b = ctx.PrevSortKey;
+ for (int bi = 0; bi < ct.SortKey.Length; bi++)
+ b [bi] = ct.SortKey [bi];
buf.AppendNormal (
b [0],
b [1],
b [2] != 1 ? b [2] : Level2 (i, ext),
b [3] != 1 ? b [3] : Uni.Level3 (i));
- previousSortKey = b;
- previousChar = -1;
+ ctx.PrevCode = -1;
}
n += ct.Source.Length - 1;
}
else {
if (!Uni.IsIgnorableNonSpacing (i))
- previousChar = i;
- FillSortKeyRaw (i, ExtenderType.None);
+ ctx.PrevCode = i;
+ FillSortKeyRaw (i, ExtenderType.None, buf, opt);
}
}
}
- void FillSortKeyRaw (int i, ExtenderType ext)
+ void FillSortKeyRaw (int i, ExtenderType ext,
+ SortKeyBuffer buf, CompareOptions opt)
{
if (0x3400 <= i && i <= 0x4DB5) {
int diff = i - 0x3400;
0);
return;
case UnicodeCategory.Surrogate:
- FillSurrogateSortKeyRaw (i);
+ FillSurrogateSortKeyRaw (i, buf);
return;
}
level2,
Uni.Level3 (i),
Uni.IsJapaneseSmallLetter ((char) i),
- ToDashTypeValue (ext),
+ ToDashTypeValue (ext, opt),
!Uni.IsHiragana ((char) i),
- IsHalfKana ((char) i)
+ IsHalfKana ((char) i, opt)
);
- if (!ignoreNonSpace && ext == ExtenderType.Voiced)
+ if ((opt & COpt.IgnoreNonSpace) == 0 && ext == ExtenderType.Voiced)
// Append voice weight
buf.AppendNormal (1, 1, 1, 0);
}
Uni.Level3 (i));
}
- void FillSurrogateSortKeyRaw (int i)
+ void FillSurrogateSortKeyRaw (int i, SortKeyBuffer buf)
{
int diffbase = 0;
int segment = 0;
return Compare (s1, 0, s1.Length, s2, 0, s2.Length, options);
}
- class Escape
+		// Compares the two ranges by raw UTF-16 code unit value. Returns
+		// the difference of the first mismatching pair; when the shorter
+		// range is a prefix of the longer one the shorter range sorts
+		// first (-1 / 1), and equal ranges give 0.
+		// Throws SystemException when a range lies outside its string.
+		private int CompareOrdinal (string s1, int idx1, int len1,
+			string s2, int idx2, int len2)
{
- public string Source;
- public int Index;
- public int Start;
- public int End;
- public int Optional;
+			int common = len1 < len2 ? len1 : len2;
+			if (idx1 < 0 || idx2 < 0 || idx1 + common > s1.Length || idx2 + common > s2.Length)
+				throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
+			// Both ranges advance in lock step, so one offset is enough.
+			for (int k = 0; k < common; k++) {
+				int diff = s1 [idx1 + k] - s2 [idx2 + k];
+				if (diff != 0)
+					return diff;
+			}
+			if (len1 == len2)
+				return 0;
+			return len1 == common ? -1 : 1;
}
- // Those instances are reused not to invoke instantiation
- // during Compare().
- Escape escape1 = new Escape ();
- Escape escape2 = new Escape ();
+		// Fast path taken by CompareInternal() when the options are
+		// CompareOptions.None and QuickCheckPossible() accepted both
+		// ranges. Mostly equivalent to CompareOrdinal, but the result is
+		// based on collation weights, not on code points.
+		//
+		// Weights are honoured in collation order: a category / level 1
+		// difference anywhere in the common part, or a length
+		// difference, outranks a level 3 difference that appears earlier
+		// in the strings (e.g. "Ab" vs "ac" is decided by 'b' vs 'c',
+		// not by 'A' vs 'a').
+		//
+		// sourceConsumed / targetConsumed are set only when the common
+		// part is identical code unit by code unit; any mismatch leaves
+		// both false. With immediateBreakup the first mismatch returns
+		// -1 right away ("different"), which is all that prefix checks
+		// need.
+		// Throws SystemException when a range lies outside its string or
+		// when two distinct characters carry identical weights.
+		private int CompareQuick (string s1, int idx1, int len1,
+			string s2, int idx2, int len2, out bool sourceConsumed,
+			out bool targetConsumed, bool immediateBreakup)
+		{
+			sourceConsumed = false;
+			targetConsumed = false;
+			int min = len1 < len2 ? len1 : len2;
+			int end1 = idx1 + min;
+			int end2 = idx2 + min;
+			if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
+				throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
+			// First level 3 difference met so far. It decides the result
+			// only if nothing more significant differs later on.
+			int lv3Diff = 0;
+			for (int i1 = idx1, i2 = idx2;
+				i1 < end1 && i2 < end2; i1++, i2++) {
+				if (s1 [i1] == s2 [i2])
+					continue;
+				if (immediateBreakup)
+					return -1;
+				int ret = Category (s1 [i1]) - Category (s2 [i2]);
+				if (ret == 0)
+					ret = Level1 (s1 [i1]) - Level1 (s2 [i2]);
+				if (ret != 0)
+					return ret;
+				// no level2 and 4
+				if (lv3Diff != 0)
+					continue; // an earlier level 3 difference already wins
+				lv3Diff = Uni.Level3 (s1 [i1]) - Uni.Level3 (s2 [i2]);
+				if (lv3Diff == 0)
+					throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. '{0}' {2} {3} '{1}' {4} {5}", s1, s2, idx1, end1, idx2, end2));
+			}
+			if (lv3Diff == 0) {
+				sourceConsumed = len1 <= len2;
+				targetConsumed = len1 >= len2;
+			}
+			// Extra characters carry primary weight, so a length
+			// difference outranks the remembered level 3 difference.
+			if (len1 != len2)
+				return len1 == min ? -1 : 1;
+			return lv3Diff;
+		}
- private int CompareOrdinal (string s1, int idx1, int len1,
+		// Same contract as CompareOrdinal, except that every character is
+		// lower-cased with the invariant collator's TextInfo before the
+		// comparison.
+		private int CompareOrdinalIgnoreCase (string s1, int idx1, int len1,
string s2, int idx2, int len2)
{
int min = len1 < len2 ? len1 : len2;
int end1 = idx1 + min;
int end2 = idx2 + min;
+			if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
+				throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
+			TextInfo invariantText = invariant.textInfo;
for (int i1 = idx1, i2 = idx2;
i1 < end1 && i2 < end2; i1++, i2++)
- if (s1 [i1] != s2 [i2])
- return s1 [i1] - s2 [i2];
+			{
+				char lower1 = invariantText.ToLower (s1 [i1]);
+				char lower2 = invariantText.ToLower (s2 [i2]);
+				if (lower1 != lower2)
+					return lower1 - lower2;
+			}
return len1 == len2 ? 0 :
len1 == min ? - 1 : 1;
}
- public int Compare (string s1, int idx1, int len1,
+ public unsafe int Compare (string s1, int idx1, int len1,
string s2, int idx2, int len2, CompareOptions options)
{
// quick equality check
// return 0;
if (options == CompareOptions.Ordinal)
return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
+#if NET_2_0
+ if (options == CompareOptions.OrdinalIgnoreCase)
+ return CompareOrdinalIgnoreCase (s1, idx1, len1, s2, idx2, len2);
+#endif
#if false // stable easy version, depends on GetSortKey().
SortKey sk1 = GetSortKey (s1, idx1, len1, options);
return d1 [i] < d2 [i] ? -1 : 1;
return d1.Length == d2.Length ? 0 : d1.Length < d2.Length ? -1 : 1;
#else
- SetOptions (options);
+ byte* sk1 = stackalloc byte [4];
+ byte* sk2 = stackalloc byte [4];
+ ClearBuffer (sk1, 4);
+ ClearBuffer (sk2, 4);
+ Context ctx = new Context (options, null, null, sk1, sk2, null,
+ QuickCheckPossible (s1, idx1, idx1 + len1, s2, idx2, idx2 + len2));
+
bool dummy, dummy2;
- int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, (options & CompareOptions.StringSort) != 0, out dummy, out dummy2, true);
+ int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, out dummy, out dummy2, true, false, ref ctx);
return ret == 0 ? 0 : ret < 0 ? -1 : 1;
#endif
}
- int CompareInternal (string s1, int idx1, int len1, string s2,
- int idx2, int len2, bool stringSort,
+		// Writes zero to the first `size` bytes starting at `buffer`.
+		// Nothing is written when size is zero or negative.
+		unsafe void ClearBuffer (byte* buffer, int size)
+		{
+			byte* limit = buffer + size;
+			for (byte* p = buffer; p < limit; p++)
+				*p = 0;
+		}
+
+		// Decides whether the CompareQuick() fast path may be used for
+		// the ranges [idx1, end1) of s1 and [idx2, end2) of s2. It may not
+		// when the debugging switch disables it or when either range
+		// holds a character rejected by IsQuickCheckBlocker().
+		bool QuickCheckPossible (string s1, int idx1, int end1,
+			string s2, int idx2, int end2)
+		{
+			if (QuickCheckDisabled)
+				return false;
+			for (int i = idx1; i < end1; i++)
+				if (IsQuickCheckBlocker (s1 [i]))
+					return false;
+			for (int i = idx2; i < end2; i++)
+				if (IsQuickCheckBlocker (s2 [i]))
+					return false;
+			return true;
+		}
+
+		// True for characters that rule out the quick check: anything at
+		// or above U+0080, control characters other than U+0009..U+000D,
+		// the hyphen-minus and the apostrophe.
+		static bool IsQuickCheckBlocker (char c)
+		{
+			if (c >= 0x80)
+				return true;
+			if (c == '-' || c == '\'')
+				return true;
+			return c < 0x20 && (c < '\x9' || c > '\xD');
+		}
+
+ unsafe int CompareInternal (string s1, int idx1, int len1, string s2,
+ int idx2, int len2,
out bool targetConsumed, out bool sourceConsumed,
- bool skipHeadingExtenders)
+ bool skipHeadingExtenders, bool immediateBreakup,
+ ref Context ctx)
{
+ COpt opt = ctx.Option;
int start1 = idx1;
int start2 = idx2;
int end1 = idx1 + len1;
int end2 = idx2 + len2;
targetConsumed = false;
sourceConsumed = false;
+ PreviousInfo prev2 = new PreviousInfo (false);
+
+ if (opt == CompareOptions.None && ctx.QuickCheckPossible)
+ return CompareQuick (s1, idx1, len1, s2, idx2, len2, out sourceConsumed, out targetConsumed, immediateBreakup);
// It holds final result that comes from the comparison
// at level 2 or lower. Even if Compare() found the
int quickCheckPos1 = idx1;
int quickCheckPos2 = idx2;
+ bool stringSort = (opt & COpt.StringSort) != 0;
+ bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
+ Escape escape1 = new Escape ();
+ Escape escape2 = new Escape ();
while (true) {
for (; idx1 < end1; idx1++)
- if (!IsIgnorable (s1 [idx1]))
+ if (!IsIgnorable (s1 [idx1], opt))
break;
for (; idx2 < end2; idx2++)
- if (!IsIgnorable (s2 [idx2]))
+ if (!IsIgnorable (s2 [idx2], opt))
break;
if (idx1 >= end1) {
int cur1 = idx1;
int cur2 = idx2;
- byte [] sk1 = null;
- byte [] sk2 = null;
- int i1 = FilterOptions (s1 [idx1]);
- int i2 = FilterOptions (s2 [idx2]);
+ byte* sk1 = null;
+ byte* sk2 = null;
+ int i1 = FilterOptions (s1 [idx1], opt);
+ int i2 = FilterOptions (s2 [idx2], opt);
bool special1 = false;
bool special2 = false;
// repeat the previous character.
ext1 = GetExtenderType (i1);
if (ext1 != ExtenderType.None) {
- if (previousChar < 0) {
- if (previousSortKey == null) {
+ if (ctx.PrevCode < 0) {
+ if (ctx.PrevSortKey == null) {
// nothing to extend
idx1++;
continue;
}
- sk1 = previousSortKey;
+ sk1 = ctx.PrevSortKey;
}
else
- i1 = FilterExtender (previousChar, ext1);
+ i1 = FilterExtender (ctx.PrevCode, ext1, opt);
}
ext2 = GetExtenderType (i2);
if (ext2 != ExtenderType.None) {
- if (previousChar2 < 0) {
- if (previousSortKey2 == null) {
+ if (prev2.Code < 0) {
+ if (prev2.SortKey == null) {
// nothing to extend
idx2++;
continue;
}
- sk2 = previousSortKey2;
+ sk2 = prev2.SortKey;
}
else
- i2 = FilterExtender (previousChar2, ext2);
+ i2 = FilterExtender (prev2.Code, ext2, opt);
}
byte cat1 = Category (i1);
// here Windows has a bug that it does
// not consider thirtiary weight.
lv5Value1 = Level1 (i1) << 8 + Uni.Level3 (i1);
- previousChar = i1;
+ ctx.PrevCode = i1;
idx1++;
}
if (cat2 == 6) {
// here Windows has a bug that it does
// not consider thirtiary weight.
lv5Value2 = Level1 (i2) << 8 + Uni.Level3 (i2);
- previousChar2 = i2;
+ prev2.Code = i2;
idx2++;
}
if (cat1 == 6 || cat2 == 6) {
else if (ct1 != null) {
offset1 = ct1.Source.Length;
if (ct1.SortKey != null) {
- sk1 = charSortKey;
+ sk1 = ctx.Buffer1;
for (int i = 0; i < ct1.SortKey.Length; i++)
sk1 [i] = ct1.SortKey [i];
- previousChar = -1;
- previousSortKey = sk1;
+ ctx.PrevCode = -1;
+ ctx.PrevSortKey = sk1;
}
else if (escape1.Source == null) {
escape1.Source = s1;
}
}
else {
- sk1 = charSortKey;
+ sk1 = ctx.Buffer1;
sk1 [0] = cat1;
sk1 [1] = Level1 (i1);
if (!ignoreNonSpace && currentLevel > 1)
if (currentLevel > 3)
special1 = Uni.HasSpecialWeight ((char) i1);
if (cat1 > 1)
- previousChar = i1;
+ ctx.PrevCode = i1;
}
Contraction ct2 = null;
else if (ct2 != null) {
idx2 += ct2.Source.Length;
if (ct2.SortKey != null) {
- sk2 = charSortKey2;
+ sk2 = ctx.Buffer2;
for (int i = 0; i < ct2.SortKey.Length; i++)
sk2 [i] = ct2.SortKey [i];
- previousChar2 = -1;
- previousSortKey2 = sk2;
+ prev2.Code = -1;
+ prev2.SortKey = sk2;
}
else if (escape2.Source == null) {
escape2.Source = s2;
}
}
else {
- sk2 = charSortKey2;
+ sk2 = ctx.Buffer2;
sk2 [0] = cat2;
sk2 [1] = Level1 (i2);
if (!ignoreNonSpace && currentLevel > 1)
if (currentLevel > 3)
special2 = Uni.HasSpecialWeight ((char) i2);
if (cat2 > 1)
- previousChar2 = i2;
+ prev2.Code = i2;
idx2++;
}
ret = sk1 [2] - sk2 [2];
if (ret != 0) {
finalResult = ret;
+ if (immediateBreakup)
+ return -1; // different
currentLevel = frenchSort ? 2 : 1;
continue;
}
ret = sk1 [3] - sk2 [3];
if (ret != 0) {
finalResult = ret;
+ if (immediateBreakup)
+ return -1; // different
currentLevel = 2;
continue;
}
if (currentLevel == 3)
continue;
if (special1 != special2) {
+ if (immediateBreakup)
+ return -1; // different
finalResult = special1 ? 1 : -1;
currentLevel = 3;
continue;
!Uni.IsJapaneseSmallLetter ((char) i1),
!Uni.IsJapaneseSmallLetter ((char) i2));
ret = ret != 0 ? ret :
- ToDashTypeValue (ext1) -
- ToDashTypeValue (ext2);
+ ToDashTypeValue (ext1, opt) -
+ ToDashTypeValue (ext2, opt);
ret = ret != 0 ? ret : CompareFlagPair (
Uni.IsHiragana ((char) i1),
Uni.IsHiragana ((char) i2));
ret = ret != 0 ? ret : CompareFlagPair (
- !IsHalfKana ((char) i1),
- !IsHalfKana ((char) i2));
+ !IsHalfKana ((char) i1, opt),
+ !IsHalfKana ((char) i2, opt));
if (ret != 0) {
+ if (immediateBreakup)
+ return -1; // different
finalResult = ret;
currentLevel = 3;
continue;
break;
if (!Uni.IsIgnorableNonSpacing (s2 [idx2]))
break;
- finalResult = Level2 (FilterOptions ((s1 [idx1])), ext1) - Level2 (FilterOptions (s2 [idx2]), ext2);
+ finalResult = Level2 (FilterOptions (s1 [idx1], opt), ext1) - Level2 (FilterOptions (s2 [idx2], opt), ext2);
if (finalResult != 0)
break;
idx1++;
}
}
if (currentLevel == 1 && finalResult != 0) {
- while (idx1 < end1)
+ while (idx1 < end1) {
if (Uni.IsIgnorableNonSpacing (s1 [idx1]))
idx1++;
- while (idx2 < end2)
+ else
+ break;
+ }
+ while (idx2 < end2) {
if (Uni.IsIgnorableNonSpacing (s2 [idx2]))
idx2++;
+ else
+ break;
+ }
}
// we still have to handle level 5
if (finalResult == 0) {
return IsPrefix (src, target, 0, src.Length, opt);
}
- public bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
+		// Tells whether the range of s that starts at `start` and spans
+		// `length` characters begins with target under the given options.
+		// An empty target is a prefix of everything. All scratch state
+		// lives on this call's stack.
+		public unsafe bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
{
- SetOptions (opt);
- return IsPrefix (s, target, start, length,
- (opt & CompareOptions.StringSort) != 0, true);
+			if (target.Length == 0)
+				return true;
+			// Two 4-byte sort key scratch buffers, zeroed before use.
+			byte* sk1 = stackalloc byte [4];
+			ClearBuffer (sk1, 4);
+			byte* sk2 = stackalloc byte [4];
+			ClearBuffer (sk2, 4);
+			bool quick = QuickCheckPossible (s, start, start + length, target, 0, target.Length);
+			Context ctx = new Context (opt, null, null, sk1, sk2, null, quick);
+			return IsPrefix (s, target, start, length, true, ref ctx);
}
- public bool IsPrefix (string s, string target, int start, int length, bool stringSort, bool skipHeadingExtenders)
+ unsafe bool IsPrefix (string s, string target, int start, int length, bool skipHeadingExtenders, ref Context ctx) // Prefix test driven by CompareInternal(); the options now travel inside ctx.
{
bool consumed, dummy;
- int ret = CompareInternal (s, start, length,
- target, 0, target.Length, stringSort,
- out consumed, out dummy, skipHeadingExtenders);
+ CompareInternal (s, start, length, // the comparison result itself is not needed, only the first out flag
+ target, 0, target.Length,
+ out consumed, out dummy, skipHeadingExtenders,
+ true, ref ctx); // true = immediateBreakup: stop at the first difference
return consumed;
}
return IsSuffix (src, target, src.Length - 1, src.Length, opt);
}
- public bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
+		// Tells whether the searched region of s ends with target under
+		// the given options. As with LastIndexOf(), `start` is the index
+		// of the last character of the region and `length` counts
+		// backwards from it. An empty target is a suffix of everything.
+		public unsafe bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
{
+			if (target.Length == 0)
+				return true;
+			int last = LastIndexOf (s, target, start, length, opt);
+			// Compare up to `start` only, i.e. the end of the searched
+			// region, not the end of the string; for the usual
+			// whole-string call (start == s.Length - 1) both are equal.
+			return last >= 0 && Compare (s, last, start - last + 1, target, 0, target.Length, opt) == 0;
+/*
// quick check : simple codepoint comparison
if (s.Length >= target.Length) {
int si = start;
return true;
}
- SetOptions (opt);
- return IsSuffix (s, target, start, length,
- (opt & CompareOptions.StringSort) != 0);
+ PreviousInfo prev = new PreviousInfo (false);
+ byte* sk1 = stackalloc byte [4];
+ byte* sk2 = stackalloc byte [4];
+ ClearBuffer (sk1, 4);
+ ClearBuffer (sk2, 4);
+ return IsSuffix (opt, s, target, start, length, ref prev, sk1, sk2);
+*/
}
- bool IsSuffix (string s, string t, int start, int length,
- bool stringSort)
+ unsafe bool IsSuffix (string s, string t, int start, int length, ref Context ctx)
{
int tstart = 0;
+ COpt opt = ctx.Option;
+
for (;tstart < t.Length; tstart++)
- if (!IsIgnorable (t [tstart]))
+ if (!IsIgnorable (t [tstart], opt))
break;
if (tstart == t.Length)
return true; // as if target is String.Empty.
if (IsSafe (t [ti]))
break;
Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, length, t, ti, tstart, sStep - si, tStep - ti);
- if (CompareInternal (s, si, sStep - si,
- t, ti, tStep - ti, stringSort,
+ if (CompareInternal (opt, s, si, sStep - si,
+ t, ti, tStep - ti,
out targetConsumed, out sourceConsumed,
true) != 0)
return false;
bool sourceConsumed, targetConsumed;
int mismatchCount = 0;
for (int i = 0; i < length; i++) {
- escape1.Source = escape2.Source = null;
- previousSortKey = previousSortKey2 = null;
- previousChar = previousChar2 = -1;
+ ctx.ClearPrevInfo ();
int ret = CompareInternal (s, start - i, i + 1,
t, tstart, t.Length - tstart,
- stringSort, out targetConsumed,
- out sourceConsumed, true);
+ out targetConsumed,
+ // FIXME: could immediately breakup
+ out sourceConsumed, true, true, ref ctx);
if (ret == 0)
return true;
if (!sourceConsumed && targetConsumed)
return IndexOf (s, target, 0, s.Length, opt);
}
- public int IndexOf (string s, string target, int start, int length, CompareOptions opt)
+		// Public string search entry point. Allocates and zeroes all
+		// per-call scratch memory on the stack, wraps it in a Context and
+		// delegates to the internal IndexOf() overload.
+		public unsafe int IndexOf (string s, string target, int start, int length, CompareOptions opt)
{
- SetOptions (opt);
+			// 16-byte flag buffers handed to the Context.
+			byte* alwaysMatchFlags = stackalloc byte [16];
+			ClearBuffer (alwaysMatchFlags, 16);
+			byte* neverMatchFlags = stackalloc byte [16];
+			ClearBuffer (neverMatchFlags, 16);
+			// 4-byte sort key buffers: the target's head and the two
+			// comparison scratch keys.
+			byte* targetSortKey = stackalloc byte [4];
+			ClearBuffer (targetSortKey, 4);
+			byte* sk1 = stackalloc byte [4];
+			ClearBuffer (sk1, 4);
+			byte* sk2 = stackalloc byte [4];
+			ClearBuffer (sk2, 4);
+			bool quick = QuickCheckPossible (s, start, start + length, target, 0, target.Length);
+			Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, quick);
+
return IndexOf (s, target, start, length,
- (opt & CompareOptions.StringSort) != 0);
+			targetSortKey, ref ctx);
}
public int IndexOf (string s, char target, CompareOptions opt)
return IndexOf (s, target, 0, s.Length, opt);
}
- public int IndexOf (string s, char target, int start, int length, CompareOptions opt)
+ public unsafe int IndexOf (string s, char target, int start, int length, CompareOptions opt) // Public character search entry point; all scratch memory is allocated per call on the stack.
{
- SetOptions (opt);
+ byte* alwaysMatchFlags = stackalloc byte [16];
+ byte* neverMatchFlags = stackalloc byte [16];
+ byte* targetSortKey = stackalloc byte [4];
+ byte* sk1 = stackalloc byte [4];
+ byte* sk2 = stackalloc byte [4];
+ ClearBuffer (alwaysMatchFlags, 16);
+ ClearBuffer (neverMatchFlags, 16);
+ ClearBuffer (targetSortKey, 4);
+ ClearBuffer (sk1, 4);
+ ClearBuffer (sk2, 4);
+ Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false); // quick check is always off for the char overload
// If target is contraction, then use string search.
Contraction ct = GetContraction (target);
if (ct != null) {
if (ct.Replacement != null)
- return IndexOf (s, ct.Replacement, start, length,
- (opt & CompareOptions.StringSort) != 0);
- else
- return IndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
+ return IndexOf (s, ct.Replacement,
+ start, length, targetSortKey, ref ctx);
+ else {
+ for (int i = 0; i < ct.SortKey.Length; i++) // NOTE(review): sk2 holds 4 bytes and is also ctx.Buffer2 - assumes ct.SortKey.Length <= 4 and that the search does not overwrite Buffer2 while reading it; confirm
+ sk2 [i] = ct.SortKey [i];
+ return IndexOfSortKey (s, start, length, sk2, char.MinValue, -1, true, ref ctx);
+ }
} else {
- int ti = FilterOptions ((int) target);
- charSortKeyIndexTarget [0] = Category (ti);
- charSortKeyIndexTarget [1] = Level1 (ti);
- if (!ignoreNonSpace)
- charSortKeyIndexTarget [2] =
+ int ti = FilterOptions ((int) target, opt);
+ targetSortKey [0] = Category (ti);
+ targetSortKey [1] = Level1 (ti);
+ if ((opt & COpt.IgnoreNonSpace) == 0) // with IgnoreNonSpace the level 2 slot keeps the 0 written by ClearBuffer
+ targetSortKey [2] =
Level2 (ti, ExtenderType.None);
- charSortKeyIndexTarget [3] = Uni.Level3 (ti);
+ targetSortKey [3] = Uni.Level3 (ti);
return IndexOfSortKey (s, start, length,
- charSortKeyIndexTarget, target, ti,
- !Uni.HasSpecialWeight ((char) ti));
+ targetSortKey, target, ti,
+ !Uni.HasSpecialWeight ((char) ti), ref ctx);
}
}
// Searches target byte[] keydata
- int IndexOfSortKey (string s, int start, int length, byte [] sortkey, char target, int ti, bool noLv4)
+ unsafe int IndexOfSortKey (string s, int start, int length, byte* sortkey, char target, int ti, bool noLv4, ref Context ctx)
{
int end = start + length;
int idx = start;
+
while (idx < end) {
int cur = idx;
- if (MatchesForward (s, ref idx, end, ti, sortkey, noLv4))
+ if (MatchesForward (s, ref idx, end, ti, sortkey, noLv4, ref ctx))
return cur;
}
return -1;
// Searches string. Search head character (or keydata when
// the head is contraction sortkey) and try IsPrefix().
- int IndexOf (string s, string target, int start, int length, bool stringSort)
+ unsafe int IndexOf (string s, string target, int start, int length, byte* targetSortKey, ref Context ctx)
{
+ COpt opt = ctx.Option;
int tidx = 0;
for (; tidx < target.Length; tidx++)
- if (!IsIgnorable (target [tidx]))
+ if (!IsIgnorable (target [tidx], opt))
break;
if (tidx == target.Length)
return start;
Contraction ct = GetContraction (target, tidx, target.Length - tidx);
string replace = ct != null ? ct.Replacement : null;
- byte [] sk = replace == null ? charSortKeyIndexTarget : null;
+ byte* sk = replace == null ? targetSortKey : null;
bool noLv4 = true;
char tc = char.MinValue;
int ti = -1;
sk [i] = ct.SortKey [i];
} else if (sk != null) {
tc = target [tidx];
- ti = FilterOptions (target [tidx]);
+ ti = FilterOptions (target [tidx], opt);
sk [0] = Category (ti);
sk [1] = Level1 (ti);
- if (!ignoreNonSpace)
+ if ((opt & COpt.IgnoreNonSpace) == 0)
sk [2] = Level2 (ti, ExtenderType.None);
sk [3] = Uni.Level3 (ti);
noLv4 = !Uni.HasSpecialWeight ((char) ti);
do {
int idx = 0;
if (replace != null)
- idx = IndexOf (s, replace, start, length, stringSort);
+ idx = IndexOf (s, replace, start, length, targetSortKey, ref ctx);
else
- idx = IndexOfSortKey (s, start, length, sk, tc, ti, noLv4);
+ idx = IndexOfSortKey (s, start, length, sk, tc, ti, noLv4, ref ctx);
if (idx < 0)
return -1;
length -= idx - start;
start = idx;
- if (IsPrefix (s, target, start, length, stringSort, false))
+ if (IsPrefix (s, target, start, length, false, ref ctx))
return idx;
Contraction cts = GetContraction (s, start, length);
if (cts != null) {
return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
}
- public int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
+		// Public backward string search entry point. Allocates and
+		// zeroes all per-call scratch memory on the stack, wraps it in a
+		// Context and delegates to the internal LastIndexOf() overload.
+		public unsafe int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
{
- SetOptions (opt);
+			// 16-byte flag buffers handed to the Context.
+			byte* alwaysMatchFlags = stackalloc byte [16];
+			ClearBuffer (alwaysMatchFlags, 16);
+			byte* neverMatchFlags = stackalloc byte [16];
+			ClearBuffer (neverMatchFlags, 16);
+			// 4-byte sort key buffers: the target's head and the two
+			// comparison scratch keys.
+			byte* targetSortKey = stackalloc byte [4];
+			ClearBuffer (targetSortKey, 4);
+			byte* sk1 = stackalloc byte [4];
+			ClearBuffer (sk1, 4);
+			byte* sk2 = stackalloc byte [4];
+			ClearBuffer (sk2, 4);
+			// The quick check stays off here: CompareQuick() does not
+			// work properly together with LastIndexOf(), for a reason
+			// that has not been identified yet.
+			Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
return LastIndexOf (s, target, start, length,
- (opt & CompareOptions.StringSort) != 0);
+			targetSortKey, ref ctx);
}
// Convenience overload for a char target: searches the whole of s by
// delegating with start = s.Length - 1 and length = s.Length.
public int LastIndexOf (string s, char target, CompareOptions opt)
return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
}
// Public entry point: searches backward in s for the single char target.
// Sets up the same stack-allocated, zeroed scratch state as the string
// overload (two 16-byte flag tables, three 4-byte key buffers), then picks
// one of three strategies depending on whether target has a contraction
// entry and whether that entry carries a replacement string.
- public int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
+ public unsafe int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
{
- SetOptions (opt);
-
- // If target is contraction, then use string search.
+ byte* alwaysMatchFlags = stackalloc byte [16];
+ byte* neverMatchFlags = stackalloc byte [16];
+ byte* targetSortKey = stackalloc byte [4];
+ byte* sk1 = stackalloc byte [4];
+ byte* sk2 = stackalloc byte [4];
+ ClearBuffer (alwaysMatchFlags, 16);
+ ClearBuffer (neverMatchFlags, 16);
+ ClearBuffer (targetSortKey, 4);
+ ClearBuffer (sk1, 4);
+ ClearBuffer (sk2, 4);
// prev1 = null and quickCheckPossible = false, as in the string overload.
+ Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
+
+ // If target is a replacement contraction, then use
+ // string search.
Contraction ct = GetContraction (target);
if (ct != null) {
if (ct.Replacement != null)
- return LastIndexOf (s, ct.Replacement, start, length, (opt & CompareOptions.StringSort) != 0);
- else
- return LastIndexOfSortKey (s, start, start, length, ct.SortKey, char.MinValue, -1, true);
+ return LastIndexOf (s,
+ ct.Replacement, start, length,
+ targetSortKey, ref ctx);
+ else {
// Contraction with its own key: copy it into sk2 (the same
// buffer that was handed to the Context as buffer2) and search
// by key data, with ti = -1 and noLv4 = true.
// NOTE(review): sk2 holds 4 bytes but the copy is bounded only
// by ct.SortKey.Length - assumes the key is at most 4 bytes; confirm.
+ for (int bi = 0; bi < ct.SortKey.Length; bi++)
+ sk2 [bi] = ct.SortKey [bi];
+ return LastIndexOfSortKey (s, start,
+ start, length, sk2,
+ -1, true, ref ctx);
+ }
}
else {
// Plain character: build its 4-byte key from the option-filtered
// code. Byte 2 is skipped under IgnoreNonSpace and therefore stays
// at the zero written by ClearBuffer above.
- int ti = FilterOptions ((int) target);
- charSortKeyIndexTarget [0] = Category (ti);
- charSortKeyIndexTarget [1] = Level1 (ti);
- if (!ignoreNonSpace)
- charSortKeyIndexTarget [2] = Level2 (ti, ExtenderType.None);
- charSortKeyIndexTarget [3] = Uni.Level3 (ti);
- return LastIndexOfSortKey (s, start, start, length, charSortKeyIndexTarget, target, ti, !Uni.HasSpecialWeight ((char) ti));
+ int ti = FilterOptions ((int) target, opt);
+ targetSortKey [0] = Category (ti);
+ targetSortKey [1] = Level1 (ti);
+ if ((opt & COpt.IgnoreNonSpace) == 0)
+ targetSortKey [2] = Level2 (ti, ExtenderType.None);
+ targetSortKey [3] = Uni.Level3 (ti);
+ return LastIndexOfSortKey (s, start, start,
+ length, targetSortKey,
+ ti, !Uni.HasSpecialWeight ((char) ti),
+ ref ctx);
}
}
// Searches backward for a single collation element given as key data
// (sortkey). Walks idx from start down to, but not including,
// end = start - length. MatchesBackward receives idx by ref and moves it
// on each attempt. Returns the index at which the successful attempt
// began, or -1 once the window is exhausted.
- int LastIndexOfSortKey (string s, int start, int orgStart, int length, byte [] sortkey, char target, int ti, bool noLv4)
+ unsafe int LastIndexOfSortKey (string s, int start, int orgStart, int length, byte* sortkey, int ti, bool noLv4, ref Context ctx)
{
int end = start - length;
int idx = start;
while (idx > end) {
// Remember where this attempt began; the call below may change idx.
int cur = idx;
- if (MatchesBackward (s, ref idx, end, orgStart, ti, sortkey, noLv4))
+ if (MatchesBackward (s, ref idx, end, orgStart,
+ ti, sortkey, noLv4, ref ctx))
return cur;
}
return -1;
// Searches string. Search head character (or keydata when
// the head is contraction sortkey) and try IsPrefix().
// Leading ignorable characters of target are skipped first; if target
// consists only of ignorable characters, start is returned unchanged.
// targetSortKey is caller-provided scratch space for the head key.
// NOTE(review): the braces in this excerpt do not balance, so the loop
// header around the search step is not visible here.
- int LastIndexOf (string s, string target, int start, int length, bool stringSort)
+ unsafe int LastIndexOf (string s, string target, int start, int length, byte* targetSortKey, ref Context ctx)
{
+ COpt opt = ctx.Option;
int orgStart = start;
int tidx = 0;
for (; tidx < target.Length; tidx++)
- if (!IsIgnorable (target [tidx]))
+ if (!IsIgnorable (target [tidx], opt))
break;
if (tidx == target.Length)
return start;
// Does the first significant part of target form a contraction?
Contraction ct = GetContraction (target, tidx, target.Length - tidx);
string replace = ct != null ? ct.Replacement : null;
// sk is null exactly when the head is a replacement contraction; in
// that case the head is located by string search instead of key data.
- byte [] sk = replace == null ? charSortKeyIndexTarget : null;
+ byte* sk = replace == null ? targetSortKey : null;
bool noLv4 = true;
- char tc = char.MinValue;
int ti = -1;
if (ct != null && sk != null) {
// Contraction with its own sort key: use that key as the head key.
for (int i = 0; i < ct.SortKey.Length; i++)
sk [i] = ct.SortKey [i];
} else if (sk != null) {
// Plain head character: derive the key from the filtered code.
// Byte 2 is left untouched when IgnoreNonSpace is set.
- tc = target [tidx];
- ti = FilterOptions (target [tidx]);
+ ti = FilterOptions (target [tidx], opt);
sk [0] = Category (ti);
sk [1] = Level1 (ti);
- if (!ignoreNonSpace)
+ if ((opt & COpt.IgnoreNonSpace) == 0)
sk [2] = Level2 (ti, ExtenderType.None);
sk [3] = Uni.Level3 (ti);
noLv4 = !Uni.HasSpecialWeight ((char) ti);
// Locate a candidate position for the head of target.
int idx = 0;
if (replace != null)
- idx = LastIndexOf (s, replace, start, length, stringSort);
+ idx = LastIndexOf (s, replace,
+ start, length,
+ targetSortKey, ref ctx);
else
- idx = LastIndexOfSortKey (s, start, orgStart, length, sk, tc, ti, noLv4);
+ idx = LastIndexOfSortKey (s, start, orgStart, length, sk, ti, noLv4, ref ctx);
if (idx < 0)
return -1;
// Shrink the remaining window to what lies at or before the candidate.
length -= start - idx;
start = idx;
// Verify the whole target against s [idx .. orgStart].
- if (IsPrefix (s, target, idx, orgStart - idx + 1, stringSort, false)) {
+ if (IsPrefix (s, target, idx, orgStart - idx + 1, false, ref ctx)) {
// Report the first non-ignorable position at or after idx.
for (;idx < orgStart; idx++)
- if (!IsIgnorable (s [idx]))
+ if (!IsIgnorable (s [idx], opt))
break;
return idx;
}
return -1;
}
// Tests whether the collation element at s [idx] matches sortkey, using
// two per-search bit tables in ctx as a cache for codes below 128:
// - a bit set in AlwaysMatchFlags returns true at once (idx is left as is);
// - a bit set in NeverMatchFlags advances idx by one and returns false.
// Otherwise the work is done by MatchesForwardCore, and its result is
// recorded in the matching table, but only when no contraction was
// involved (ct == null), the character is not an extender and si < 128.
- private bool MatchesForward (string s, ref int idx, int end, int ti, byte [] sortkey, bool noLv4)
+ unsafe bool MatchesForward (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ref Context ctx)
{
- int si = -1;
// NOTE(review): opt is assigned here but not read again in this wrapper.
+ COpt opt = ctx.Option;
// si is the raw UTF-16 code; si / 8 selects the byte and si % 8 the bit.
+ int si = s [idx];
+ if (ctx.AlwaysMatchFlags != null && si < 128 && (ctx.AlwaysMatchFlags [si / 8] & (1 << (si % 8))) != 0)
+ return true;
+ if (ctx.NeverMatchFlags != null &&
+ si < 128 &&
+ (ctx.NeverMatchFlags [si / 8] & (1 << (si % 8))) != 0) {
+ idx++;
+ return false;
+ }
ExtenderType ext = GetExtenderType (s [idx]);
Contraction ct = null;
+ if (MatchesForwardCore (s, ref idx, end, ti, sortkey, noLv4, ext, ref ct, ref ctx)) {
+ if (ctx.AlwaysMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
+ ctx.AlwaysMatchFlags [si / 8] |= (byte) (1 << (si % 8));
+ return true;
+ }
+ if (ctx.NeverMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
+ ctx.NeverMatchFlags [si / 8] |= (byte) (1 << (si % 8));
+ return false;
+ }
+
// Uncached part of MatchesForward: builds the key of the element at
// s [idx] in ctx.Buffer1 (or reuses ctx.PrevSortKey for an extender that
// follows a contraction), advances idx past the element, and compares the
// result against sortkey with MatchesPrimitive. ct is an out-style ref
// through which the caller learns whether a contraction was found.
+ unsafe bool MatchesForwardCore (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
+ {
+ COpt opt = ctx.Option;
+ byte* charSortKey = ctx.Buffer1;
+ bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
+ int si = -1;
if (ext == ExtenderType.None)
ct = GetContraction (s, idx, end);
- else if (previousChar < 0) {
- if (previousSortKey == null) {
+ else if (ctx.PrevCode < 0) {
// Extender with no previous code: fall back to the previous sort
// key, or fail (skipping the extender) when there is none either.
+ if (ctx.PrevSortKey == null) {
idx++;
return false;
}
- charSortKey = previousSortKey;
+ charSortKey = ctx.PrevSortKey;
}
else
// Extender after a known character: derive the code from it.
- si = FilterExtender (previousChar, ext);
+ si = FilterExtender (ctx.PrevCode, ext, opt);
// if lv4 exists, it never matches contraction
if (ct != null) {
idx += ct.Source.Length;
if (!noLv4)
return false;
if (ct.SortKey != null) {
// NOTE(review): this copies the target key (sortkey), not
// ct.SortKey, into the scratch buffer, so the final
// MatchesPrimitive call compares sortkey with itself - confirm
// that this is intended.
- for (int i = 0; i < sortkey.Length; i++)
+ for (int i = 0; i < 4; i++)
charSortKey [i] = sortkey [i];
- previousChar = -1;
- previousSortKey = charSortKey;
+ ctx.PrevCode = -1;
+ ctx.PrevSortKey = charSortKey;
} else {
// Here is the core of LAMESPEC
// described at the top of the source.
// Only the element at index 0 of the replacement is tested.
int dummy = 0;
return MatchesForward (ct.Replacement, ref dummy,
- ct.Replacement.Length, ti, sortkey, noLv4);
+ ct.Replacement.Length, ti, sortkey, noLv4, ref ctx);
}
} else {
if (si < 0)
- si = FilterOptions (s [idx]);
+ si = FilterOptions (s [idx], opt);
+ idx++;
charSortKey [0] = Category (si);
- charSortKey [1] = Level1 (si);
- if (!ignoreNonSpace)
// Compute the key level by level and stop computing as soon as a
// level differs from the target: Level1 only if the category
// matched, Level2 only if Level1 matched (and diacritics count).
+ bool noMatch = false;
+ if (sortkey [0] == charSortKey [0])
+ charSortKey [1] = Level1 (si);
+ else
+ noMatch = true;
+ if (!ignoreNonSpace && sortkey [1] == charSortKey [1])
charSortKey [2] = Level2 (si, ext);
+ else if (!ignoreNonSpace)
+ noMatch = true;
+ if (noMatch) {
// Skip the category-1 characters that follow before reporting
// failure (presumably trailing combining marks - confirm).
+ for (; idx < end; idx++) {
+ if (Category (s [idx]) != 1)
+ break;
+ }
+ return false;
+ }
charSortKey [3] = Uni.Level3 (si);
// Remember this code for a following extender, unless category 1.
if (charSortKey [0] != 1)
- previousChar = si;
- idx++;
+ ctx.PrevCode = si;
}
for (; idx < end; idx++) {
if (Category (s [idx]) != 1)
+ Level2 (s [idx], ExtenderType.None));
}
- return MatchesPrimitive (charSortKey, si, ext, sortkey, ti, noLv4);
+ return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);
}
// Compares two 4-byte keys (source vs. target). Bytes 0, 1 and 3 must
// always be equal; byte 2 is compared only when IgnoreNonSpace is not set.
// When the keys agree, the special-weight (level 4) properties of the
// codes si and ti decide the result.
- private bool MatchesPrimitive (byte [] charSortKey, int si, ExtenderType ext, byte [] sortkey, int ti, bool noLv4)
+ unsafe bool MatchesPrimitive (COpt opt, byte* source, int si, ExtenderType ext, byte* target, int ti, bool noLv4)
{
- if (charSortKey [0] != sortkey [0] ||
- charSortKey [1] != sortkey [1] ||
- (!ignoreNonSpace && charSortKey [2] != sortkey [2]) ||
- charSortKey [3] != sortkey [3])
+ bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
+ if (source [0] != target [0] ||
+ source [1] != target [1] ||
+ (!ignoreNonSpace && source [2] != target [2]) ||
+ source [3] != target [3])
return false;
// Target has no level 4 and the source has no code (si < 0) or no
// special weight either: the four bytes were the whole comparison.
if (noLv4 && (si < 0 || !Uni.HasSpecialWeight ((char) si)))
return true;
return false;
// Level 4 comparison: small-letter flag, dash type, hiragana flag and
// half-width-kana flag must all agree between source and target.
if (Uni.IsJapaneseSmallLetter ((char) si) !=
Uni.IsJapaneseSmallLetter ((char) ti) ||
- ToDashTypeValue (ext) !=
+ ToDashTypeValue (ext, opt) !=
// FIXME: we will have to specify correct value for target
- ToDashTypeValue (ExtenderType.None) ||
+ ToDashTypeValue (ExtenderType.None, opt) ||
!Uni.IsHiragana ((char) si) !=
!Uni.IsHiragana ((char) ti) ||
- IsHalfKana ((char) si) !=
- IsHalfKana ((char) ti))
+ IsHalfKana ((char) si, opt) !=
+ IsHalfKana ((char) ti, opt))
return false;
return true;
}
// Backward counterpart of MatchesForward: tests whether the collation
// element ending at s [idx] matches sortkey, consulting the two bit tables
// in ctx for codes below 128 first:
// - a bit set in AlwaysMatchFlags returns true at once (idx is left as is);
// - a bit set in NeverMatchFlags moves idx back by one and returns false.
// Otherwise MatchesBackwardCore decides, and the outcome is recorded in the
// matching table when no contraction was involved (ct == null), the
// character is not an extender and si < 128.
- private bool MatchesBackward (string s, ref int idx, int end, int orgStart, int ti, byte [] sortkey, bool noLv4)
+ unsafe bool MatchesBackward (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ref Context ctx)
+ {
// si is the raw UTF-16 code; si / 8 selects the byte and si % 8 the bit.
+ int si = s [idx];
+ if (ctx.AlwaysMatchFlags != null && si < 128 && (ctx.AlwaysMatchFlags [si / 8] & (1 << (si % 8))) != 0)
+ return true;
+ if (ctx.NeverMatchFlags != null && si < 128 && (ctx.NeverMatchFlags [si / 8] & (1 << (si % 8))) != 0) {
+ idx--;
+ return false;
+ }
+ ExtenderType ext = GetExtenderType (s [idx]);
+ Contraction ct = null;
+ if (MatchesBackwardCore (s, ref idx, end, orgStart, ti, sortkey, noLv4, ext, ref ct, ref ctx)) {
+ if (ctx.AlwaysMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
+ ctx.AlwaysMatchFlags [si / 8] |= (byte) (1 << (si % 8));
+ return true;
+ }
+ if (ctx.NeverMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128) {
+ ctx.NeverMatchFlags [si / 8] |= (byte) (1 << (si % 8));
+ }
+ return false;
+ }
+
// Uncached part of MatchesBackward: builds the key of the element ending
// at s [idx] in ctx.Buffer1, moves idx back past that element, and compares
// the key against sortkey with MatchesPrimitive. ct is an out-style ref
// through which the caller learns whether a contraction was found.
+ unsafe bool MatchesBackwardCore (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
{
+ COpt opt = ctx.Option;
+ byte* charSortKey = ctx.Buffer1;
+ bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
// cur keeps the starting position; idx itself is moved below.
int cur = idx;
int si = -1;
- ExtenderType ext = GetExtenderType (s [idx]);
// To handle extenders in source, we need to
// check next _primary_ character.
if (ext != ExtenderType.None) {
// NOTE(review): tmp starts at 0 and only decreases, so as written
// s [0] is the only character this loop can inspect before the
// tmp < 0 exit - confirm whether it was meant to start from idx.
for (int tmp = 0; ; tmp--) {
// NOTE(review): this exit leaves idx unchanged; confirm that the
// caller's backward scan cannot loop on it.
if (tmp < 0) // heading extender
return false;
- if (IsIgnorable (s [tmp]))
+ if (IsIgnorable (s [tmp], opt))
continue;
- int tmpi = FilterOptions (s [tmp]);
+ int tmpi = FilterOptions (s [tmp], opt);
byte category = Category (tmpi);
if (category == 1) {
diacritical = Level2 (tmpi, ExtenderType.None);
continue;
}
- si = FilterExtender (tmpi, ext);
+ si = FilterExtender (tmpi, ext, opt);
charSortKey [0] = category;
charSortKey [1] = Level1 (si);
if (!ignoreNonSpace)
}
idx--;
}
- Contraction ct = null;
if (ext == ExtenderType.None)
- ct = GetContraction (s, idx, end);
+ ct = GetTailContraction (s, idx, end);
// if lv4 exists, it never matches contraction
if (ct != null) {
idx -= ct.Source.Length;
if (!noLv4)
return false;
if (ct.SortKey != null) {
// NOTE(review): this copies the target key (sortkey), not
// ct.SortKey, into the scratch buffer, so the final
// MatchesPrimitive call compares sortkey with itself - confirm
// that this is intended.
- for (int i = 0; i < sortkey.Length; i++)
+ for (int i = 0; i < 4; i++)
charSortKey [i] = sortkey [i];
- previousChar = -1;
- previousSortKey = charSortKey;
+ ctx.PrevCode = -1;
+ ctx.PrevSortKey = charSortKey;
} else {
// Here is the core of LAMESPEC
// described at the top of the source.
// The replacement is scanned backward over its full length and
// the element counts as matching if the key is found anywhere.
int dummy = ct.Replacement.Length - 1;
- return MatchesBackward (ct.Replacement,
- ref dummy, dummy, -1, ti, sortkey, noLv4);
+ return 0 <= LastIndexOfSortKey (
+ ct.Replacement, dummy, dummy,
+ ct.Replacement.Length, sortkey,
+ ti, noLv4, ref ctx);
}
} else if (ext == ExtenderType.None) {
if (si < 0)
- si = FilterOptions (s [idx]);
+ si = FilterOptions (s [idx], opt);
+ idx--;
// Compute the key level by level and give up as soon as a level
// differs from the target: Level1 only if the category matched,
// Level2 only if Level1 matched (and diacritics count).
+ bool noMatch = false;
charSortKey [0] = Category (si);
- charSortKey [1] = Level1 (si);
- if (!ignoreNonSpace)
+ if (charSortKey [0] == sortkey [0])
+ charSortKey [1] = Level1 (si);
+ else
+ noMatch = true;
+ if (!ignoreNonSpace && charSortKey [1] == sortkey [1])
charSortKey [2] = Level2 (si, ext);
+ else if (!ignoreNonSpace)
+ noMatch = true;
+ if (noMatch)
+ return false;
charSortKey [3] = Uni.Level3 (si);
// Remember this code for a following extender, unless category 1.
if (charSortKey [0] != 1)
- previousChar = si;
- idx--;
+ ctx.PrevCode = si;
}
if (ext == ExtenderType.None) {
// Looks at the characters after the starting position, up to
// orgStart (exclusive).
for (int tmp = cur + 1; tmp < orgStart; tmp++) {
+ Level2 (s [tmp], ExtenderType.None));
}
}
- return MatchesPrimitive (charSortKey, si, ext, sortkey, ti, noLv4);
+ return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);
}
#endregion
}