//
// SimpleCollator.cs
//
// This class will demonstrate CompareInfo functionality that will just work.
//

//
// Here's a summary for supporting contractions, expansions and diacritical 
// remappings.
//
// Diacritical mapping is a simple tailoring rule that "remaps" diacritical
// weight value from one to another. For now it applies to all range of
// characters, but at some stage we might need to limit the remapping ranges.
//
// A Contraction consists of a string (actually char[]) and a sortkey entry
// (i.e. byte[]). It indicates that a sequence of characters is interpreted
// as a single character that has the mapped sortkey value. There is no
// character which goes across "special rules". When the collator encountered
// such a sequence of characters, it just returns the sortkey value without
// further replacement.
//
// Since it is still likely to happen that a contraction sequence matches
// other character than the identical sequence (depending on CompareOptions
// and of course, defined sortkey value itself), comparison cannot be done
// at source char[] level.
//
// (to be continued.)
//

//
// In IndexOf(), the first character in the target string or the target char
// itself is turned into sortkey bytes. If the character has a contraction and
// that is sortkey map, then it is used instead. If the contraction exists and
// that is replacement map, then the first character of the replacement string
// is searched instead. IndexOf() always searches only for the top character,
// and if it returned negative value, then it returns -1. Otherwise, it then
// tries IsPrefix() from that location. If it returns true, then it returns
// the index.
//

// LAMESPEC: IndexOf() is lame as a whole API. It never matches in the middle
// of expansion and there is no proper way to return such indexes within
// a single int return value.
//
// For example, try below in .NET:
//	IndexOf("\u00E6", "a")
//	IndexOf("\u00E6", "e")
//


using System;
using System.Collections;
using System.Globalization;

using Uni = Mono.Globalization.Unicode.MSCompatUnicodeTable;
using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;

namespace Mono.Globalization.Unicode
{
	internal class SimpleCollator
	{
		static SimpleCollator invariant =
			new SimpleCollator (CultureInfo.InvariantCulture);

		internal static readonly byte [] ignorableFlags =
			Uni.ignorableFlags;
		internal static readonly byte [] categories =
			Uni.categories;
		internal static readonly byte [] level1 =
			Uni.level1;
		internal static readonly byte [] level2 =
			Uni.level2;
		internal static readonly byte [] level3 =
			Uni.level3;
		internal static readonly ushort [] widthCompat =
			Uni.widthCompat;
		internal static readonly CodePointIndexer categoryIndexer =
			UUtil.Category;
		internal static readonly CodePointIndexer lv1Indexer =
			UUtil.Level1;
		internal static readonly CodePointIndexer lv2Indexer =
			UUtil.Level2;
		internal static readonly CodePointIndexer lv3Indexer =
			UUtil.Level3;
		internal static readonly CodePointIndexer widthIndexer =
			UUtil.WidthCompat;


		SortKeyBuffer buf;
		// CompareOptions expanded.
		bool ignoreNonSpace; // used in IndexOf()
		bool ignoreSymbols;
		bool ignoreWidth;
		bool ignoreCase;
		bool ignoreKanaType;
		TextInfo textInfo; // for ToLower().
		bool frenchSort;
		readonly ushort [] cjkTable;
		readonly CodePointIndexer cjkIndexer;
		readonly byte [] cjkLv2Table;
		readonly CodePointIndexer cjkLv2Indexer;
		readonly int lcid;
		readonly Contraction [] contractions;
		readonly Level2Map [] level2Maps;

		// temporary sortkey buffer for index search/comparison
		byte [] charSortKey = new byte [4];
		byte [] charSortKey2 = new byte [4];
		// temporary expansion store for IsPrefix/Suffix
		int escapedSourceIndex;

		#region Tailoring support classes
		// Possible mapping types are:
		//
		//	- string to string (ReplacementMap)
		//	- string to SortKey (SortKeyMap)
		//	- diacritical byte to byte (DiacriticalMap)
		//
		// There could be mapping from string to sortkeys, but
		// for now there is none as such.
		//
		internal class Contraction
		{
			public readonly char [] Source;
			// only either of them is used.
			public readonly string Replacement;
			public readonly byte [] SortKey;

			public Contraction (char [] source,
				string replacement, byte [] sortkey)
			{
				Source = source;
				Replacement = replacement;
				SortKey = sortkey;
			}
		}

		internal class ContractionComparer : IComparer
		{
			public static readonly ContractionComparer Instance =
				new ContractionComparer ();

			public int Compare (object o1, object o2)
			{
				Contraction c1 = (Contraction) o1;
				Contraction c2 = (Contraction) o2;
				char [] a1 = c1.Source;
				char [] a2 = c2.Source;
				int min = a1.Length > a2.Length ?
					a2.Length : a1.Length;
				for (int i = 0; i < min; i++)
					if (a1 [i] != a2 [i])
						return a1 [i] - a2 [i];
				return a1.Length - a2.Length;
			}
		}

		internal class Level2Map
		{
			public byte Source;
			public byte Replace;

			public Level2Map (byte source, byte replace)
			{
				Source = source;
				Replace = replace;
			}
		}

		internal class Level2MapComparer : IComparer
		{
			public static readonly Level2MapComparer Instance =
				new Level2MapComparer ();

			public int Compare (object o1, object o2)
			{
				Level2Map m1 = (Level2Map) o1;
				Level2Map m2 = (Level2Map) o2;
				return (m1.Source - m2.Source);
			}
		}

		#endregion

		#region .ctor() and split functions

		public SimpleCollator (CultureInfo culture)
		{
			lcid = culture.LCID;
			textInfo = culture.TextInfo;
			buf = new SortKeyBuffer (culture.LCID);

			SetCJKTable (culture, ref cjkTable, ref cjkIndexer,
				ref cjkLv2Table, ref cjkLv2Indexer);

			// Get tailoring info
			TailoringInfo t = null;
			for (CultureInfo ci = culture; ci.LCID != 127; ci = ci.Parent) {
				t = Uni.GetTailoringInfo (ci.LCID);
				if (t != null)
					break;
			}
			if (t == null) // then use invariant
				t = Uni.GetTailoringInfo (127);

			frenchSort = t.FrenchSort;
			BuildTailoringTables (culture, t, ref contractions,
				ref level2Maps);
			// FIXME: Since tailorings are mostly for latin
			// (and in some cases Cyrillic) characters, it would
			// be much better for performance to store "start 
			// indexes" for > 370 (culture-specific letters).

/*
// dump tailoring table
Console.WriteLine ("******** building table for {0} : c - {1} d - {2}",
culture.LCID, contractions.Length, level2Maps.Length);
foreach (Contraction c in contractions) {
foreach (char cc in c.Source)
Console.Write ("{0:X4} ", (int) cc);
Console.WriteLine (" -> '{0}'", c.Replacement);
}
*/
		}

		private void BuildTailoringTables (CultureInfo culture,
			TailoringInfo t,
			ref Contraction [] contractions,
			ref Level2Map [] diacriticals)
		{
			// collect tailoring entries.
			ArrayList cmaps = new ArrayList ();
			ArrayList dmaps = new ArrayList ();
			char [] tarr = Uni.TailoringValues;
			int idx = t.TailoringIndex;
			int end = idx + t.TailoringCount;
			while (idx < end) {
				int ss = idx + 1;
				char [] src = null;
				switch (tarr [idx]) {
				case '\x1': // SortKeyMap
					idx++;
					while (tarr [ss] != 0)
						ss++;
					src = new char [ss - idx];
					Array.Copy (tarr, idx, src, 0, ss - idx);
					byte [] sortkey = new byte [4];
					for (int i = 0; i < 4; i++)
						sortkey [i] = (byte) tarr [ss + 1 + i];
					cmaps.Add (new Contraction (
						src, null, sortkey));
					// it ends with 0
					idx = ss + 6;
					break;
				case '\x2': // DiacriticalMap
					dmaps.Add (new Level2Map (
						(byte) tarr [idx + 1],
						(byte) tarr [idx + 2]));
					idx += 3;
					break;
				case '\x3': // ReplacementMap
					idx++;
					while (tarr [ss] != 0)
						ss++;
					src = new char [ss - idx];
					Array.Copy (tarr, idx, src, 0, ss - idx);
					ss++;
					int l = ss;
					while (tarr [l] != 0)
						l++;
					string r = new string (tarr, ss, l - ss);
					cmaps.Add (new Contraction (
						src, r, null));
					idx = l + 1;
					break;
				default:
					throw new NotImplementedException (String.Format ("Mono INTERNAL ERROR (Should not happen): Collation tailoring table is broken for culture {0} ({1}) at 0x{2:X}", culture.LCID, culture.Name, idx));
				}
			}
			cmaps.Sort (ContractionComparer.Instance);
			dmaps.Sort (Level2MapComparer.Instance);
			contractions = cmaps.ToArray (typeof (Contraction))
				as Contraction [];
			diacriticals = dmaps.ToArray (typeof (Level2Map))
				as Level2Map [];
		}

		private void SetCJKTable (CultureInfo culture,
			ref ushort [] cjkTable, ref CodePointIndexer cjkIndexer,
			ref byte [] cjkLv2Table, ref CodePointIndexer cjkLv2Indexer)
		{
			// custom CJK table support.
			switch (GetNeutralCulture (culture).Name) {
			case "zh-CHS":
				cjkTable = Uni.CjkCHS;
				cjkIndexer = UUtil.CjkCHS;
				break;
			case "zh-CHT":
				cjkTable = Uni.CjkCHT;
				cjkIndexer = UUtil.Cjk;
				break;
			case "ja":
				cjkTable = Uni.CjkJA;
				cjkIndexer = UUtil.Cjk;
				break;
			case "ko":
				cjkTable = Uni.CjkKO;
				cjkLv2Table = Uni.CjkKOLv2;
				cjkIndexer = UUtil.Cjk;
				cjkLv2Indexer = UUtil.Cjk;
				break;
			}
		}

		static CultureInfo GetNeutralCulture (CultureInfo info)
		{
			CultureInfo ret = info;
			while (ret.Parent != null && ret.Parent.LCID != 127)
				ret = ret.Parent;
			return ret;
		}

		#endregion

		byte Category (int cp)
		{
			if (cp < 0x3000 || cjkTable == null)
				return categories [categoryIndexer.ToIndex (cp)];
			int idx = cjkIndexer.ToIndex (cp);
			ushort cjk = idx < 0 ? (ushort) 0 : cjkTable [idx];
			return cjk != 0 ? (byte) ((cjk & 0xFF00) >> 8) :
				categories [categoryIndexer.ToIndex (cp)];
		}

		byte Level1 (int cp)
		{
			if (cp < 0x3000 || cjkTable == null)
				return level1 [lv1Indexer.ToIndex (cp)];
			int idx = cjkIndexer.ToIndex (cp);
			ushort cjk = idx < 0 ? (ushort) 0 : cjkTable [idx];
			return cjk != 0 ? (byte) (cjk & 0xFF) :
				level1 [lv1Indexer.ToIndex (cp)];
		}

		byte Level2 (int cp)
		{
			if (cp < 0x3000 || cjkLv2Table == null)
				return level2 [lv2Indexer.ToIndex (cp)];
			int idx = cjkLv2Indexer.ToIndex (cp);
			byte ret = idx < 0 ? (byte) 0 : cjkLv2Table [idx];
			if (ret != 0)
				return ret;
			ret = level2 [lv2Indexer.ToIndex (cp)];
			if (level2Maps.Length == 0)
				return ret;
			for (int i = 0; i < level2Maps.Length; i++) {
				if (level2Maps [i].Source == ret)
					return level2Maps [i].Replace;
				else if (level2Maps [i].Source > ret)
					break;
			}
			return ret;
		}

		void SetOptions (CompareOptions options)
		{
			this.ignoreNonSpace = (options & CompareOptions.IgnoreNonSpace) != 0;
			this.ignoreSymbols = (options & CompareOptions.IgnoreSymbols) != 0;
			this.ignoreWidth = (options & CompareOptions.IgnoreWidth) != 0;
			this.ignoreCase = (options & CompareOptions.IgnoreCase) != 0;
			this.ignoreKanaType = (options & CompareOptions.IgnoreKanaType) != 0;
		}

		Contraction GetContraction (string s, int start, int end)
		{
			Contraction c = GetContraction (s, start, end, contractions);
			if (c != null || lcid == 127)
				return c;
			return GetContraction (s, start, end, invariant.contractions);
		}

		Contraction GetContraction (string s, int start, int end, Contraction [] clist)
		{
			for (int i = 0; i < clist.Length; i++) {
				Contraction ct = clist [i];
				int diff = ct.Source [0] - s [start];
				if (diff > 0)
					return null; // it's already sorted
				else if (diff < 0)
					continue;
				char [] chars = ct.Source;
				if (end - start < chars.Length)
					continue;
				bool match = true;
				for (int n = 0; n < chars.Length; n++)
					if (s [start + n] != chars [n]) {
						match = false;
						break;
					}
				if (match)
					return ct;
			}
			return null;
		}

		Contraction GetTailContraction (string s, int start, int end)
		{
			Contraction c = GetTailContraction (s, start, end, contractions);
			if (c != null || lcid == 127)
				return c;
			return GetTailContraction (s, start, end, invariant.contractions);
		}

		Contraction GetTailContraction (string s, int start, int end, Contraction [] clist)
		{
			for (int i = 0; i < clist.Length; i++) {
				Contraction ct = clist [i];
				int diff = ct.Source [0] - s [end];
				if (diff > 0)
					return null; // it's already sorted
				else if (diff < 0)
					continue;
				char [] chars = ct.Source;
				if (start - end + 1 < chars.Length)
					continue;
				bool match = true;
				int offset = start - chars.Length + 1;
				for (int n = 0; n < chars.Length; n++)
					if (s [offset + n] != chars [n]) {
						match = false;
						break;
					}
				if (match)
					return ct;
			}
			return null;
		}

		Contraction GetContraction (char c)
		{
			Contraction ct = GetContraction (c, contractions);
			if (ct != null || lcid == 127)
				return ct;
			return GetContraction (c, invariant.contractions);
		}

		Contraction GetContraction (char c, Contraction [] clist)
		{
			for (int i = 0; i < clist.Length; i++) {
				Contraction ct = clist [i];
				if (ct.Source [0] > c)
					return null; // it's already sorted
				if (ct.Source [0] == c && ct.Source.Length == 1)
					return ct;
			}
			return null;
		}

		int FilterOptions (int i)
		{
			if (ignoreWidth)
				i = widthCompat [widthIndexer.ToIndex (i)];
			if (ignoreCase)
				i = textInfo.ToLower ((char) i);
			if (ignoreKanaType)
				i = Uni.ToKanaTypeInsensitive (i);
			return i;
		}

		#region GetSortKey()

		public SortKey GetSortKey (string s)
		{
			return GetSortKey (s, CompareOptions.None);
		}

		public SortKey GetSortKey (string s, CompareOptions options)
		{
			return GetSortKey (s, 0, s.Length, options);
		}

		public SortKey GetSortKey (string s, int start, int length, CompareOptions options)
		{
			SetOptions (options);

			buf.Initialize (options, s, frenchSort);
			int end = start + length;
			previousChar = -1;
			GetSortKey (s, start, end);
			return buf.GetResultAndReset ();
		}

		int previousChar = -1;
		byte [] previousSortKey = null;

		enum ExtenderType {
			None,
			Simple,
			Voiced,
			Conditional,
			Buggy,
		}

		ExtenderType GetExtenderType (int i)
		{
			// LAMESPEC: Windows expects true for U+3005, but 
			// sometimes it does not represent to repeat just
			// one character.
			// Windows also expects true for U+3031 and U+3032,
			// but they should *never* repeat one character.

			if (i < 0x3005 || i > 0xFF70)
				return ExtenderType.None;
			if (i == 0xFE7C || i == 0xFE7D)
				return ExtenderType.Simple;
			if (i == 0xFF70)
				return ExtenderType.Conditional;
			if (i > 0x30FE)
				return ExtenderType.None;
			switch (i) {
			case 0x3005: // LAMESPEC: see above.
				return ExtenderType.Buggy;
			case 0x3031: // LAMESPEC: see above.
			case 0x3032: // LAMESPEC: see above.
			case 0x309D:
			case 0x30FD:
				return ExtenderType.Simple;
			case 0x309E:
			case 0x30FE:
				return ExtenderType.Voiced;
			case 0x30FC:
				return ExtenderType.Conditional;
			default:
				return ExtenderType.None;
			}
		}

		byte ToDashTypeValue (ExtenderType ext)
		{
			if (ignoreNonSpace) // LAMESPEC: huh, why?
				return 3;
			switch (ext) {
			case ExtenderType.None:
				return 3;
			case ExtenderType.Conditional:
				return 5;
			default:
				return 4;
			}
		}

		bool IsIgnorable (int i)
		{
			return Uni.IsIgnorable (i) ||
				ignoreSymbols && Uni.IsIgnorableSymbol (i);
		}

		void GetSortKey (string s, int start, int end)
		{
			for (int n = start; n < end; n++) {
				int i = s [n];

				ExtenderType ext = GetExtenderType (i);
				if (ext != ExtenderType.None) {
					i = previousChar;
					if (i >= 0)
						FillSortKeyRaw (i, ext);
					else if (previousSortKey != null) {
						byte [] b = previousSortKey;
						buf.AppendNormal (
							b [0],
							b [1],
							b [2] != 1 ? b [2] : Level2 (i),
							b [3] != 1 ? b [3] : level3 [lv3Indexer.ToIndex (i)]);
					}
					// otherwise do nothing.
					// (if the extender is the first char
					// in the string, then just ignore.)
					continue;
				}

				if (IsIgnorable (i))
					continue;
				i = FilterOptions (i);

				Contraction ct = GetContraction (s, n, end);
				if (ct != null) {
					if (ct.Replacement != null) {
						GetSortKey (ct.Replacement, 0, ct.Replacement.Length);
					} else {
						byte [] b = ct.SortKey;
						buf.AppendNormal (
							b [0],
							b [1],
							b [2] != 1 ? b [2] : Level2 (i),
							b [3] != 1 ? b [3] : level3 [lv3Indexer.ToIndex (i)]);
						previousSortKey = b;
						previousChar = -1;
					}
					n += ct.Source.Length - 1;
				}
				else {
					previousChar = i;
					FillSortKeyRaw (i, ExtenderType.None);
				}
			}
		}

		void FillSortKeyRaw (int i, ExtenderType ext)
		{
			if (0x3400 <= i && i <= 0x4DB5) {
				int diff = i - 0x3400;
				buf.AppendCJKExtension (
					(byte) (0x10 + diff / 254),
					(byte) (diff % 254 + 2));
				return;
			}

			UnicodeCategory uc = char.GetUnicodeCategory ((char) i);
			switch (uc) {
			case UnicodeCategory.PrivateUse:
				int diff = i - 0xE000;
				buf.AppendNormal (
					(byte) (0xE5 + diff / 254),
					(byte) (diff % 254 + 2),
					0,
					0);
				return;
			case UnicodeCategory.Surrogate:
				FillSurrogateSortKeyRaw (i);
				return;
			}

			byte level2 = Level2 (i);
			if (ext == ExtenderType.Buggy)
				level2 = 5;
			if (Uni.HasSpecialWeight ((char) i)) {
				byte level1 = Level1 (i);
				if (ext == ExtenderType.Conditional)
					level1 = (byte) ((level1 & 0xF) % 8);
				buf.AppendKana (
					Category (i),
					level1,
					level2,
					Uni.Level3 (i),
					Uni.IsJapaneseSmallLetter ((char) i),
					ToDashTypeValue (ext),
					!Uni.IsHiragana ((char) i),
					Uni.IsHalfWidthKana ((char) i)
					);
				if (!ignoreNonSpace && ext == ExtenderType.Voiced)
					// Append voice weight
					buf.AppendNormal (1, 1, 1, 0);
			}
			else
				buf.AppendNormal (
					Category (i),
					Level1 (i),
					level2,
					Uni.Level3 (i));
		}

		void FillSurrogateSortKeyRaw (int i)
		{
			int diffbase = 0;
			int segment = 0;
			byte lower = 0;

			if (i < 0xD840) {
				diffbase = 0xD800;
				segment = 0x41;
				lower = (byte) ((i == 0xD800) ? 0x3E : 0x3F);
			} else if (0xD840 <= i && i < 0xD880) {
				diffbase = 0xD840;
				segment = 0xF2;
				lower = 0x3E;
			} else if (0xDB80 <= i && i < 0xDC00) {
				diffbase = 0xDB80 - 0x40;
				segment = 0xFE;
				lower = 0x3E;
			} else {
				diffbase = 0xDC00 - 0xF8 + 2;
				segment = 0x41;
				lower = 0x3F;
			}
			int diff = i - diffbase;

			buf.AppendNormal (
				(byte) (segment + diff / 254),
				(byte) (diff % 254 + 2),
				lower,
				lower);
		}

		#endregion

		#region Compare()

		public int Compare (string s1, string s2)
		{
			return Compare (s1, s2, CompareOptions.None);
		}

		public int Compare (string s1, string s2, CompareOptions options)
		{
			return Compare (s1, 0, s1.Length, s2, 0, s2.Length, options);
		}

		class Escape
		{
			public string Source;
			public int Index;
			public int Start;
			public int End;
		}

		// Those instances are reused not to invoke instantiation
		// during Compare().
		Escape escape1 = new Escape ();
		Escape escape2 = new Escape ();

		private int CompareOrdinal (string s1, int idx1, int len1,
			string s2, int idx2, int len2)
		{
			int min = len1 < len2 ? len1 : len2;
			int end1 = idx1 + min;
			int end2 = idx2 + min;
			for (int i1 = idx1, i2 = idx2;
				i1 < end1 && i2 < end2; i1++, i2++)
				if (s1 [i1] != s2 [i2])
					return s1 [i1] - s2 [i2];
			return len1 == len2 ? 0 :
				len1 == min ? - 1 : 1;
		}

		public int Compare (string s1, int idx1, int len1,
			string s2, int idx2, int len2, CompareOptions options)
		{
			// quick equality check
			if (idx1 == idx2 && len1 == len2 &&
				Object.ReferenceEquals (s1, s2))
				return 0;
			// FIXME: this should be done inside Compare() at
			// any time.
//			int ord = CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
//			if (ord == 0)
//				return 0;
			if (options == CompareOptions.Ordinal)
				return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);

#if false // stable easy version, depends on GetSortKey().
			SortKey sk1 = GetSortKey (s1, idx1, len1, options);
			SortKey sk2 = GetSortKey (s2, idx2, len2, options);
			byte [] d1 = sk1.KeyData;
			byte [] d2 = sk2.KeyData;
			int len = d1.Length > d2.Length ? d2.Length : d1.Length;
			for (int i = 0; i < len; i++)
				if (d1 [i] != d2 [i])
					return d1 [i] < d2 [i] ? -1 : 1;
			return d1.Length == d2.Length ? 0 : d1.Length < d2.Length ? -1 : 1;
#else
			SetOptions (options);
			escape1.Source = null;
			escape2.Source = null;
			int ret = Compare (s1, idx1, len1, s2, idx2, len2, (options & CompareOptions.StringSort) != 0);
			return ret == 0 ? 0 : ret < 0 ? -1 : 1;
#endif
		}

		int Compare (string s1, int idx1, int len1, string s2,
			int idx2, int len2, bool stringSort)
		{
			int start1 = idx1;
			int start2 = idx2;
			int end1 = idx1 + len1;
			int end2 = idx2 + len2;

			// It holds final result that comes from the comparison
			// at level 2 or lower. Even if Compare() found the
			// difference at level 2 or lower, it still has to
			// continue level 1 comparison. FinalResult is used
			// when there was no further differences.
			int finalResult = 0;
			// It holds the comparison level to do. It starts from
			// 5, and becomes 1 when we do primary-only comparison.
			int currentLevel = 5;

			int lv5At1 = -1;
			int lv5At2 = -1;
			int lv5Value1 = 0;
			int lv5Value2 = 0;

			while (true) {
				if (idx1 >= end1) {
					if (escape1.Source == null)
						break;
					s1 = escape1.Source;
					start1 = escape1.Start;
					idx1 = escape1.Index;
					end1 = escape1.End;
					escape1.Source = null;
				}
				if (idx2 >= end2) {
					if (escape2.Source == null)
						break;
					s2 = escape2.Source;
					start2 = escape2.Start;
					idx2 = escape2.Index;
					end2 = escape2.End;
					escape2.Source = null;
				}
#if false
// FIXME: optimization could be done here.

				if (s1 [idx1] == s2 [idx2]) {
					idx1++;
					idx2++;
					continue;
				}
//				while (idx1 >= start1 && !IsSafe ((int) s [idx1]))
//					idx1--;
//				while (idx2 >= start2 && !IsSafe ((int) s [idx2]))
//					idx2--;
#endif

				int cur1 = idx1;
				int cur2 = idx2;
				byte [] sk1 = null;
				byte [] sk2 = null;
				int i1 = FilterOptions (s1 [idx1]);
				int i2 = FilterOptions (s2 [idx2]);
				bool special1 = false;
				bool special2 = false;
				byte cat1 = Category (i1);
				byte cat2 = Category (i2);

				// Handle special weight characters
				if (!stringSort && currentLevel > 4) {
					if (cat1 == 6) {
						lv5At1 = escape1.Source != null ?
							escape1.Index - escape1.Start :
							cur1 - start1;
						// here Windows has a bug that it does
						// not consider thirtiary weight.
						lv5Value1 = Level1 (i1) << 8 + Uni.Level3 (i1);
						idx1++;
					}
					if (cat2 == 6) {
						lv5At2 = escape2.Source != null ?
							escape2.Index - escape2.Start :
							cur2 - start2;
						// here Windows has a bug that it does
						// not consider thirtiary weight.
						lv5Value2 = Level1 (i2) << 8 + Uni.Level3 (i2);
						idx2++;
					}
					if (cat1 == 6 || cat2 == 6) {
						currentLevel = 4;
						continue;
					}
				}

				Contraction ct1 = GetContraction (s1, idx1, end1);
				if (ct1 != null) {
					idx1 += ct1.Source.Length;
					if (ct1.SortKey != null)
						sk1 = ct1.SortKey;
					else if (escape1.Source == null) {
						escape1.Source = s1;
						escape1.Start = start1;
						escape1.Index = cur1 + ct1.Source.Length;
						escape1.End = end1;
						s1 = ct1.Replacement;
						idx1 = 0;
						start1 = 0;
						end1 = s1.Length;
						continue;
					}
				}
				else {
					sk1 = charSortKey;
					sk1 [0] = cat1;
					sk1 [1] = Level1 (i1);
					if (!ignoreNonSpace && currentLevel > 1)
						sk1 [2] = Level2 (i1);
					if (currentLevel > 2)
						sk1 [3] = Uni.Level3 (i1);
					if (currentLevel > 3)
						special1 = Uni.HasSpecialWeight ((char) i1);
					idx1++;
				}

				// add diacritical marks in s1 here
				while (idx1 < end1) {
					if (Category (s1 [idx1]) != 1)
						break;
					sk1 [1] = (byte) (sk1 [1] + 
						Level2 (s1 [idx1]));
					idx1++;
				}

				Contraction ct2 = GetContraction (s2, idx2, end2);
				if (ct2 != null) {
					idx2 += ct2.Source.Length;
					if (ct2.SortKey != null)
						sk2 = ct2.SortKey;
					else if (escape2.Source == null) {
						escape2.Source = s2;
						escape2.Start = 0;
						escape2.Index = cur2 + ct2.Source.Length;
						escape2.End = end2;
						s2 = ct2.Replacement;
						idx2 = 0;
						end2 = s2.Length;
						continue;
					}
				}
				else {
					sk2 = charSortKey2;
					sk2 [0] = cat2;
					sk2 [1] = Level1 (i2);
					if (!ignoreNonSpace && currentLevel > 1)
						sk2 [2] = Level2 (i2);
					if (currentLevel > 2)
						sk2 [3] = Uni.Level3 (i2);
					if (currentLevel > 3)
						special2 = Uni.HasSpecialWeight ((char) i2);
					idx2++;
				}

				// add diacritical marks in s2 here
				while (idx2 < end2) {
					if (Category (s2 [idx2]) != 1)
						break;
					sk2 [1] = (byte) (sk2 [1] + 
						Level2 (s2 [idx2]));
					idx2++;
				}

				int ret = sk1 [0] - sk2 [0];
				ret = ret != 0 ? ret : sk1 [1] - sk2 [1];
				if (ret != 0)
					return ret;
				if (currentLevel == 1)
					continue;
				if (!ignoreNonSpace) {
					ret = sk1 [2] - sk2 [2];
					if (ret != 0) {
						finalResult = ret;
						currentLevel = frenchSort ? 2 : 1;
						continue;
					}
				}
				if (currentLevel == 2)
					continue;
				ret = sk1 [3] - sk2 [3];
				if (ret != 0) {
					finalResult = ret;
					currentLevel = 2;
					continue;
				}
				if (currentLevel == 3)
					continue;
				if (special1 != special2) {
					finalResult = special1 ? 1 : -1;
					currentLevel = 3;
					continue;
				}
				if (special1) {
					ret = CompareFlagPair (
						Uni.IsJapaneseSmallLetter ((char) i1),
						Uni.IsJapaneseSmallLetter ((char) i2));
					ret = ret != 0 ? ret :
						Uni.GetJapaneseDashType ((char) i1) -
						Uni.GetJapaneseDashType ((char) i2);
					ret = ret != 0 ? ret : CompareFlagPair (
						!Uni.IsHiragana ((char) i1),
						!Uni.IsHiragana ((char) i2));
					ret = ret != 0 ? ret : CompareFlagPair (
						Uni.IsHalfWidthKana ((char) i1),
						Uni.IsHalfWidthKana ((char) i2));
					if (ret != 0) {
						finalResult = ret;
						currentLevel = 3;
						continue;
					}
				}
			}
			for (; idx1 < end1; idx1++)
				if (!IsIgnorable (s1 [idx1]))
					break;
			for (; idx2 < end2; idx2++)
				if (!IsIgnorable (s2 [idx2]))
					break;
			// If there were only level 3 or lower differences,
			// then we still have to find diacritical differences
			// if any.
			if (finalResult != 0 && currentLevel > 2) {
				while (idx1 < end1 && idx2 < end2) {
					if (!Uni.IsIgnorableNonSpacing (s1 [idx1]))
						break;
					if (!Uni.IsIgnorableNonSpacing (s2 [idx2]))
						break;
					finalResult = Level2 (FilterOptions ((s1 [idx1]))) - Level2 (FilterOptions (s2 [idx2]));
					if (finalResult != 0)
						break;
					idx1++;
					idx2++;
				}
			}
			// we still have to handle level 5
			if (finalResult == 0) {
				finalResult = lv5At1 - lv5At2;
				if (finalResult == 0)
					finalResult = lv5Value1 - lv5Value2;
			}
			return idx1 != end1 ? 1 : idx2 == end2 ? finalResult : -1;
		}

		#endregion

		#region IsPrefix() and IsSuffix()

		public bool IsPrefix (string src, string target, CompareOptions opt)
		{
			return IsPrefix (src, target, 0, src.Length, opt);
		}

		public bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
		{
			SetOptions (opt);
			return IsPrefix (s, target, start, length);
		}

		// returns the consumed length in positive number, or -1 if
		// target was not a prefix.
		bool IsPrefix (string s, string target, int start, int length)
		{
			// quick check : simple codepoint comparison
			if (s.Length >= target.Length) {
				int si = start;
				for (int i = 0; si < length && i < target.Length; i++, si++)
					if (s [si] != target [i])
						break;
				if (si == start + target.Length)
					return true;
			}

			escapedSourceIndex = -1;
			return IsPrefixInternal (s, target, start, length);
		}

		bool IsPrefixInternal (string s, string target, int start, int length)
		{
			int si = start;
			int end = start + length;
			int ti = 0;
			string source = s;

			while (ti < target.Length) {
				if (IsIgnorable (target [ti])) {
					ti++;
					continue;
				}
				if (si >= end) {
					if (s == source)
						break;
					s = source;
					si = escapedSourceIndex;
					end = start + length;
					escapedSourceIndex = -1;
					continue;
				}
				if (IsIgnorable (s [si])) {
					si++;
					continue;
				}

				// Check contraction for target.
				Contraction ctt = GetContraction (target, ti, target.Length);
				if (ctt != null) {
					ti += ctt.Source.Length;
					if (ctt.SortKey != null) {
						int ret = GetMatchLength (ref s, ref si, ref end, -1, ctt.SortKey, true);
						if (ret < 0)
							return false;
						si += ret;
					} else {
						string r = ctt.Replacement;
						int i = 0;
						while (i < r.Length && si < end) {
							int ret = GetMatchLength (ref s, ref si, ref end, r [i]);
							if (ret < 0)
								return false;
							si += ret;
							i++;
						}
						if (i < r.Length && si >= end)
							return false;
					}
				}
				else {
					int ret = GetMatchLength (ref s, ref si, ref end, target [ti]);
					if (ret < 0)
						return false;
					si += ret;
					ti++;
				}
			}
			if (si == end) {
				// All codepoints in the compared range
				// matches. In that case, what matters 
				// is whether the remaining part of 
				// "target" is ignorable or not.
				while (ti < target.Length)
					if (!IsIgnorable (target [ti++]))
						return false;
				return true;
			}
			return true;
		}

		// WARNING: Don't invoke it outside IsPrefix().
		int GetMatchLength (ref string s, ref int idx, ref int end, char target)
		{
			int it = FilterOptions ((int) target);
			charSortKey [0] = Category (it);
			charSortKey [1] = Level1 (it);
			if (!ignoreNonSpace)
				charSortKey [2] = Level2 (it);
			charSortKey [3] = Uni.Level3 (it);

			return GetMatchLength (ref s, ref idx, ref end, it, charSortKey, !Uni.HasSpecialWeight ((char) it));
		}

		// WARNING: Don't invoke it outside IsPrefix().
		// returns consumed source length (mostly 1, source length in case of contraction)
		int GetMatchLength (ref string s, ref int idx, ref int end, int it, byte [] sortkey, bool noLv4)
		{
			Contraction ct = null;
			// If there is already expansion, then it should not
			// process further expansions.
			if (escapedSourceIndex < 0)
				ct = GetContraction (s, idx, end);
			if (ct != null) {
				if (ct.SortKey != null) {
					if (!noLv4)
						return -1;
					for (int i = 0; i < ct.SortKey.Length; i++)
						if (sortkey [i] != ct.SortKey [i])
							return -1;
					return ct.Source.Length;
				} else {
					escapedSourceIndex = idx + ct.Source.Length;
					s = ct.Replacement;
					idx = 0;
					end = s.Length;
					return GetMatchLength (ref s, ref idx, ref end, it, sortkey, noLv4);
				}
			} else {
				// primitive comparison
				if (Compare (s [idx], it, sortkey) != 0)
					return -1;
				return 1;
			}
		}

		// IsSuffix()

		public bool IsSuffix (string src, string target, CompareOptions opt)
		{
			return IsSuffix (src, target, src.Length - 1, src.Length, opt);
		}

		public bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
		{
			SetOptions (opt);

			// quick check : simple codepoint comparison
			if (s.Length >= target.Length) {
				int si = start;
				int se = start - length;
				for (int i = target.Length - 1; si >= se && i >= 0; i--, si--)
					if (s [si] != target [i])
						break;
				if (si == start + target.Length)
					return true;
			}

			escapedSourceIndex = -1;
			return IsSuffix (s, target, start, length);
		}

		bool IsSuffix (string s, string target, int start, int length)
		{
			int si = start;
			int ti = target.Length - 1;
			string source = s;
			int end = start - length + 1;

			while (ti >= 0) {
				if (IsIgnorable (target [ti])) {
					ti--;
					continue;
				}
				if (si < 0) {
					if (s == source)
						break;
					s = source;
					si = escapedSourceIndex;
					escapedSourceIndex = -1;
					continue;
				}
				if (IsIgnorable (s [si])) {
					si--;
					continue;
				}

				// Check contraction for target.
				Contraction ctt = GetTailContraction (target, ti, 0);
				if (ctt != null) {
					ti -= ctt.Source.Length;
					if (ctt.SortKey != null) {
						int ret = GetMatchLengthBack (ref s, ref si, ref end, -1, ctt.SortKey, true);
						if (ret < 0)
							return false;
						si -= ret;
					} else {
						string r = ctt.Replacement;
						int i = r.Length - 1;
						while (i >= 0 && si >= end) {
							int ret = GetMatchLengthBack (ref s, ref si, ref end, r [i]);
							if (ret < 0)
								return false;
							si -= ret;
							i--;
						}
						if (i >= 0 && si < end)
							return false;
					}
				}
				else {
					int ret = GetMatchLengthBack (ref s, ref si, ref end, target [ti]);
					if (ret < 0)
						return false;
					si -= ret;
					ti--;
				}
			}
			if (si < end) {
				// All codepoints in the compared range
				// matches. In that case, what matters 
				// is whether the remaining part of 
				// "target" is ignorable or not.
				while (ti >= 0)
					if (!IsIgnorable (target [ti--]))
						return false;
				return true;
			}
			return true;
		}

		// WARNING: Don't invoke it outside IsSuffix().
		int GetMatchLengthBack (ref string s, ref int idx, ref int end, char target)
		{
			int it = FilterOptions ((int) target);
			charSortKey [0] = Category (it);
			charSortKey [1] = Level1 (it);
			if (!ignoreNonSpace)
				charSortKey [2] = Level2 (it);
			charSortKey [3] = Uni.Level3 (it);

			return GetMatchLengthBack (ref s, ref idx, ref end, it, charSortKey, !Uni.HasSpecialWeight ((char) it));
		}

		// WARNING: Don't invoke it outside IsSuffix().
		// returns consumed source length (mostly 1, source length in case of contraction)
		int GetMatchLengthBack (ref string s, ref int idx, ref int end, int it, byte [] sortkey, bool noLv4)
		{
			Contraction ct = null;
			// If there is already expansion, then it should not
			// process further expansions.
			if (escapedSourceIndex < 0)
				ct = GetTailContraction (s, idx, end);
			if (ct != null) {
				if (ct.SortKey != null) {
					if (!noLv4)
						return -1;
					for (int i = 0; i < ct.SortKey.Length; i++)
						if (sortkey [i] != ct.SortKey [i])
							return -1;
					return ct.Source.Length;
				} else {
					escapedSourceIndex = idx - ct.Source.Length;
					s = ct.Replacement;
					idx = s.Length - 1;
					end = 0;
					return GetMatchLength (ref s, ref idx, ref end, it, sortkey, noLv4);
				}
			} else {
				// primitive comparison
				if (Compare (s [idx], it, sortkey) != 0)
					return -1;
				return 1;
			}
		}

		// Common use methods 

		// returns comparison result.
		private int Compare (char src, int ct, byte [] sortkey)
		{
			// char-by-char comparison.
			int cs = FilterOptions (src);
			if (cs == ct)
				return 0;
			// lv.1 to 3
			int ret = Category (cs) - Category (ct);
			if (ret != 0)
				return ret;
			ret = Level1 (cs) - Level1 (ct);
			if (ret != 0)
				return ret;
			if (!ignoreNonSpace) {
				ret = Level2 (cs) - Level2 (ct);
				if (ret != 0)
					return ret;
			}
			ret = Uni.Level3 (cs) - Uni.Level3 (ct);
			if (ret != 0)
				return ret;
			// lv.4 (only when required). No need to check cj coz
			// there is no pair of characters that has the same
			// primary level and differs here.
			if (!Uni.HasSpecialWeight (src))
				return 0;
			char target = (char) ct;
			ret = CompareFlagPair (
				Uni.IsJapaneseSmallLetter (src),
				Uni.IsJapaneseSmallLetter (target));
			if (ret != 0)
				return ret;
			ret = Uni.GetJapaneseDashType (src) -
				Uni.GetJapaneseDashType (target);
			if (ret != 0)
				return ret;
			ret = CompareFlagPair (Uni.IsHiragana (src),
				Uni.IsHiragana (target));
			if (ret != 0)
				return ret;
			ret = CompareFlagPair (Uni.IsHalfWidthKana (src),
				Uni.IsHalfWidthKana (target));
			return ret;
		}

		int CompareFlagPair (bool b1, bool b2)
		{
			return b1 == b2 ? 0 : b1 ? 1 : -1;
		}

		#endregion

		#region IndexOf() / LastIndexOf()

		// IndexOf (string, string, CompareOptions)
		// IndexOf (string, string, int, int, CompareOptions)
		// IndexOf (string, char, int, int, CompareOptions)
		// IndexOfPrimitiveChar (string, int, int, char)
		// IndexOfSortKey (string, int, int, byte[], char, int, bool)
		// IndexOf (string, string, int, int)

		public int IndexOf (string s, string target, CompareOptions opt)
		{
			return IndexOf (s, target, 0, s.Length, opt);
		}

		public int IndexOf (string s, string target, int start, int length, CompareOptions opt)
		{
			SetOptions (opt);
			return IndexOf (s, target, start, length);
		}

		public int IndexOf (string s, char target, CompareOptions opt)
		{
			return IndexOf (s, target, 0, s.Length, opt);
		}

		public int IndexOf (string s, char target, int start, int length, CompareOptions opt)
		{
			SetOptions (opt);

			// If target is contraction, then use string search.
			Contraction ct = GetContraction (target);
			if (ct != null) {
				if (ct.Replacement != null)
					return IndexOfPrimitiveChar (s, start, length, ct.Replacement [0]);
				else
					return IndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
			}
			else
				return IndexOfPrimitiveChar (s, start, length, target);
		}

		// Searches target char w/o checking contractions
		int IndexOfPrimitiveChar (string s, int start, int length, char target)
		{
			int ti = FilterOptions ((int) target);
			charSortKey [0] = Category (ti);
			charSortKey [1] = Level1 (ti);
			if (!ignoreNonSpace)
				charSortKey [2] = Level2 (ti);
			charSortKey [3] = Uni.Level3 (ti);
			return IndexOfSortKey (s, start, length, charSortKey, target, ti, !Uni.HasSpecialWeight ((char) ti));
		}

		// Searches target byte[] keydata
		int IndexOfSortKey (string s, int start, int length, byte [] sortkey, char target, int ti, bool noLv4)
		{
			int end = start + length;
			for (int idx = start; idx < end; idx++) {
				int cur = idx;
				if (Matches (s, ref idx, end, ti, target, sortkey, noLv4, false))
					return cur;
			}
			return -1;
		}

		// Searches string. Search head character (or keydata when
		// the head is contraction sortkey) and try IsPrefix().
		int IndexOf (string s, string target, int start, int length)
		{
			Contraction ct = GetContraction (target, 0, target.Length);
			byte [] sortkey = ct != null ? ct.SortKey : null;
			string replace = ct != null ? ct.Replacement : null;
			do {
				int idx = 0;
				if (sortkey != null)
					idx = IndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
				else if (replace != null)
					idx = IndexOf (ct.Replacement, target, 0, ct.Replacement.Length);
				else
					idx = IndexOfPrimitiveChar (s, start, length, target [0]);
				if (idx < 0)
					return -1;
				if (IsPrefix (s, target, idx, length - (idx - start)))
					return idx;
				start++;
				length--;
			} while (length > 0);
			return -1;
		}

		//
		// There are the same number of IndexOf() related methods,
		// with the same functionalities for each.
		//

		public int LastIndexOf (string s, string target, CompareOptions opt)
		{
			return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
		}

		public int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
		{
			SetOptions (opt);
			return LastIndexOf (s, target, start, length);
		}

		public int LastIndexOf (string s, char target, CompareOptions opt)
		{
			return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
		}

		public int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
		{
			SetOptions (opt);

			// If target is contraction, then use string search.
			Contraction ct = GetContraction (target);
			if (ct != null) {
				if (ct.Replacement != null)
					return LastIndexOfPrimitiveChar (s, start, length, ct.Replacement [0]);
				else
					return LastIndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
			}
			else
				return LastIndexOfPrimitiveChar (s, start, length, target);
		}

		// Searches target char w/o checking contractions
		int LastIndexOfPrimitiveChar (string s, int start, int length, char target)
		{
			int ti = FilterOptions ((int) target);
			charSortKey [0] = Category (ti);
			charSortKey [1] = Level1 (ti);
			if (!ignoreNonSpace)
				charSortKey [2] = Level2 (ti);
			charSortKey [3] = Uni.Level3 (ti);
			return LastIndexOfSortKey (s, start, length, charSortKey, target, ti, !Uni.HasSpecialWeight ((char) ti));
		}

		// Searches target byte[] keydata
		int LastIndexOfSortKey (string s, int start, int length, byte [] sortkey, char target, int ti, bool noLv4)
		{
			int end = start - length;

			for (int idx = start; idx > end; idx--) {
				int cur = idx;
				if (Matches (s, ref idx, end, ti, target, sortkey, noLv4, true))
					return cur;
			}
			return -1;
		}

		// Searches string. Search head character (or keydata when
		// the head is contraction sortkey) and try IsPrefix().
		int LastIndexOf (string s, string target, int start, int length)
		{
			int orgStart = start;

			do {
				int idx = 0;
				Contraction ct = GetContraction (s, start, length);
				if (ct != null) {
					if (ct.SortKey != null)
						idx = LastIndexOfSortKey (s, start, length, ct.SortKey, char.MinValue, -1, true);
					else
						idx = LastIndexOf (ct.Replacement, target, ct.Replacement.Length - 1, ct.Replacement.Length);
				}
				else
					idx = LastIndexOfPrimitiveChar (s, start, length, target [0]);

				if (idx < 0)
					return -1;
				if (IsPrefix (s, target, idx, orgStart - idx + 1))
					return idx;
				length--;
				start--;
			} while (length > 0);
			return -1;
		}

		private bool Matches (string s, ref int idx, int end, int ti, char target, byte [] sortkey, bool noLv4, bool lastIndexOf)
		{
			switch (char.GetUnicodeCategory (s [idx])) {
			case UnicodeCategory.PrivateUse:
			case UnicodeCategory.Surrogate:
				if (s [idx] != target)
					return false;
				return true;
			}

			char sc = char.MinValue;
			Contraction ct = GetContraction (s, idx, end);
			// if lv4 exists, it never matches contraction
			if (ct != null && noLv4) {
				if (lastIndexOf)
					idx -= ct.Source.Length - 1;
				else
					idx += ct.Source.Length - 1;
				if (ct.SortKey != null) {
					for (int i = 0; i < sortkey.Length; i++)
						if (ct.SortKey [i] != sortkey [i])
							return false;
					return true;
				}
				// Here is the core of LAMESPEC
				// described at the top of the source.
				sc = ct.Replacement [0];
			}
			else
				sc = s [idx];

			if (sc == target)
				return true;
			int si = FilterOptions ((int) sc);
			if (Category (si) != sortkey [0] ||
				Level1 (si) != sortkey [1] ||
				!ignoreNonSpace && Level2 (si) != sortkey [2] ||
				Uni.Level3 (si) != sortkey [3])
				return false;
			if (noLv4 && !Uni.HasSpecialWeight ((char) si))
				return true;
			else if (noLv4)
				return false;
			if (Uni.IsJapaneseSmallLetter ((char) si) !=
				Uni.IsJapaneseSmallLetter ((char) ti) ||
				Uni.GetJapaneseDashType ((char) si) !=
				Uni.GetJapaneseDashType ((char) ti) ||
				!Uni.IsHiragana ((char) si) !=
				!Uni.IsHiragana ((char) ti) ||
				Uni.IsHalfWidthKana ((char) si) !=
				Uni.IsHalfWidthKana ((char) ti))
				return false;
			return true;
		}

		#endregion
	}
}