X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mcs%2Fclass%2Fcorlib%2FMono.Globalization.Unicode%2Fcreate-mscompat-collation-table.cs;h=02bee38d373c50a0221d9d970d25cbfcb98d3590;hb=bd9f9ee7cb81823608edc76ef9d0b6416783fe71;hp=6811b83855af244089bec580709762265900f4cd;hpb=6c5e0f97434a60a1a5b7785cb68f83b6b57010d7;p=mono.git

diff --git a/mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs b/mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs
index 6811b83855a..02bee38d373 100644
--- a/mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs
+++ b/mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs
@@ -1,4 +1,31 @@
 //
+// create-mscompat-collation-table.cs : generates Windows-like sortkey tables.
+//
+// Author:
+//	Atsushi Enomoto  <atsushi@ximian.com>
+//
+// Copyright (C) 2005 Novell, Inc (http://www.novell.com)
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the
+// "Software"), to deal in the Software without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to
+// the following conditions:
+// 
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
 //
 // There are two kind of sort keys : which are computed and which are laid out
 // as an indexed array. Computed sort keys are:
@@ -6,24 +33,9 @@
 //	- Surrogate
 //	- PrivateUse
 //
-// Also, for composite characters it should prepare different index table.
-//
 // Though it is possible to "compute" level 3 weights, they are still dumped
 // to an array to avoid execution cost.
 //
-
-//
-// * sortkey getter signature
-//
-//	int GetSortKey (string s, int index, SortKeyBuffer buf)
-//	Stores sort key for corresponding character element into buf and
-//	returns the length of the consumed _source_ character element in s.
-//
-// * character length to consume
-//
-//	If there are characters whose primary weight is 0, they are consumed
-//	and considered as a part of the character element.
-//
 #define Binary
 
 using System;
@@ -33,6 +45,8 @@ using System.Globalization;
 using System.Text;
 using System.Xml;
 
+using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
+
 namespace Mono.Globalization.Unicode
 {
 	internal class MSCompatSortKeyTableGenerator
@@ -61,7 +75,8 @@ namespace Mono.Globalization.Unicode
 		const int DecompositionCompat = 0x11;
 		const int DecompositionCanonical = 0x12;
 
-		TextWriter Result = Console.Out;
+		TextWriter CSResult = Console.Out;
+		TextWriter CResult = TextWriter.Null;
 
 		byte [] fillIndex = new byte [256]; // by category
 		CharMapEntry [] map = new CharMapEntry [char.MaxValue + 1];
@@ -96,12 +111,20 @@ namespace Mono.Globalization.Unicode
 		byte [] diacritical = new byte [char.MaxValue + 1];
 
 		string [] diacritics = new string [] {
-			// LATIN
-			"WITH VERTICAL LINE ABOVE;",
-			"WITH GRAVE ACCENT;", "WITH ACUTE ACCENT;", "WITH CIRCUMFLEX ACCENT;",
-			"WITH ACUTE;", "WITH GRAVE;", "WITH DOT ABOVE;", " MIDDLE DOT;",
-			"WITH CIRCUMFLEX;", "WITH DIAERESIS;", "WITH CARON;", "WITH BREVE;",
-			" DIALYTIKA AND TONOS;", "WITH MACRON;", "WITH TILDE;", "WITH RING ABOVE;",
+			// LATIN, CYRILLIC etc.
+			"VERTICAL LINE ABOVE", "UPTURN", "DOUBLE-STRUCK",
+			"ABKHASIAN",
+			"MIDDLE HOOK", "WITH VERTICAL LINE ABOVE;", "WITH TONOS",
+			"WITH ACUTE ACCENT;", "WITH GRAVE ACCENT;",
+			"WITH ACUTE;", "WITH GRAVE;",
+			//
+			"WITH DOT ABOVE;", " MIDDLE DOT;",
+			"WITH CIRCUMFLEX ACCENT;", "WITH CIRCUMFLEX;",
+			"WITH DIALYTIKA;",
+			"WITH DIAERESIS;", "WITH CARON;", "WITH BREVE;",
+			"DIALYTIKA TONOS", "DIALYTIKA AND TONOS",
+			"ABKHASIAN CHE WITH DESCENDER",
+			"WITH MACRON;", "WITH TILDE;", "WITH RING ABOVE;",
 			"WITH OGONEK;", "WITH CEDILLA;",
 			//
 			" DOUBLE ACUTE;", " ACUTE AND DOT ABOVE;",
@@ -123,10 +146,10 @@ namespace Mono.Globalization.Unicode
 			" BREVE AND TILDE",
 			" CEDILLA AND BREVE",
 			" OGONEK AND MACRON",
-			//
-			"WITH OVERLINE",
+			// 0x40
+			"WITH OVERLINE", "DOUBLE VERTICAL LINE ABOVE",
 			"WITH HOOK;", "LEFT HOOK;", " WITH HOOK ABOVE;",
-			" DOUBLE GRAVE;",
+			" DOUBLE GRAVE",
 			" INVERTED BREVE",
 			"ROMAN NUMERAL",
 			" PRECEDED BY APOSTROPHE",
@@ -134,11 +157,12 @@ namespace Mono.Globalization.Unicode
 			" LINE BELOW;", " CIRCUMFLEX AND HOOK ABOVE",
 			" PALATAL HOOK",
 			" DOT BELOW;",
-			" RETROFLEX;", "DIAERESIS BELOW",
-			" RING BELOW",
+			" RETROFLEX;", "DIAERESIS BELOW", "RETROFLEX HOOK",
+			" RING BELOW", "LOW VERTICAL LINE",
 			//
 			" CIRCUMFLEX BELOW", "HORN AND ACUTE",
 			" BREVE BELOW;", " HORN AND GRAVE",
+			" LOW MACRON",
 			" TILDE BELOW",
 			" TOPBAR",
 			" DOT BELOW AND DOT ABOVE",
@@ -146,6 +170,7 @@ namespace Mono.Globalization.Unicode
 			" CIRCUMFLEX AND DOT BELOW",
 			" BREVE AND DOT BELOW",
 			" DOT BELOW AND MACRON",
+			" TONE TWO",
 			" HORN AND HOOK ABOVE",
 			" HORN AND DOT",
 			// CIRCLED, PARENTHESIZED and so on
@@ -155,10 +180,12 @@ namespace Mono.Globalization.Unicode
 			};
 		byte [] diacriticWeights = new byte [] {
 			// LATIN.
-			5,
-			0xF, 0xE, 0x12,
-			0xE, 0xF, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
-			0x17, 0x19, 0x1A, 0x1B, 0x1C,
+			3, 3, 3, 5, 5, 5, 5,
+			0xE, 0xF,
+			0xE, 0xF,
+			//
+			0x10, 0x11, 0x12, 0x12, 0x13, 0x13, 0x14, 0x15, 0x16,
+			0x16, 0x17, 0x17, 0x19, 0x1A, 0x1B, 0x1C,
 			//
 			0x1D, 0x1D, 0x1E, 0x1E, 0x1E, 0x1F, 0x1F, 0x1F,
 			0x20, 0x21, 0x22, 0x22, 0x23, 0x24,
@@ -166,12 +193,13 @@ namespace Mono.Globalization.Unicode
 			0x25, 0x25, 0x25, 0x26, 0x28, 0x28, 0x28,
 			0x29, 0x2A, 0x2B, 0x2C, 0x2F, 0x30,
 			//
-			0x40, 0x43, 0x43, 0x43, 0x44, 0x46, 0x47, 0x48,
-			0x52, 0x55, 0x55, 0x57, 0x58, 0x59, 0x59, 0x5A,
+			0x40, 0x41, 0x43, 0x43, 0x43, 0x44, 0x46, 0x47, 0x48,
+			0x52, 0x55, 0x55, 0x57, 0x58, 0x59, 0x59, 0x59,
+			0x5A, 0x5A,
 			//
-			0x60, 0x60, 0x61, 0x61, 0x63, 0x68, 0x68,
+			0x60, 0x60, 0x61, 0x61, 0x62, 0x63, 0x68, 0x68,
 			0x69, 0x69, 0x6A, 0x6D, 0x6E,
-			0x95, 0xAA,
+			0x87, 0x95, 0xAA,
 			// CIRCLED, PARENTHESIZED and so on.
 			0xEE, 0xEE, 0xEE, 0xEE, 0xEE,
 			0xF3, 0xF3, 0xF3
@@ -185,7 +213,6 @@ namespace Mono.Globalization.Unicode
 			0xE50, 0xE60, 0xED0, 0xEE0
 			};
 
-		char [] orderedCyrillic;
 		char [] orderedGurmukhi;
 		char [] orderedGujarati;
 		char [] orderedGeorgian;
@@ -212,11 +239,9 @@ namespace Mono.Globalization.Unicode
 
 		// cp -> level1 value
 		Hashtable arabicLetterPrimaryValues = new Hashtable ();
-		Hashtable cyrillicLetterPrimaryValues = new Hashtable ();
 
 		// letterName -> cp
 		Hashtable arabicNameMap = new Hashtable ();
-		Hashtable cyrillicNameMap = new Hashtable ();
 
 		// cp -> Hashtable [decompType] -> cp
 		Hashtable nfkdMap = new Hashtable ();
@@ -248,7 +273,9 @@ namespace Mono.Globalization.Unicode
 			ModifyParsedValues ();
 			GenerateCore ();
 			Console.Error.WriteLine ("generation done.");
+			CResult = new StreamWriter ("collation-tables.h", false);
 			Serialize ();
+			CResult.Close ();
 			Console.Error.WriteLine ("serialization done.");
 /*
 StreamWriter sw = new StreamWriter ("agelog.txt");
@@ -279,6 +306,11 @@ sw.Close ();
 				source, typeof (ushort), i);
 		}
 
+		void WriteByte (byte value)
+		{
+			// No-op: `value` is ignored. NOTE(review): looks like an unfinished stub - confirm.
+		}
+
 		void Serialize ()
 		{
 			// Tailorings
@@ -288,12 +320,21 @@ sw.Close ();
 			byte [] level1 = new byte [map.Length];
 			byte [] level2 = new byte [map.Length];
 			byte [] level3 = new byte [map.Length];
-			ushort [] widthCompat = new ushort [map.Length];
+// widthCompat has been removed from the mapping table.
+// If it turns out that it is still required, grep this source and uncomment
+// the widthCompat-related lines. FIXME: remove those lines in the future.
+//			ushort [] widthCompat = new ushort [map.Length];
 			for (int i = 0; i < map.Length; i++) {
 				categories [i] = map [i].Category;
 				level1 [i] = map [i].Level1;
 				level2 [i] = map [i].Level2;
 				level3 [i] = ComputeLevel3Weight ((char) i);
+/*
+				// For Japanese Half-width characters, don't
+				// map widthCompat. It is IgnoreKanaType that
+				// handles those width differences.
+				if (0xFF6D <= i && i <= 0xFF9D)
+					continue;
 				switch (decompType [i]) {
 				case DecompositionNarrow:
 				case DecompositionWide:
@@ -303,158 +344,189 @@ sw.Close ();
 					widthCompat [i] = (ushort) decompValues [decompIndex [i]];
 					break;
 				}
+*/
 			}
 
 			// compress
 			ignorableFlags = CompressArray (ignorableFlags,
-				MSCompatUnicodeTableUtil.Ignorable);
-			categories = CompressArray (categories,
-				MSCompatUnicodeTableUtil.Category);
-			level1 = CompressArray (level1, 
-				MSCompatUnicodeTableUtil.Level1);
-			level2 = CompressArray (level2, 
-				MSCompatUnicodeTableUtil.Level2);
-			level3 = CompressArray (level3, 
-				MSCompatUnicodeTableUtil.Level3);
-			widthCompat = (ushort []) CodePointIndexer.CompressArray (
-				widthCompat, typeof (ushort),
-				MSCompatUnicodeTableUtil.WidthCompat);
-			cjkCHS = CompressArray (cjkCHS,
-				MSCompatUnicodeTableUtil.CjkCHS);
-			cjkCHT = CompressArray (cjkCHT,
-				MSCompatUnicodeTableUtil.Cjk);
-			cjkJA = CompressArray (cjkJA,
-				MSCompatUnicodeTableUtil.Cjk);
-			cjkKO = CompressArray (cjkKO,
-				MSCompatUnicodeTableUtil.Cjk);
-			cjkKOlv2 = CompressArray (cjkKOlv2,
-				MSCompatUnicodeTableUtil.Cjk);
+				UUtil.Ignorable);
+			categories = CompressArray (categories, UUtil.Category);
+			level1 = CompressArray (level1, UUtil.Level1);
+			level2 = CompressArray (level2, UUtil.Level2);
+			level3 = CompressArray (level3, UUtil.Level3);
+//			widthCompat = (ushort []) CodePointIndexer.CompressArray (
+//				widthCompat, typeof (ushort), UUtil.WidthCompat);
+			cjkCHS = CompressArray (cjkCHS, UUtil.CjkCHS);
+			cjkCHT = CompressArray (cjkCHT,UUtil.Cjk);
+			cjkJA = CompressArray (cjkJA, UUtil.Cjk);
+			cjkKO = CompressArray (cjkKO, UUtil.Cjk);
+			cjkKOlv2 = CompressArray (cjkKOlv2, UUtil.Cjk);
 
 			// Ignorables
-			Result.WriteLine ("internal static readonly byte [] ignorableFlags = new byte [] {");
+			CResult.WriteLine ("static const guint8  collation_table_ignorableFlags [] = {");
+			CSResult.WriteLine ("static readonly byte [] ignorableFlagsArr = new byte [] {");
 #if Binary
 			MemoryStream ms = new MemoryStream ();
 			BinaryWriter binary = new BinaryWriter (ms);
+			binary.Write (UUtil.ResourceVersion);
 			binary.Write (ignorableFlags.Length);
 #endif
 			for (int i = 0; i < ignorableFlags.Length; i++) {
 				byte value = ignorableFlags [i];
 				if (value < 10)
-					Result.Write ("{0},", value);
+					CSResult.Write ("{0},", value);
 				else
-					Result.Write ("0x{0:X02},", value);
+					CSResult.Write ("0x{0:X02},", value);
+				CResult.Write ("{0},", value);
 #if Binary
 				binary.Write (value);
 #endif
-				if ((i & 0xF) == 0xF)
-					Result.WriteLine ("// {0:X04}", i - 0xF);
+				if ((i & 0xF) == 0xF) {
+					CSResult.WriteLine ("// {0:X04}",
+						UUtil.Ignorable.ToCodePoint (i - 0xF));
+					CResult.WriteLine ();
+				}
 			}
-			Result.WriteLine ("};");
-			Result.WriteLine ();
+			CResult.WriteLine ("0};");
+			CSResult.WriteLine ("};");
+			CSResult.WriteLine ();
 
 			// Primary category
-			Result.WriteLine ("internal static readonly byte [] categories = new byte [] {");
+			CResult.WriteLine ("static const guint8 collation_table_category [] = {");
+			CSResult.WriteLine ("static readonly byte [] categoriesArr = new byte [] {");
 #if Binary
 			binary.Write (categories.Length);
 #endif
 			for (int i = 0; i < categories.Length; i++) {
 				byte value = categories [i];
 				if (value < 10)
-					Result.Write ("{0},", value);
+					CSResult.Write ("{0},", value);
 				else
-					Result.Write ("0x{0:X02},", value);
+					CSResult.Write ("0x{0:X02},", value);
+				CResult.Write ("{0},", value);
 #if Binary
 				binary.Write (value);
 #endif
-				if ((i & 0xF) == 0xF)
-					Result.WriteLine ("// {0:X04}", i - 0xF);
+				if ((i & 0xF) == 0xF) {
+					CSResult.WriteLine ("// {0:X04}",
+						UUtil.Category.ToCodePoint (i - 0xF));
+					CResult.WriteLine ();
+				}
 			}
-			Result.WriteLine ("};");
-			Result.WriteLine ();
+			CResult.WriteLine ("};");
+			CSResult.WriteLine ("};");
+			CSResult.WriteLine ();
 
 			// Primary weight value
-			Result.WriteLine ("internal static readonly byte [] level1 = new byte [] {");
+			CResult.WriteLine ("static const guint8 collation_table_level1 [] = {");
+			CSResult.WriteLine ("static readonly byte [] level1Arr = new byte [] {");
 #if Binary
 			binary.Write (level1.Length);
 #endif
 			for (int i = 0; i < level1.Length; i++) {
 				byte value = level1 [i];
 				if (value < 10)
-					Result.Write ("{0},", value);
+					CSResult.Write ("{0},", value);
 				else
-					Result.Write ("0x{0:X02},", value);
+					CSResult.Write ("0x{0:X02},", value);
+				CResult.Write ("{0},", value);
 #if Binary
 				binary.Write (value);
 #endif
-				if ((i & 0xF) == 0xF)
-					Result.WriteLine ("// {0:X04}", i - 0xF);
+				if ((i & 0xF) == 0xF) {
+					CSResult.WriteLine ("// {0:X04}",
+						UUtil.Level1.ToCodePoint (i - 0xF));
+					CResult.WriteLine ();
+				}
 			}
-			Result.WriteLine ("};");
-			Result.WriteLine ();
+			CResult.WriteLine ("0};");
+			CSResult.WriteLine ("};");
+			CSResult.WriteLine ();
 
 			// Secondary weight
-			Result.WriteLine ("internal static readonly byte [] level2 = new byte [] {");
+			CResult.WriteLine ("static const guint8 collation_table_level2 [] = {");
+			CSResult.WriteLine ("static readonly byte [] level2Arr = new byte [] {");
 #if Binary
 			binary.Write (level2.Length);
 #endif
 			for (int i = 0; i < level2.Length; i++) {
 				byte value = level2 [i];
 				if (value < 10)
-					Result.Write ("{0},", value);
+					CSResult.Write ("{0},", value);
 				else
-					Result.Write ("0x{0:X02},", value);
+					CSResult.Write ("0x{0:X02},", value);
+				CResult.Write ("{0},", value);
 #if Binary
 				binary.Write (value);
 #endif
-				if ((i & 0xF) == 0xF)
-					Result.WriteLine ("// {0:X04}", i - 0xF);
+				if ((i & 0xF) == 0xF) {
+					CSResult.WriteLine ("// {0:X04}",
+						UUtil.Level2.ToCodePoint (i - 0xF));
+					CResult.WriteLine ();
+				}
 			}
-			Result.WriteLine ("};");
-			Result.WriteLine ();
+			CResult.WriteLine ("0};");
+			CSResult.WriteLine ("};");
+			CSResult.WriteLine ();
 
 			// Thirtiary weight
-			Result.WriteLine ("internal static readonly byte [] level3 = new byte [] {");
+			CResult.WriteLine ("static const guint8 collation_table_level3 [] = {");
+			CSResult.WriteLine ("static readonly byte [] level3Arr = new byte [] {");
 #if Binary
 			binary.Write (level3.Length);
 #endif
 			for (int i = 0; i < level3.Length; i++) {
 				byte value = level3 [i];
 				if (value < 10)
-					Result.Write ("{0},", value);
+					CSResult.Write ("{0},", value);
 				else
-					Result.Write ("0x{0:X02},", value);
+					CSResult.Write ("0x{0:X02},", value);
+				CResult.Write ("{0},", value);
 #if Binary
 				binary.Write (value);
 #endif
-				if ((i & 0xF) == 0xF)
-					Result.WriteLine ("// {0:X04}", i - 0xF);
+				if ((i & 0xF) == 0xF) {
+					CSResult.WriteLine ("// {0:X04}",
+						UUtil.Level3.ToCodePoint (i - 0xF));
+					CResult.WriteLine ();
+				}
 			}
-			Result.WriteLine ("};");
-			Result.WriteLine ();
+			CResult.WriteLine ("0};");
+			CSResult.WriteLine ("};");
+			CSResult.WriteLine ();
 
+/*
 			// Width insensitivity mappings
 			// (for now it is more lightweight than dumping the
 			// entire NFKD table).
-			Result.WriteLine ("internal static readonly ushort [] widthCompat = new ushort [] {");
+			CResult.WriteLine ("static const guint16* widthCompat [] = {");
+			CSResult.WriteLine ("static readonly ushort [] widthCompatArr = new ushort [] {");
 #if Binary
 			binary.Write (widthCompat.Length);
 #endif
 			for (int i = 0; i < widthCompat.Length; i++) {
 				ushort value = widthCompat [i];
 				if (value < 10)
-					Result.Write ("{0},", value);
+					CSResult.Write ("{0},", value);
 				else
-					Result.Write ("0x{0:X02},", value);
+					CSResult.Write ("0x{0:X02},", value);
+				CResult.Write ("{0},", value);
 #if Binary
 				binary.Write (value);
 #endif
-				if ((i & 0xF) == 0xF)
-					Result.WriteLine ("// {0:X04}", i - 0xF);
+				if ((i & 0xF) == 0xF) {
+					CSResult.WriteLine ("// {0:X04}",
+						UUtil.WidthCompat.ToCodePoint (i - 0xF));
+					CResult.WriteLine ();
+				}
 			}
-			Result.WriteLine ("};");
-			Result.WriteLine ();
+			CResult.WriteLine ("0};");
+			CSResult.WriteLine ("};");
+			CSResult.WriteLine ();
+*/
+
 #if Binary
-			using (FileStream fs = File.Create ("../collation.core.bin")) {
+			using (FileStream fs = File.Create ("../resources/collation.core.bin")) {
 				byte [] array = ms.ToArray ();
 				fs.Write (array, 0, array.Length);
 			}
@@ -468,32 +540,70 @@ sw.Close ();
 			SerializeCJK ("cjkKOlv2", cjkKOlv2, 0x9FB0);
 		}
 
-		void SerializeCJK (string name, ushort [] cjk, int max)
+		void SerializeCJK (string name, ushort [] cjk, int max_unused)
 		{
-			int offset = 0;//char.MaxValue - cjk.Length;
-			Result.WriteLine ("static ushort [] {0} = new ushort [] {{", name);
+//			CResult.WriteLine ("static const int collation_table_collation_cjk_{0}_size [] = {1};", name, cjk.Length);
+			CSResult.WriteLine ("const int {0}ArrLength = {1};", name, cjk.Length);
+
+			int len = cjk.Length;
+			CResult.WriteLine ("static const guint8 collation_table_collation_cjk_{0} [] = {{", name);
+			CSResult.WriteLine ("static byte [] {0}Arr = new byte [] {{", name);
+			// the actual length is *2
+			for (int i = 0; i < 4; i++, len /= 256) {
+				CResult.Write ("{0},", len & 0xFF);
+				CSResult.Write ("0x{0:X04},", len & 0xFF);
+			}
+			CResult.WriteLine ();
+			CSResult.WriteLine ();
 #if Binary
 			MemoryStream ms = new MemoryStream ();
 			BinaryWriter binary = new BinaryWriter (ms);
+			binary.Write (UUtil.ResourceVersion);
+			binary.Write (cjk.Length); // the actual size is *2.
 #endif
+			// category
 			for (int i = 0; i < cjk.Length; i++) {
-				if (i + offset == max)
-					break;
-				ushort value = cjk [i];
+//				if (i == max)
+//					break;
+				byte value = (byte) (cjk [i] >> 8);
 				if (value < 10)
-					Result.Write ("{0},", value);
+					CSResult.Write ("{0},", value);
 				else
-					Result.Write ("0x{0:X04},", value);
+					CSResult.Write ("0x{0:X02},", value);
+				CResult.Write ("{0},", value);
 #if Binary
 				binary.Write (value);
 #endif
-				if ((i & 0xF) == 0xF)
-					Result.WriteLine ("// {0:X04}", i - 0xF + offset);
+				if ((i & 0xF) == 0xF) {
+					CSResult.WriteLine ("// {0:X04}", i - 0xF);
+					CResult.WriteLine ();
+				}
 			}
-			Result.WriteLine ("};");
-			Result.WriteLine ();
+
+			// level 1
+			for (int i = 0; i < cjk.Length; i++) {
+//				if (i == max)
+//					break;
+				byte value = (byte) (cjk [i] & 0xFF);
+				if (value < 10)
+					CSResult.Write ("{0},", value);
+				else
+					CSResult.Write ("0x{0:X02},", value);
+				CResult.Write ("{0},", value);
+#if Binary
+				binary.Write (value);
+#endif
+				if ((i & 0xF) == 0xF) {
+					CSResult.WriteLine ("// {0:X04}", i - 0xF);
+					CResult.WriteLine ();
+				}
+			}
+
+			CResult.WriteLine ("0};");
+			CSResult.WriteLine ("};");
+			CSResult.WriteLine ();
 #if Binary
-			using (FileStream fs = File.Create (String.Format ("../collation.{0}.bin", name))) {
+			using (FileStream fs = File.Create (String.Format ("../resources/collation.{0}.bin", name))) {
 				byte [] array = ms.ToArray ();
 				fs.Write (array, 0, array.Length);
 			}
@@ -502,30 +612,35 @@ sw.Close ();
 
 		void SerializeCJK (string name, byte [] cjk, int max)
 		{
-			int offset = 0;//char.MaxValue - cjk.Length;
-			Result.WriteLine ("static byte [] {0} = new byte [] {{", name);
+			CResult.WriteLine ("static const guint8 collation_table_collation_cjk_{0} [] = {{", name);
+			CSResult.WriteLine ("static byte [] {0}Arr = new byte [] {{", name);
 #if Binary
 			MemoryStream ms = new MemoryStream ();
 			BinaryWriter binary = new BinaryWriter (ms);
+			binary.Write (UUtil.ResourceVersion);
 #endif
 			for (int i = 0; i < cjk.Length; i++) {
-				if (i + offset == max)
+				if (i == max)
 					break;
 				byte value = cjk [i];
 				if (value < 10)
-					Result.Write ("{0},", value);
+					CSResult.Write ("{0},", value);
 				else
-					Result.Write ("0x{0:X02},", value);
+					CSResult.Write ("0x{0:X02},", value);
+				CResult.Write ("{0},", value);
 #if Binary
 				binary.Write (value);
 #endif
-				if ((i & 0xF) == 0xF)
-					Result.WriteLine ("// {0:X04}", i - 0xF + offset);
+				if ((i & 0xF) == 0xF) {
+					CSResult.WriteLine ("// {0:X04}", i - 0xF);
+					CResult.WriteLine ();
+				}
 			}
-			Result.WriteLine ("};");
-			Result.WriteLine ();
+			CResult.WriteLine ("0};");
+			CSResult.WriteLine ("};");
+			CSResult.WriteLine ();
 #if Binary
-			using (FileStream fs = File.Create (String.Format ("../collation.{0}.bin", name))) {
+			using (FileStream fs = File.Create (String.Format ("../resources/collation.{0}.bin", name))) {
 				byte [] array = ms.ToArray ();
 				fs.Write (array, 0, array.Length);
 			}
@@ -536,35 +651,46 @@ sw.Close ();
 		{
 			Hashtable indexes = new Hashtable ();
 			Hashtable counts = new Hashtable ();
-			Result.WriteLine ("static char [] tailorings = new char [] {");
+			CResult.WriteLine ("static const guint16 collation_table_tailoring [] = {");
+			CSResult.WriteLine ("static char [] tailoringArr = new char [] {");
 			int count = 0;
 #if Binary
 			MemoryStream ms = new MemoryStream ();
 			BinaryWriter binary = new BinaryWriter (ms);
+			// Here we don't need to output resource version.
+			// This is cached.
 #endif
 			foreach (Tailoring t in tailorings) {
 				if (t.Alias != 0)
 					continue;
-				Result.Write ("/*{0}*/", t.LCID);
+				CResult.Write ("/*{0}*/", t.LCID);
+				CSResult.Write ("/*{0}*/", t.LCID);
 				indexes.Add (t.LCID, count);
 				char [] values = t.ItemToCharArray ();
 				counts.Add (t.LCID, values.Length);
 				foreach (char c in values) {
-					Result.Write ("'\\x{0:X}', ", (int) c);
-					if (++count % 16 == 0)
-						Result.WriteLine (" // {0:X04}", count - 16);
+					CSResult.Write ("'\\x{0:X}', ", (int) c);
+					CResult.Write ("{0},", (int) c);
+					if (++count % 16 == 0) {
+						CSResult.WriteLine (" // {0:X04}", count - 16);
+						CResult.WriteLine ();
+					}
 #if Binary
 					binary.Write ((ushort) c);
 #endif
 				}
 			}
-			Result.WriteLine ("};");
+			CResult.WriteLine ("0};");
+			CSResult.WriteLine ("};");
 
-			Result.WriteLine ("static TailoringInfo [] tailoringInfos = new TailoringInfo [] {");
+			CResult.WriteLine ("static const guint32 collation_table_tailoring_infos [] = {");
+			CResult.WriteLine ("{0}, /*count*/", tailorings.Count);
+			CSResult.WriteLine ("static TailoringInfo [] tailoringInfos = new TailoringInfo [] {");
 #if Binary
 			byte [] rawdata = ms.ToArray ();
 			ms = new MemoryStream ();
 			binary = new BinaryWriter (ms);
+			binary.Write (UUtil.ResourceVersion);
 			binary.Write (tailorings.Count);
 #endif
 			foreach (Tailoring t in tailorings) {
@@ -580,7 +706,8 @@ sw.Close ();
 					foreach (Tailoring t2 in tailorings)
 						if (t2.LCID == t.LCID)
 							french = t2.FrenchSort;
-				Result.WriteLine ("new TailoringInfo ({0}, 0x{1:X}, {2}, {3}), ", t.LCID, idx, cnt, french ? "true" : "false");
+				CSResult.WriteLine ("new TailoringInfo ({0}, 0x{1:X}, {2}, {3}), ", t.LCID, idx, cnt, french ? "true" : "false");
+				CResult.WriteLine ("{0},{1},{2},{3},", t.LCID, idx, cnt, french ? 1 : 0);
 #if Binary
 				binary.Write (t.LCID);
 				binary.Write (idx);
@@ -588,7 +715,8 @@ sw.Close ();
 				binary.Write (french);
 #endif
 			}
-			Result.WriteLine ("};");
+			CResult.WriteLine ("0};");
+			CSResult.WriteLine ("};");
 #if Binary
 			binary.Write ((byte) 0xFF);
 			binary.Write ((byte) 0xFF);
@@ -596,7 +724,7 @@ sw.Close ();
 			binary.Write (rawdata, 0, rawdata.Length);
 
 
-			using (FileStream fs = File.Create ("../collation.tailoring.bin")) {
+			using (FileStream fs = File.Create ("../resources/collation.tailoring.bin")) {
 				byte [] array = ms.ToArray ();
 				fs.Write (array, 0, array.Length);
 			}
@@ -627,6 +755,7 @@ sw.Close ();
 
 			ParseJISOrder (cp932); // in prior to ParseUnidata()
 			ParseUnidata (unidata);
+			ModifyUnidata ();
 			ParseDerivedCoreProperties (derivedCoreProps);
 			ParseScripts (scripts);
 			ParseCJK (chXML, jaXML, koXML);
@@ -657,14 +786,17 @@ sw.Close ();
 		{
 			StringBuilder sb = new StringBuilder ();
 			for (int i = 0; i < s.Length; i++) {
-				if (s.StartsWith ("\\u")) {
-					sb.Append ((char) int.Parse (
-						s.Substring (2, 4), NumberStyles.HexNumber),
+				if (i + 5 < s.Length &&
+					s [i] == '\\' && s [i + 1] == 'u') {
+					sb.Append (
+						(char) int.Parse (
+							s.Substring (i + 2, 4),
+							NumberStyles.HexNumber),
 						1);
 					i += 5;
 				}
-			else
-				sb.Append (s [i]);
+				else
+					sb.Append (s [i]);
 			}
 			return sb.ToString ();
 		}
@@ -844,10 +976,10 @@ sw.Close ();
 					target = 'B';
 				else if (s.Substring (offset).StartsWith ("OPEN O"))
 					target = 'C';
+				else if (s.Substring (offset).StartsWith ("ETH"))
+					target = 'D';
 				else if (s.Substring (offset).StartsWith ("SCHWA"))
 					target = 'E';
-				else if (s.Substring (offset).StartsWith ("ENG"))
-					target = 'N';
 				else if (s.Substring (offset).StartsWith ("OI;")) // 01A2,01A3
 					target = 'O';
 				else if (s.Substring (offset).StartsWith ("YR;")) // 01A2,01A3
@@ -856,6 +988,26 @@ sw.Close ();
 					target = 'S';
 				else if (s.Substring (offset).StartsWith ("ESH"))
 					target = 'S';
+				else if (s.Substring (offset).StartsWith ("OUNCE"))
+					target = 'Z';
+
+				// For remaining IPA chars, direct mapping is
+				// much faster.
+				switch (cp) {
+				case 0x0166: case 0x0167:
+					// Though they are 'T', they have a different weight
+					target = char.MinValue; break;
+				case 0x0299: target = 'B'; break;
+				case 0x029A: target = 'E'; break;
+				case 0x029B: target = 'G'; break;
+				case 0x029C: target = 'H'; break;
+				case 0x029D: target = 'J'; break;
+				case 0x029E: target = 'K'; break;
+				case 0x029F: target = 'L'; break;
+				case 0x02A0: target = 'Q'; break;
+				case 0x02A7: target = 'T'; break;
+				case 0x02A8: target = 'T'; break;
+				}
 
 				if (target == char.MinValue)
 					target = previousLatinTarget;
@@ -905,7 +1057,19 @@ sw.Close ();
 					"SOUTH WEST",
 					"LEFTWARDS",
 					"NORTH WEST",
+					"LEFT RIGHT",
+					"UP DOWN",
 					};
+				if (s.IndexOf ("RIGHTWARDS") >= 0 &&
+					s.IndexOf ("LEFTWARDS") >= 0)
+					value = 0xE1 - 0xD8;
+				else if (s.IndexOf ("UPWARDS") >= 0 &&
+					s.IndexOf ("DOWNWARDS") >= 0)
+					value = 0xE2 - 0xD8;
+				else if (s.IndexOf ("ARROW") >= 0 &&
+					s.IndexOf ("COMBINING") < 0 &&
+					s.IndexOf ("CLOCKWISE") >= 0)
+					value = s.IndexOf ("ANTICLOCKWISE") >= 0 ? 0xE4 - 0xD8 : 0xE3 - 0xD8;
 				if (value == 0)
 					for (int i = 1; value == 0 && i < arrowTargets.Length; i++)
 						if (s.IndexOf (arrowTargets [i]) > 0 &&
@@ -920,7 +1084,7 @@ sw.Close ();
 
 			// Box names
 			if (0x2500 <= cp && cp < 0x2600) {
-				int value = 0;
+				int value = int.MinValue;
 				// flags:
 				// up:1 down:2 right:4 left:8 vert:16 horiz:32
 				// [h,rl] [r] [l]
@@ -960,7 +1124,8 @@ sw.Close ();
 						flag |= 32;
 
 					int fidx = flags.IndexOf (flag);
-					value = fidx < 0 ? fidx : offsets [fidx];
+					if (fidx >= 0)
+						value = offsets [fidx];
 				} else if (s.IndexOf ("BLOCK") >= 0) {
 					if (s.IndexOf ("ONE EIGHTH") >= 0)
 						value = 0x12;
@@ -1021,6 +1186,8 @@ sw.Close ();
 					else
 						value = 0xC9 - 0xE5;
 				}
+				else if (s.IndexOf ("BULLET") >= 0)
+					value = 0xCC - 0xE5;
 				if (0x25DA <= cp && cp <= 0x25E5)
 					value = 0xCD + cp - 0x25DA - 0xE5;
 
@@ -1030,7 +1197,7 @@ sw.Close ();
 				case 0x2572: value = 0x10; break;
 				case 0x2573: value = 0x11; break;
 				}
-				if (value != 0)
+				if (value != int.MinValue)
 					boxValues.Add (new DictionaryEntry (
 						cp, value));
 			}
@@ -1045,15 +1212,23 @@ sw.Close ();
 				sortableCharNames.Add (new DictionaryEntry (
 					cp, name.Substring (7)));
 
+			if (Char.GetUnicodeCategory ((char) cp) ==
+				UnicodeCategory.MathSymbol) {
+				if (name.StartsWith ("CIRCLED "))
+					diacritical [cp] = 0xEE;
+				if (name.StartsWith ("SQUARED "))
+					diacritical [cp] = 0xEF;
+			}
+
 			// diacritical weights by character name
 if (diacritics.Length != diacriticWeights.Length)
 throw new Exception (String.Format ("Should not happen. weights are {0} while labels are {1}", diacriticWeights.Length, diacritics.Length));
-			for (int d = 0; d < diacritics.Length; d++) {
+			for (int d = diacritics.Length - 1; d >= 0; d--) {
 				if (s.IndexOf (diacritics [d]) > 0) {
 					diacritical [cp] += diacriticWeights [d];
 					if (s.IndexOf ("COMBINING") >= 0)
 						diacritical [cp] -= (byte) 2;
-					continue;
+					break;
 				}
 				// also process "COMBINING blah" here
 				// For now it is limited to cp < 0x0370
@@ -1063,8 +1238,10 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				if (tmp.IndexOf ("WITH ") == 0)
 					tmp = tmp.Substring (4);
 				tmp = String.Concat ("COMBINING", (tmp [0] != ' ' ? " " : ""), tmp);
-				if (name == tmp)
+				if (name == tmp) {
 					diacritical [cp] = (byte) (diacriticWeights [d] - 2);
+					break;
+				}
 //if (name == tmp)
 //Console.Error.WriteLine ("======= {2:X04} : '{0}' / '{1}'", name, tmp, cp);
 			}
@@ -1072,26 +1249,9 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			if (s.IndexOf ("FULL STOP") > 0 &&
 				(s.IndexOf ("DIGIT") > 0 || s.IndexOf ("NUMBER") > 0))
 				diacritical [cp] |= 0xF4;
-
-			// Cyrillic letter name
-			if (0x0430 <= cp && cp <= 0x0486 &&
-				Char.IsLetter ((char) cp)) {
-				byte value = (byte) (cyrillicNameMap.Count * 3 + 0x06);
-				// Get primary letter name i.e.
-				// XXX part of CYRILLIC LETTER XXX yyy
-				// e.g. "IZHITSA" for "IZHITSA DOUBLE GRAVE".
-				string letterName =
-					name.Substring (name.IndexOf ("LETTER ") + 7);
-				int tmpIdx = letterName.IndexOf (' ');
-				letterName = tmpIdx < 0 ? letterName : letterName.Substring (0, tmpIdx);
-//Console.Error.WriteLine ("Arabic name for {0:X04} is {1}", cp, letterName);
-				if (cyrillicNameMap.ContainsKey (letterName))
-					value = (byte) cyrillicLetterPrimaryValues [cyrillicNameMap [letterName]];
-				else
-					cyrillicNameMap [letterName] = cp;
-
-				cyrillicLetterPrimaryValues [cp] = value;
-			}
+			if (s.StartsWith ("SCRIPT") || s.IndexOf (" SCRIPT ") > 0)
+				diacritical [cp] = (byte) (s.IndexOf ("SMALL") > 0 ? 3 :
+					s.IndexOf ("CAPITAL") > 0 ? 5 : 4);
 
 			// Arabic letter name
 			if (0x0621 <= cp && cp <= 0x064A &&
@@ -1308,7 +1468,6 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 
 		void ParseScripts (string filename)
 		{
-			ArrayList cyrillic = new ArrayList ();
 			ArrayList gurmukhi = new ArrayList ();
 			ArrayList gujarati = new ArrayList ();
 			ArrayList georgian = new ArrayList ();
@@ -1338,11 +1497,6 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 						continue;
 
 					switch (value) {
-					case "Cyrillic":
-						for (int x = cp; x <= cpEnd; x++)
-							if (!IsIgnorable (x))
-								cyrillic.Add ((char) x);
-						break;
 					case "Gurmukhi":
 						for (int x = cp; x <= cpEnd; x++)
 							if (!IsIgnorable (x))
@@ -1366,12 +1520,10 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 					}
 				}
 			}
-			cyrillic.Sort (UCAComparer.Instance);
 			gurmukhi.Sort (UCAComparer.Instance);
 			gujarati.Sort (UCAComparer.Instance);
 			georgian.Sort (UCAComparer.Instance);
 			thaana.Sort (UCAComparer.Instance);
-			orderedCyrillic = (char []) cyrillic.ToArray (typeof (char));
 			orderedGurmukhi = (char []) gurmukhi.ToArray (typeof (char));
 			orderedGujarati = (char []) gujarati.ToArray (typeof (char));
 			orderedGeorgian = (char []) georgian.ToArray (typeof (char));
@@ -1458,16 +1610,55 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			category = "ja";
 			arr = cjkJA;
 			offset = 0;//char.MaxValue - arr.Length;
-			doc.Load (jaXML);
-			s = doc.SelectSingleNode ("/ldml/collations/collation/rules/pc").InnerText;
+
+			// SPECIAL CASES
+			arr [0x4EDD] = 0x8002; // Chinese repetition mark?
+			arr [0x337B] = 0x8004; // Those 4 characters are Gengou
+			arr [0x337E] = 0x8005;
+			arr [0x337D] = 0x8006;
+			arr [0x337C] = 0x8007;
+
 			v = 0x8008;
-			foreach (char c in s) {
+			foreach (JISCharacter jc in jisJapanese) {
+				if (jc.JIS < 0x8800)
+					continue;
+				char c = (char) jc.CP;
+
 				if (c < '\u4E00')
-					Console.Error.WriteLine ("---- warning: for {0} {1:X04} is omitted which should be {2:X04}", category, (int) c, v);
+					// Console.Error.WriteLine ("---- warning: for {0} {1:X04} is omitted which should be {2:X04}", category, (int) c, v);
+					continue;
 				else {
 					arr [(int) c - offset] = (ushort) v++;
 					if (v % 256 == 0)
 						v += 2;
+
+					// SPECIAL CASES:
+					if (c == '\u662D') // U+337C
+						continue;
+					if (c == '\u5927') // U+337D
+						continue;
+					if (c == '\u5E73') // U+337B
+						continue;
+					if (c == '\u660E') // U+337E
+						continue;
+					if (c == '\u9686') // U+F9DC
+						continue;
+
+					// FIXME: there are still remaining
+					// characters after U+FA0C.
+//					for (int k = 0; k < char.MaxValue; k++) {
+					for (int k = 0; k < '\uFA0D'; k++) {
+						if (decompIndex [k] == 0 || IsIgnorable (k))
+							continue;
+						if (decompValues [decompIndex [k]] == c /*&&
+							decompLength [k] == 1*/ ||
+							decompLength [k] == 3 &&
+							decompValues [decompIndex [k] + 1] == c) {
+							arr [k - offset] = (ushort) v++;
+							if (v % 256 == 0)
+								v += 2;
+						}
+					}
 				}
 			}
 
@@ -1523,8 +1714,124 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			}
 		}
 
+		void ModifyUnidata ()
+		{ // Applies hard-coded overrides to the isUppercase, decomposition (decompType/Index/Length/Values) and diacritical tables.
+			ArrayList decompValues = new ArrayList (this.decompValues);
+			// (The local list above is a growable copy that shadows the field; it is stored back at the end.)
+			// Hebrew uppercase letters: flag KAF, MEM, NUN, PE and TSADI in isUppercase.
+			foreach (int i in new int []
+				{0x05DB, 0x05DE, 0x05E0, 0x05E4, 0x05E6})
+				isUppercase [i] = true;
+
+
+			// Drop the decomposition of U+FE31..U+FE34 and U+037E (type, index and length reset to 0).
+			for (int i = 0xFE31; i <= 0xFE34; i++) {
+				decompType [i] = 0;
+				decompIndex [i] = 0;
+				decompLength [i] = 0;
+			}
+			decompType [0x037E] = 0;
+			decompIndex [0x037E] = 0;
+			decompLength [0x037E] = 0;
+
+			// Hangzhou numbers (U+3021..U+3029): fixed diacritical weight 0x4E
+			for (int i = 0x3021; i <= 0x3029; i++)
+				diacritical [i] = 0x4E;
+			// Korean parens numbers (U+3200..U+321C get 0xA, U+3260..U+327B get 0xC)
+			for (int i = 0x3200; i <= 0x321C; i++)
+				diacritical [i] = 0xA;
+			for (int i = 0x3260; i <= 0x327B; i++)
+				diacritical [i] = 0xC;
+
+			// LAMESPEC: these remappings should not be done;
+			// Windows has incorrect CJK compat mappings.
+			decompValues [decompIndex [0x32A9]] = 0x91AB;
+			decompLength [0x323B] = 1;
+			decompValues [decompIndex [0x323B]] = 0x5B78;
+			decompValues [decompIndex [0x32AB]] = 0x5B78;
+			decompValues [decompIndex [0x32A2]] = 0x5BEB;
+			decompLength [0x3238] = 1;
+			decompValues [decompIndex [0x3238]] = 0x52DE;
+			decompValues [decompIndex [0x3298]] = 0x52DE;
+
+			// LAMESPEC: custom remappings (these are not bugs, but they are not standard-compliant either)
+			decompIndex [0xFA0C] = decompValues.Count; // index of the entry appended on the next line
+			decompValues.Add ((int) 0x5140);
+			decompLength [0xFA0C] = 1;
+			decompIndex [0xF929] = decompLength [0xF929] = 0;
+
+			decompValues [decompIndex [0xF92C]] = 0x90DE;
+			// U+2125: append U+005A as its single value and mark the type as DecompositionFont.
+			decompIndex [0x2125] = decompValues.Count;
+			decompValues.Add ((int) 0x005A);
+			decompLength [0x2125] = 1;
+			decompType [0x2125] = DecompositionFont;
+			// Store the edited list back into the int [] field.
+			this.decompValues = decompValues.ToArray (typeof (int)) as int [];
+		}
+
 		void ModifyParsedValues ()
 		{
+			// Sometimes STROKE doesn't work well
+			diacritical [0xD8] = diacritical [0xF8] = 0x21;
+			diacritical [0x141] = diacritical [0x142] = 0x1F;
+			// FIXME: why?
+			diacritical [0xAA] = diacritical [0xBA] = 3;
+			diacritical [0xD0] = diacritical [0xF0] = 0x68;
+			diacritical [0x131] = 3;
+			diacritical [0x138] = 3;
+			// TOPBAR does not work as an identifier for the weight
+			diacritical [0x182] = diacritical [0x183] = 0x68; // B
+			diacritical [0x18B] = diacritical [0x18C] = 0x1E; // D
+			// TONE TWO
+			diacritical [0x1A7] = diacritical [0x1A8] = 0x87;
+			// TONE SIX
+			diacritical [0x184] = diacritical [0x185] = 0x87;
+			// OPEN E
+			diacritical [0x190] = diacritical [0x25B] = 0x7B;
+			// There are many letters w/ diacritical weight 0x7B
+			diacritical [0x0192] = diacritical [0x0194] =
+			diacritical [0x0195] = diacritical [0x0196] =
+			diacritical [0x019C] = diacritical [0x019E] =
+			diacritical [0x01A6] = diacritical [0x01B1] =
+			diacritical [0x01B2] = diacritical [0x01BF] = 0x7B;
+			// ... as well as 0x7C
+			diacritical [0x01A2] = diacritical [0x01A3] = 0x7C;
+
+			// <font> NFKD characters seem to have diacritical
+			// weights 3, 4, 5..., but the order does not appear to be
+			// by codepoint and I have no idea how they are sorted.
+			diacritical [0x210E] = 3;
+			diacritical [0x210F] = 0x68;
+			diacritical [0x2110] = 4;
+			diacritical [0x2111] = 5;
+			diacritical [0x2112] = 4;
+			diacritical [0x2113] = 4;
+			diacritical [0x211B] = 4;
+			diacritical [0x211C] = 5;
+
+			// some cyrillic diacritical weight. They seem to be
+			// based on old character names, so it's quicker to
+			// set them directly here.
+			// FIXME: the reasons for most of these are unknown
+			diacritical [0x0496] = diacritical [0x0497] = 7;
+			diacritical [0x0498] = diacritical [0x0499] = 0x1A;
+			diacritical [0x049A] = diacritical [0x049B] = 0x17;
+			diacritical [0x049C] = diacritical [0x049D] = 9;
+			diacritical [0x049E] = diacritical [0x049F] = 4;
+			diacritical [0x04A0] = diacritical [0x04A1] = 0xA;
+			diacritical [0x04A2] = diacritical [0x04A3] = 7;
+			diacritical [0x04A4] = diacritical [0x04A5] = 8;
+			diacritical [0x04AA] = diacritical [0x04AB] = 0x1A; // ES CEDILLA?
+			diacritical [0x04AC] = diacritical [0x04AD] = 7; // RIGHT DESCENDER? but U+4B2
+			diacritical [0x04AE] = diacritical [0x04AF] = 0xB; // STRAIGHT U?
+			diacritical [0x04B2] = diacritical [0x04B3] = 0x17; // RIGHT DESCENDER? but U+4AC
+			diacritical [0x04B4] = diacritical [0x04B5] = 3;
+			diacritical [0x04B6] = 8;
+			diacritical [0x04B7] = 7;
+			diacritical [0x04B8] = diacritical [0x04B9] = 9;
+			diacritical [0x04BA] = diacritical [0x04BB] = 9;
+
 			// number, secondary weights
 			byte weight = 0x38;
 			int [] numarr = numberSecondaryWeightBounds;
@@ -1533,19 +1840,12 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 					if (Char.IsNumber ((char) cp))
 						diacritical [cp] = weight;
 
-			// Modify some decomposition equivalence
-			decompType [0xFE31] = 0;
-			decompIndex [0xFE31] = 0;
-			decompLength [0xFE31] = 0;
-			decompType [0xFE32] = 0;
-			decompIndex [0xFE32] = 0;
-			decompLength [0xFE32] = 0;
-
-			// Korean parens numbers
-			for (int i = 0x3200; i <= 0x321C; i++)
-				diacritical [i] = 0xA;
-			for (int i = 0x3260; i <= 0x327B; i++)
-				diacritical [i] = 0xC;
+			// Gurmukhi special letters' diacritical weight
+			for (int i = 0x0A50; i < 0x0A60; i++)
+				diacritical [i] = 4;
+			// Oriya special letters' diacritical weight
+			for (int i = 0x0B5C; i < 0x0B60; i++)
+				diacritical [i] = 6;
 
 			// Update name part of named characters
 			for (int i = 0; i < sortableCharNames.Count; i++) {
@@ -1587,14 +1887,25 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 
 			#region Specially ignored // 01
 			// This will raise "Defined" flag up.
+			// FIXME: check whether this is really fine. For Japanese
+			// voice marks this code actually does remapping.
 			foreach (char c in specialIgnore)
 				map [(int) c] = new CharMapEntry (0, 0, 0);
 			#endregion
 
+			#region Extenders (FF FF)
+			fillIndex [0xFF] = 0xFF;
+			char [] specialBiggest = new char [] {
+				'\u3005', '\u3031', '\u3032', '\u309D',
+				'\u309E', '\u30FC', '\u30FD', '\u30FE',
+				'\uFE7C', '\uFE7D', '\uFF70'};
+			foreach (char c in specialBiggest)
+				AddCharMap (c, 0xFF, 0);
+			#endregion
 
 			#region Variable weights
 			// Controls : 06 03 - 06 3D
-			fillIndex [6] = 3;
+			fillIndex [0x6] = 3;
 			for (int i = 0; i < 65536; i++) {
 				if (IsIgnorable (i))
 					continue;
@@ -1607,10 +1918,15 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			}
 
 			// Apostrophe 06 80
-			fillIndex [6] = 0x80;
-			AddCharMapGroup ('\'', 6, 1, 0);
+			fillIndex [0x6] = 0x80;
+			AddCharMap ('\'', 6, 0);
+			AddCharMap ('\uFF07', 6, 1);
 			AddCharMap ('\uFE63', 6, 1);
 
+			// SPECIAL CASE: fill U+FE32 here, before it would be
+			// added at U+2013. Windows does not always respect NFKD.
+			map [0xFE32] = new CharMapEntry (6, 0x90, 0);
+
 			// Hyphen/Dash : 06 81 - 06 90
 			for (int i = 0; i < char.MaxValue; i++) {
 				if (!IsIgnorable (i) &&
@@ -1627,12 +1943,16 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 					}
 				}
 			}
+			// They are regarded as primarily equivalent to '-'
+			map [0x208B] = new CharMapEntry (6, 0x82, 0);
+			map [0x207B] = new CharMapEntry (6, 0x82, 0);
+			map [0xFF0D] = new CharMapEntry (6, 0x82, 0);
 
 			// Arabic variable weight chars 06 A0 -
 			fillIndex [6] = 0xA0;
 			// vowels
 			for (int i = 0x64B; i <= 0x650; i++)
-				AddArabicCharMap ((char) i);
+				AddArabicCharMap ((char) i, 6, 1, 0);
 			// sukun
 			AddCharMapGroup ('\u0652', 6, 1, 0);
 			// shadda
@@ -1652,10 +1972,11 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			for (int i = 0x0329; i <= 0x0334; i++)
 				if (!IsIgnorable (i))
 					AddCharMap ((char) i, 0x1, 1);
+			fillIndex [0x1]++;
 			for (int i = 0x0339; i <= 0x0341; i++)
 				if (!IsIgnorable (i))
 					AddCharMap ((char) i, 0x1, 1);
-			fillIndex [0x1] = 0x72;
+			fillIndex [0x1] = 0x74;
 			for (int i = 0x0346; i <= 0x0348; i++)
 				if (!IsIgnorable (i))
 					AddCharMap ((char) i, 0x1, 1);
@@ -1668,6 +1989,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			for (int i = 0x02CE; i <= 0x02CF; i++)
 				if (!IsIgnorable (i))
 					AddCharMap ((char) i, 0x1, 1);
+			fillIndex [0x1]++;
 			for (int i = 0x02D1; i <= 0x02D3; i++)
 				if (!IsIgnorable (i))
 					AddCharMap ((char) i, 0x1, 1);
@@ -1676,30 +1998,87 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				if (!IsIgnorable (i))
 					AddCharMap ((char) i, 0x1, 1);
 
+
 			// FIXME: needs more love here (it should eliminate
 			// all the hacky code above).
 			for (int i = 0x0300; i < 0x0370; i++)
 				if (!IsIgnorable (i) && diacritical [i] != 0
-					/* especiall here*/ && !map [i].Defined)
+					&& !map [i].Defined)
 					map [i] = new CharMapEntry (
 						0x1, 0x1, diacritical [i]);
 
-			fillIndex [0x1] = 0xAC;
-			for (int i = 0x07A6; i <= 0x07B0; i++)
-				if (!IsIgnorable (i))
-					AddCharMap ((char) i, 0x1, 1);
+			// Cyrillic and Armenian nonspacing mark
+			fillIndex [0x1] = 0x94;
+			for (int i = 0x400; i < 0x580; i++)
+				if (!IsIgnorable (i) &&
+					Char.GetUnicodeCategory ((char) i) ==
+					UnicodeCategory.NonSpacingMark)
+					AddCharMap ((char) i, 1, 1);
 
-			fillIndex [0x1] = 0x0C;
-			for (int i = 0x0EC8; i <= 0x0ECD; i++)
-				if (!IsIgnorable (i))
+			fillIndex [0x1] = 0x8D;
+			// syriac dotted nonspacing marks (1)
+			AddCharMap ('\u0740', 0x1, 1);
+			AddCharMap ('\u0741', 0x1, 1);
+			AddCharMap ('\u0742', 0x1, 1);
+			// syriac oblique nonspacing marks
+			AddCharMap ('\u0747', 0x1, 1);
+			AddCharMap ('\u0748', 0x1, 1);
+			// syriac dotted nonspacing marks (2)
+			fillIndex [0x1] = 0x94; // this reset is mandatory
+			AddCharMap ('\u0732', 0x1, 1);
+			AddCharMap ('\u0735', 0x1, 1);
+			AddCharMap ('\u0738', 0x1, 1);
+			AddCharMap ('\u0739', 0x1, 1);
+			AddCharMap ('\u073C', 0x1, 1);
+			// SPECIAL CASES: superscripts
+			AddCharMap ('\u073F', 0x1, 1);
+			AddCharMap ('\u0711', 0x1, 1);
+			// syriac "DOTS"
+			for (int i = 0x0743; i <= 0x0746; i++)
+				AddCharMap ((char) i, 0x1, 1);
+			for (int i = 0x0730; i <= 0x0780; i++)
+				if (!map [i].Defined &&
+					Char.GetUnicodeCategory ((char) i) ==
+					UnicodeCategory.NonSpacingMark)
 					AddCharMap ((char) i, 0x1, 1);
 
 			// LAMESPEC: It should not stop at '\u20E1'. There are
 			// a few more characters (that however results in 
 			// overflow of level 2 unless we start before 0xDD).
-			fillIndex [0x1] = 0xDC;
-			for (int i = 0x20d0; i <= 0x20e1; i++)
+			fillIndex [0x1] = 0xDD;
+			for (int i = 0x20D0; i <= 0x20DC; i++)
+				AddCharMap ((char) i, 0x1, 1);
+			fillIndex [0x1] = 0xEC;
+			for (int i = 0x20DD; i <= 0x20E1; i++)
+				AddCharMap ((char) i, 0x1, 1);
+			fillIndex [0x1] = 0x4;
+			AddCharMap ('\u0CD5', 0x1, 1);
+			AddCharMap ('\u0CD6', 0x1, 1);
+			AddCharMap ('\u093C', 0x1, 1);
+			for (int i = 0x302A; i <= 0x302D; i++)
+				AddCharMap ((char) i, 0x1, 1);
+			AddCharMap ('\u0C55', 0x1, 1);
+			AddCharMap ('\u0C56', 0x1, 1);
+
+			fillIndex [0x1] = 0x50; // I wonder how they are sorted
+			for (int i = 0x02D4; i <= 0x02D7; i++)
 				AddCharMap ((char) i, 0x1, 1);
+
+			// They are not part of Nonspacing marks, but have
+			// only diacritical weight.
+			for (int i = 0x3099; i <= 0x309C; i++)
+				map [i] = new CharMapEntry (1, 1, 1);
+			map [0xFF9E] = new CharMapEntry (1, 1, 1);
+			map [0xFF9F] = new CharMapEntry (1, 1, 2);
+			map [0x309D] = new CharMapEntry (0xFF, 0xFF, 1);
+			map [0x309E] = new CharMapEntry (0xFF, 0xFF, 1);
+			for (int i = 0x30FC; i <= 0x30FE; i++)
+				map [i] = new CharMapEntry (0xFF, 0xFF, 1);
+
+			fillIndex [0x1] = 0xA;
+			for (int i = 0x0951; i <= 0x0954; i++)
+				AddCharMap ((char) i, 0x1, 2);
+
 			#endregion
 
 
@@ -1722,6 +2101,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			// while they aren't.
 			AddCharMap ('\u2422', 0x7, 1, 0); // blank symbol
 			AddCharMap ('\u2423', 0x7, 1, 0); // open box
+
 			#endregion
 
 			// category 09 - continued symbols from 08
@@ -1731,7 +2111,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				AddCharMap ((char) cp, 0x9, 1, 0);
 
 			// arrows
-			byte [] arrowLv2 = new byte [] {0, 3, 3, 3, 3, 3, 3, 3, 3};
+			byte [] arrowLv2 = new byte [] {0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3};
 			foreach (DictionaryEntry de in arrowValues) {
 				int idx = (int) de.Value;
 				int cp = (int) de.Key;
@@ -1743,6 +2123,8 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			}
 			// boxes
 			byte [] boxLv2 = new byte [128];
+			// Indexes 0-63 are used for positive offsets,
+			// and 64-127 are used for negative ones.
 			for (int i = 0; i < boxLv2.Length; i++)
 				boxLv2 [i] = 3;
 			foreach (DictionaryEntry de in boxValues) {
@@ -1752,7 +2134,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 					continue;
 				if (off < 0) {
 					fillIndex [0x9] = (byte) (0xE5 + off);
-					AddCharMapGroup ((char) cp, 0x9, 0, boxLv2 [-off]++);
+					AddCharMapGroup ((char) cp, 0x9, 0, boxLv2 [128 + off]++);
 				}
 				else {
 					fillIndex [0x9] = (byte) (0xE5 + off);
@@ -1773,8 +2155,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				uc = Char.GetUnicodeCategory ((char) cp);
 				if (!IsIgnorable (cp) &&
 					uc == UnicodeCategory.CurrencySymbol &&
-					cp != '$' ||
-					cp == 0xAC)
+					cp != '$')
 					AddCharMapGroup ((char) cp, 0xA, 1, 0);
 			}
 			// byte other symbols
@@ -1783,14 +2164,33 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 					continue; // SPECIAL: skip FIXME: why?
 				uc = Char.GetUnicodeCategory ((char) cp);
 				if (!IsIgnorable (cp) &&
-					uc == UnicodeCategory.OtherSymbol)
+					uc == UnicodeCategory.OtherSymbol ||
+					cp == '\u00AC' || cp == '\u00B5' || cp == '\u00B7')
 					AddCharMapGroup ((char) cp, 0xA, 1, 0);
 			}
+			// U+30FB here
+			AddCharMapGroup ('\u30FB', 0xA, 1, 0);
+
+			for (int cp = 0x2020; cp <= 0x2031; cp++)
+				if (Char.IsPunctuation ((char) cp))
+					AddCharMap ((char) cp, 0xA, 1, 0);
+			// SPECIAL CASES: why?
+			AddCharMap ('\u203B', 0xA, 1, 0);
+			AddCharMap ('\u2040', 0xA, 1, 0);
+			AddCharMap ('\u2041', 0xA, 1, 0);
+			AddCharMap ('\u2042', 0xA, 1, 0);
 
-			fillIndex [0xA] = 0x1C; // FIXME: it won't be needed
 			for (int cp = 0x20A0; cp <= 0x20AB; cp++)
 				AddCharMap ((char) cp, 0xA, 1, 0);
-			fillIndex [0xA] = 0x2F; // FIXME: it won't be needed
+
+			// 3004 is skipped at first...
+			for (int cp = 0x3010; cp <= 0x3040; cp++)
+				if (Char.IsSymbol ((char) cp))
+					AddCharMap ((char) cp, 0xA, 1, 0);
+			// SPECIAL CASES: added here
+			AddCharMap ('\u3004', 0xA, 1, 0);
+			AddCharMap ('\u327F', 0xA, 1, 0);
+
 			for (int cp = 0x2600; cp <= 0x2613; cp++)
 				AddCharMap ((char) cp, 0xA, 1, 0);
 			// Dingbats
@@ -1801,13 +2201,17 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			for (int i = 0x2440; i < 0x2460; i++)
 				AddCharMap ((char) i, 0xA, 1, 0);
 
+			// SPECIAL CASES: why?
+			AddCharMap ('\u0E3F', 0xA, 1, 0);
+			AddCharMap ('\u2117', 0xA, 1, 0);
+			AddCharMap ('\u20AC', 0xA, 1, 0);
 			#endregion
 
 			#region Numbers // 0C 02 - 0C E1
 			fillIndex [0xC] = 2;
 
 			// 9F8 : Bengali "one less than the denominator"
-			AddCharMap ('\u09F8', 0xC, 1);
+			AddCharMap ('\u09F8', 0xC, 1, 0x3C);
 
 			ArrayList numbers = new ArrayList ();
 			for (int i = 0; i < 65536; i++)
@@ -1819,11 +2223,15 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			ArrayList numberValues = new ArrayList ();
 			foreach (int i in numbers)
 				numberValues.Add (new DictionaryEntry (i, decimalValue [(char) i]));
+			// SPECIAL CASE: Cyrillic Thousand sign
+			numberValues.Add (new DictionaryEntry (0x0482, 1000m));
 			numberValues.Sort (DecimalDictionaryValueComparer.Instance);
 
 //foreach (DictionaryEntry de in numberValues)
 //Console.Error.WriteLine ("****** number {0:X04} : {1} {2}", de.Key, de.Value, decompType [(int) de.Key]);
 
+			// FIXME: fillIndex adjustment lines are too
+			// complicated. It must be simpler.
 			decimal prevValue = -1;
 			foreach (DictionaryEntry de in numberValues) {
 				int cp = (int) de.Key;
@@ -1841,18 +2249,25 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 						fillIndex [0xC]++;
 
 					int xcp;
-					if (currValue <= 10) {
-						xcp = (int) prevValue + 0x2170 - 1;
-						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
+					if (currValue <= 13) {
+						if (currValue == 4)
+							fillIndex [0xC]++;
+						// SPECIAL CASE
+						if (currValue == 11)
+							AddCharMap ('\u0BF0', 0xC, 1);
 						xcp = (int) prevValue + 0x2160 - 1;
 						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-						fillIndex [0xC] += 2;
-						xcp = (int) prevValue + 0x3021 - 1;
+						xcp = (int) prevValue + 0x2170 - 1;
 						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
 						fillIndex [0xC]++;
 					}
-					else if (currValue == 11)
+					if (currValue < 12)
+						fillIndex [0xC]++;
+					if (currValue <= 10) {
+						xcp = (int) prevValue + 0x3021 - 1;
+						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
 						fillIndex [0xC]++;
+					}
 				}
 				if (prevValue < currValue)
 					prevValue = currValue;
@@ -1860,20 +2275,19 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 					continue;
 				// HangZhou and Roman are add later 
 				// (code is above)
-				else if (0x3021 <= cp && cp < 0x302A
-					|| 0x2160 <= cp && cp < 0x216A
-					|| 0x2170 <= cp && cp < 0x217A)
+				if (0x3021 <= cp && cp < 0x302A
+					|| 0x2160 <= cp && cp < 0x216C
+					|| 0x2170 <= cp && cp < 0x217C)
 					continue;
 
-				if (cp ==  0x215B) // FIXME: why?
+				if (cp == 0x215B) // FIXME: why?
 					fillIndex [0xC] += 2;
 				else if (cp == 0x3021) // FIXME: why?
 					fillIndex [0xC]++;
-				AddCharMapGroup ((char) cp, 0xC, 0, diacritical [cp]);
 				if (addnew || cp <= '9') {
 					int mod = (int) currValue - 1;
 					int xcp;
-					if (1 <= currValue && currValue <= 10) {
+					if (1 <= currValue && currValue <= 11) {
 						xcp = mod + 0x2776;
 						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
 						xcp = mod + 0x2780;
@@ -1890,9 +2304,27 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
 					}
 				}
+				if (addnew && currValue >= 10 && currValue < 13 || cp == 0x09F9)
+					fillIndex [0xC]++;
+				AddCharMapGroup ((char) cp, 0xC, 0, diacritical [cp], true);
 
-				if (cp != 0x09E7 && cp != 0x09EA)
+				switch (cp) {
+				// Maybe Bengali digit numbers do not increase
+				// indexes, but 0x09E6 does.
+				case 0x09E7: case 0x09E8: case 0x09E9:
+				case 0x09EA:
+				// SPECIAL CASES
+				case 0x0BF0: case 0x2180: case 0x2181:
+					break;
+				// SPECIAL CASE
+				case 0x0BF1:
 					fillIndex [0xC]++;
+					break;
+				default:
+					if (currValue < 11 || currValue == 1000)
+						fillIndex [0xC]++;
+					break;
+				}
 
 				// Add special cases that are not regarded as 
 				// numbers in UnicodeCategory speak.
@@ -1901,7 +2333,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 					AddCharMapGroup ('\u01BD', 0xC, 0, 0);
 					AddCharMapGroup ('\u01BC', 0xC, 1, 0);
 				}
-				else if (cp == '6') // FIXME: why?
+				else if (cp == '2' || cp == '6') // FIXME: why?
 					fillIndex [0xC]++;
 			}
 
@@ -1916,7 +2348,6 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			for (int i = 0; i < alphabets.Length; i++)
 				AddAlphaMap (alphabets [i], 0xE, alphaWeights [i]);
 
-
 			// non-ASCII Latin alphabets
 			// FIXME: there is no such characters that are placed
 			// *after* "alphabets" array items. This is nothing
@@ -1968,77 +2399,176 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				AddCharMapGroup ((char) i, 0xE, 1, 0);
 			}
 
-			// Greek and Coptic
-			fillIndex [0xF] = 02;
-			for (int i = 0x0380; i < 0x0390; i++)
+			// IPA extensions
+			// FIXME: this results in not equivalent values to
+			// Windows, but is safer for comparison.
+			char [] ipaArray = new char [0x300 - 0x250 + 0x20];
+			for (int i = 0x40; i < 0x60; i++)
 				if (Char.IsLetter ((char) i))
-					AddLetterMap ((char) i, 0xF, 1);
-			fillIndex [0xF] = 02;
-			for (int i = 0x0391; i < 0x03CF; i++)
+					ipaArray [i - 0x40] = (char) (i);
+			for (int i = 0x250; i < 0x300; i++)
 				if (Char.IsLetter ((char) i))
-					AddLetterMap ((char) i, 0xF, 1);
+					ipaArray [i - 0x250 + 0x20] = (char) i;
+			Array.Sort (ipaArray, UCAComparer.Instance);
+			int targetASCII = 0;
+			byte latinDiacritical = 0x7B;
+			foreach (char c in ipaArray) {
+				if (c <= 'Z') {
+					targetASCII = c;
+					latinDiacritical = 0x7B;
+				}
+				else
+					map [(int) c] = new CharMapEntry (
+						0xE,
+						map [targetASCII].Level1,
+						latinDiacritical++);
+			}
+
+			// Greek and Coptic
+
+			// FIXME: this is (mysterious and) incomplete.
+			for (int i = 0x0380; i < 0x0400; i++)
+				if (diacritical [i] == 0 &&
+					decompLength [i] == 1 &&
+					decompType [i] == DecompositionCompat)
+					diacritical [i] = 3;
+
+			fillIndex [0xF] = 2;
+			for (int i = 0x0391; i < 0x03AA; i++)
+				if (i != 0x03A2)
+					AddCharMap ((char) i, 0xF, 1,
+						diacritical [i]);
+			fillIndex [0xF] = 2;
+			for (int i = 0x03B1; i < 0x03CA; i++)
+				if (i != 0x03C2)
+					AddCharMap ((char) i, 0xF, 1,
+						diacritical [i]);
+			// Final Sigma
+			map [0x03C2] = new CharMapEntry (0xF,
+				map [0x03C3].Level1, map [0x03C3].Level2);
+
 			fillIndex [0xF] = 0x40;
-			for (int i = 0x03D0; i < 0x0400; i++)
-				if (Char.IsLetter ((char) i))
-					AddLetterMap ((char) i, 0xF, 1);
+			for (int i = 0x03DA; i < 0x03F0; i++)
+				AddCharMap ((char) i, 0xF,
+					(byte) (i % 2 == 0 ? 0 : 2),
+					diacritical [i]);
+
+			// NFKD
+			for (int i = 0x0386; i <= 0x0400; i++)
+				FillLetterNFKD (i, true, true);
+
+			// Cyrillic.
+			// Cyrillic letters are sorted like Latin letters i.e. 
+			// containing culture-specific letters between the
+			// standard Cyrillic sequence.
+			//
+			// We can't use UCA here; it has different sorting.
+			char [] orderedCyrillic = new char [] {
+				'\u0430', '\u0431', '\u0432', '\u0433', '\u0434',
+				'\u0452', // DJE for Serbocroatian
+				'\u0435',
+				'\u0454', // IE for Ukrainian
+				'\u0436', '\u0437',
+				'\u0455', // DZE
+				'\u0438',
+				'\u0456', // Byelorussian-Ukrainian I
+				'\u0457', // YI
+				'\u0439',
+				'\u0458', // JE
+				'\u043A', '\u043B',
+				'\u0459', // LJE
+				'\u043C', '\u043D',
+				'\u045A', // NJE
+				'\u043E',
+				// 4E9 goes here.
+				'\u043F', '\u0440', '\u0441', '\u0442',
+				'\u045B', // TSHE for Serbocroatian
+				'\u0443',
+				'\u045E', // Short U for Byelorussian
+				'\u04B1', // Straight U w/ stroke (diacritical!)
+				'\u0444', '\u0445', '\u0446', '\u0447',
+				'\u045F', // DZHE
+				'\u0448', '\u0449', '\u044A', '\u044B', '\u044C',
+				'\u044D', '\u044E', '\u044F'};
+
+			// Some characters map to basic Cyrillic letters; see
+			// the character names in UnicodeData.txt for the
+			// sources. Here I simply declare an equivalence array.
+			// Entry i is the base letter for U+0490 + 2 * i, i.e.
+			// the table starts at U+0490 and skips small letters.
+			char [] cymap_src = new char [] {
+				'\u0433', '\u0433', '\u0433', '\u0436',
+				'\u0437', '\u043A', '\u043A', '\u043A',
+				'\u043A', '\u043D', '\u043D', '\u043F',
+				'\u0445', '\u0441', '\u0442', '\u0443',
+				'\u0443', '\u0445', '\u0446', '\u0447',
+				'\u0447', '\u0432', '\u0435', '\u0435',
+				'\u0406', '\u0436', '\u043A', '\u043D',
+				'\u0447', '\u0435'};
+
+			fillIndex [0x10] = 0x8D;
+			for (int i = 0x0460; i < 0x0481; i++) {
+				if (Char.IsLetter ((char) i)) {
+					if (i == 0x0476)
+						// U+476/477 have the same
+						// primary weight as U+474/475.
+						fillIndex [0x10] -= 3;
+					AddLetterMap ((char) i, 0x10, 3);
+				}
+			}
 
-			// Cyrillic - character name order
 			fillIndex [0x10] = 0x6;
-//*
-for (int i = 0; i < orderedCyrillic.Length; i++)
-Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
-
-			// table which is moslty from UCA DUCET.
 			for (int i = 0; i < orderedCyrillic.Length; i++) {
 				char c = Char.ToUpper (orderedCyrillic [i], CultureInfo.InvariantCulture);
 				if (!IsIgnorable ((int) c) &&
-					c <= '\u045C' &&
-					Char.IsLetter (c)) {
+					Char.IsLetter (c) &&
+					!map [c].Defined) {
 					AddLetterMap (c, 0x10, 0);
 					fillIndex [0x10] += 3;
 				}
 			}
-			/*
-			for (int i = 0x0460; i < 0x0481; i++) {
-				if (Char.IsLetter ((char) i)) {
-					AddLetterMap ((char) i, 0x10, 0);
-					fillIndex [0x10] += 3;
-				}
-			}
-			*/
-/*
-			for (int i = 0x0400; i <= 0x0486; i++) {
-				if (!Char.IsLetter ((char) i)) {
-//					AddCharMap ((char) i, 0x1, 1);
-					continue;
-				}
-				if (!cyrillicLetterPrimaryValues.ContainsKey (i)) {
-					Console.Error.WriteLine ("no value for {0:x04}", i);
-					continue;
-				}
-				fillIndex [0x10] = 
-					(byte) cyrillicLetterPrimaryValues [i];
-				AddLetterMap ((char) i, 0x10, 0);
+
+			// NFKD
+			for (int i = 0x0401; i <= 0x045F; i++)
+				FillLetterNFKD (i, false, false);
+
+			for (int i = 0; i < cymap_src.Length; i++) {
+				char c = cymap_src [i];
+				fillIndex [0x10] = map [c].Level1;
+				int c2 = 0x0490 + i * 2;
+				AddLetterMapCore ((char) c2, 0x10, 0, diacritical [c2], false);
 			}
-*/
 
 			// Armenian
 			fillIndex [0x11] = 0x3;
-			for (int i = 0x0531; i < 0x0586; i++)
+			fillIndex [0x1] = 0x98;
+			for (int i = 0x0531; i < 0x0586; i++) {
+				if (i == 0x0559 || i == 0x55A)
+					AddCharMap ((char) i, 1, 1);
 				if (Char.IsLetter ((char) i))
 					AddLetterMap ((char) i, 0x11, 1);
+			}
 
 			// Hebrew
 			// -Letters
-			fillIndex [0x12] = 0x3;
+			fillIndex [0x12] = 0x2;
 			for (int i = 0x05D0; i < 0x05FF; i++)
-				if (Char.IsLetter ((char) i))
-					AddLetterMap ((char) i, 0x12, 1);
+				if (Char.IsLetter ((char) i)) {
+					if (isUppercase [i]) {
+						fillIndex [0x12]--;
+						AddLetterMap ((char) i, 0x12, 2);
+					}
+					else
+						AddLetterMap ((char) i, 0x12, 1);
+				}
 			// -Accents
 			fillIndex [0x1] = 0x3;
-			for (int i = 0x0591; i <= 0x05C2; i++)
+			for (int i = 0x0591; i <= 0x05C2; i++) {
+				if (i == 0x05A3 || i == 0x05BB)
+					fillIndex [0x1]++;
 				if (i != 0x05BE)
 					AddCharMap ((char) i, 0x1, 1);
+			}
 
 			// Arabic
 			fillIndex [0x1] = 0x8E;
@@ -2056,14 +2586,33 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 //					(byte) arabicLetterPrimaryValues [i], 1);
 				fillIndex [0x13] = 
 					(byte) arabicLetterPrimaryValues [i];
-				AddLetterMap ((char) i, 0x13, 0);
+				byte formDiacritical = 8; // default
+				// SPECIAL CASES:
+				switch (i) {
+				case 0x0622: formDiacritical = 9; break;
+				case 0x0623: formDiacritical = 0xA; break;
+				case 0x0624: formDiacritical = 5; break;
+				case 0x0625: formDiacritical = 0xB; break;
+				case 0x0626: formDiacritical = 7; break;
+				case 0x0649: formDiacritical = 5; break;
+				case 0x064A: formDiacritical = 7; break;
+				}
+//				AddLetterMapCore ((char) i, 0x13, 1, formDiacritical, false);
+				AddArabicCharMap ((char) i, 0x13, 1, formDiacritical);
 			}
+			for (int i = 0x0670; i < 0x0673; i++)
+				map [i] = new CharMapEntry (0x13, 0xB, (byte) (0xC + i - 0x670));
 			fillIndex [0x13] = 0x84;
 			for (int i = 0x0674; i < 0x06D6; i++)
 				if (Char.IsLetter ((char) i))
-					AddLetterMap ((char) i, 0x13, 1);
+					AddLetterMapCore ((char) i, 0x13, 1, 0, false);
 
 			// Devanagari
+
+			// FIXME: this could be fixed in more decent way
+			for (int i = 0x0958; i <= 0x095F; i++)
+				diacritical [i] = 8;
+
 			// FIXME: it does seem straight codepoint mapping.
 			fillIndex [0x14] = 04;
 			for (int i = 0x0901; i < 0x0905; i++)
@@ -2131,10 +2680,16 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 				if (c == '\u0A3C' || c == '\u0A4D' ||
 					'\u0A66' <= c && c <= '\u0A71')
 					continue;
-				// SPECIAL CASE: U+A38 = U+A36 at primary level (why?)
+				// SPECIAL CASES
 				byte shift = 4;
-				if (c == '\u0A36' || c == '\u0A16' || c == '\u0A17' || c == '\u0A5B' || c == '\u0A5E')
+				switch (c) {
+				case '\u0A33': case '\u0A36': case '\u0A16':
+				case '\u0A17': case '\u0A5B': case '\u0A5E':
 					shift = 0;
+					break;
+				}
+				if (c == '\u0A3E') // Skip
+					fillIndex [0x16] = 0xC0;
 				AddLetterMap (c, 0x16, shift);
 			}
 
@@ -2195,7 +2750,7 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 					AddLetterMap ((char) i, 0x1, 1);
 					continue;
 				}
-				AddLetterMap ((char) i, 0x18, 1);
+				AddLetterMapCore ((char) i, 0x18, 1, 0, true);
 			}
 
 			// Tamil
@@ -2254,17 +2809,22 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 			
 			// Malayalam
 			fillIndex [0x1C] = 2;
-			for (int i = 0x0D02; i < 0x0D61; i++)
+			fillIndex [0x1] = 3;
+			for (int i = 0x0D02; i < 0x0D61; i++) {
 				// FIXME: I avoided MSCompatUnicodeTable usage
 				// here (it results in recursion). So check if
 				// using NonSpacingMark makes sense or not.
 				if (Char.GetUnicodeCategory ((char) i) != UnicodeCategory.NonSpacingMark)
 //				if (!MSCompatUnicodeTable.IsIgnorable ((char) i))
 					AddCharMap ((char) i, 0x1C, 1);
+				else if (!IsIgnorable ((char) i))
+					AddCharMap ((char) i, 1, 1);
+			}
 
 			// Thai ... note that it breaks 0x1E wall after E2B!
 			// Also, all Thai characters have level 2 value 3.
 			fillIndex [0x1E] = 2;
+			fillIndex [0x1] = 3;
 			for (int i = 0xE40; i <= 0xE44; i++)
 				AddCharMap ((char) i, 0x1E, 1, 3);
 			for (int i = 0xE01; i < 0xE2B; i++)
@@ -2279,13 +2839,25 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 			char [] specialThai = new char [] {'\u0E45', '\u0E46',
 				'\u0E4E', '\u0E4F', '\u0E5A', '\u0E5B'};
 			foreach (char c in specialThai)
-				AddCharMap (c, 0x1F, 1);
+				AddCharMap (c, 0x1F, 1, 3);
+
+			for (int i = 0xE00; i < 0xE80; i++)
+				if (Char.GetUnicodeCategory ((char) i) ==
+					UnicodeCategory.NonSpacingMark)
+					AddCharMap ((char) i, 1, 1);
 
 			// Lao
 			fillIndex [0x1F] = 2;
-			for (int i = 0xE80; i < 0xEDF; i++)
-				if (Char.IsLetter ((char) i))
+			fillIndex [0x1] = 3;
+			for (int i = 0xE80; i < 0xEDF; i++) {
+				if (IsIgnorable ((char) i))
+					continue;
+				else if (Char.IsLetter ((char) i))
 					AddCharMap ((char) i, 0x1F, 1);
+				else if (Char.GetUnicodeCategory ((char) i) ==
+					UnicodeCategory.NonSpacingMark)
+					AddCharMap ((char) i, 1, 1);
+			}
 
 			// Georgian. orderedGeorgian is from UCA DUCET.
 			fillIndex [0x21] = 5;
@@ -2362,6 +2934,21 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 			AddLetterMap ((char) 0x3093, 0x22, 0);
 			AddLetterMap ((char) (0x3093 + 0x60), 0x22, 0);
 
+			map [0x3094] = new CharMapEntry (map [0x30A6].Category,
+				map [0x30A6].Level1, 3);// voiced hiragana U
+			map [0x30F4] = new CharMapEntry (map [0x30A6].Category,
+				map [0x30A6].Level1, 3);// voiced katakana U
+
+			map [0x30F5] = new CharMapEntry (map [0x30AB].Category,
+				map [0x30AB].Level1, 0);// small katakana Ka
+			map [0x30F6] = new CharMapEntry (map [0x30B1].Category,
+				map [0x30B1].Level1, 0);// small katakana Ke
+			// voiced Wa lines
+			for (int i = 0x30F7; i < 0x30FB; i++)
+				map [i] = new CharMapEntry (map [i - 8].Category,
+					map [i - 8].Level1,
+					3);
+
 			// JIS Japanese square chars.
 			fillIndex [0x22] = 0x97;
 			jisJapanese.Sort (JISComparer.Instance);
@@ -2402,10 +2989,11 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 			// Thaana
 			// FIXME: it turned out that it does not look like UCA
 			fillIndex [0x24] = 0x6E;
+			fillIndex [0x1] = 0xAC;
 			for (int i = 0; i < orderedThaana.Length; i++) {
 				char c = orderedThaana [i];
 				if (IsIgnorableNonSpacing ((int) c))
-					continue;
+					AddCharMap (c, 1, 1);
 				AddCharMap (c, 0x24, 2);
 				if (c == '\u0782') // SPECIAL CASE: why?
 					fillIndex [0x24] += 2;
@@ -2462,9 +3050,9 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 				+ "[\u11E8 \u11E9],, \u11EA \u317D,, \u110A=\u11BB,,, >"
 			+ "<{\u1134 \u1140}, \u317E,,,,,,, \u11EB,"
 				+ "\u110B=\u11BC, [\u1161 \u11A2], \u1160 >"
-			+ "<{\u1141 \u114C}, \u11EE, \u11EC, \u11ED,,,,, "
+			+ "<{\u1141 \u114C}, \u3180=\u11EE, \u11EC, \u11ED,,,,, "
 				+ "\u11F1,, \u11F2,,,"
-				+ "\u11EF,,, \u11F0, \u110C=\u11BD,, >"
+				+ "\u11EF,,, \u3181=\u11F0, \u110C=\u11BD,, >"
 			+ "<\u114D, \u110D,,  >"
 			+ "<{\u114E \u1151},, \u110E=\u11BE,,  >"
 			+ "<{\u1152 \u1155},,, \u110F=\u11BF >"
@@ -2597,30 +3185,31 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 			// PrivateUse ... computed.
 			// remaining Surrogate ... computed.
 
-			#region Special "biggest" area (FF FF)
-			fillIndex [0xFF] = 0xFF;
-			char [] specialBiggest = new char [] {
-				'\u3005', '\u3031', '\u3032', '\u309D',
-				'\u309E', '\u30FC', '\u30FD', '\u30FE',
-				'\uFE7C', '\uFE7D', '\uFF70'};
-			foreach (char c in specialBiggest)
-				AddCharMap (c, 0xFF, 0);
-			#endregion
-
 			#region 07 - ASCII non-alphanumeric + 3001, 3002 // 07
 			// non-alphanumeric ASCII except for: + - < = > '
 			for (int i = 0x21; i < 0x7F; i++) {
+				// SPECIAL CASE: U+02C6 seems to be regarded as
+				// equivalent to '^', which does not conform
+				// to the Unicode standard character database.
+				if (i == 0x005B)
+					AddCharMap ('\u2045', 0x7, 0, 0x1C);
+				if (i == 0x005D)
+					AddCharMap ('\u2046', 0x7, 0, 0x1C);
+				if (i == 0x005E)
+					AddCharMap ('\u02C6', 0x7, 0, 3);
+				if (i == 0x0060)
+					AddCharMap ('\u02CB', 0x7, 0, 3);
+
 				if (Char.IsLetterOrDigit ((char) i)
 					|| "+-<=>'".IndexOf ((char) i) >= 0)
 					continue; // they are not added here.
-					AddCharMapGroup2 ((char) i, 0x7, 1, 0);
+
+				AddCharMapGroup2 ((char) i, 0x7, 1, 0);
 				// Insert 3001 after ',' and 3002 after '.'
 				if (i == 0x2C)
 					AddCharMapGroup2 ('\u3001', 0x7, 1, 0);
-				else if (i == 0x2E) {
-					fillIndex [0x7]--;
+				else if (i == 0x2E)
 					AddCharMapGroup2 ('\u3002', 0x7, 1, 0);
-				}
 				else if (i == 0x3A)
 					AddCharMap ('\uFE30', 0x7, 1, 0);
 			}
@@ -2633,16 +3222,35 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 
 				// FIXME: actually those reset should not be 
 				// done but here I put for easy goal.
+				if (i == 0x05C3)
+					fillIndex [0x7]++;
 				if (i == 0x0700)
 					fillIndex [0x7] = 0xE2;
 				if (i == 0x2016)
 					fillIndex [0x7] = 0x77;
+				if (i == 0x3008)
+					fillIndex [0x7] = 0x93;
+
+				if (0x02C8 <= i && i <= 0x02CD)
+					continue; // nonspacing marks
+
+				// SPECIAL CASE: maybe they could be given a
+				// dummy NFKD mapping, and then no special processing
+				// would be required here.
+				if (i == 0x00AF)
+					AddCharMap ('\u02C9', 0x7, 0, 3);
+				if (i == 0x00B4)
+					AddCharMap ('\u02CA', 0x7, 0, 3);
+				if (i == 0x02C7)
+					AddCharMap ('\u02D8', 0x7, 0, 3);
 
 				// SPECIAL CASES:
 				switch (i) {
 				case 0xAB: // 08
 				case 0xB7: // 0A
 				case 0xBB: // 08
+				case 0x02B9: // 01
+				case 0x02BA: // 01
 				case 0x2329: // 09
 				case 0x232A: // 09
 					continue;
@@ -2652,29 +3260,106 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 				case UnicodeCategory.OtherPunctuation:
 				case UnicodeCategory.ClosePunctuation:
 				case UnicodeCategory.OpenPunctuation:
+				case UnicodeCategory.ConnectorPunctuation:
 				case UnicodeCategory.InitialQuotePunctuation:
 				case UnicodeCategory.FinalQuotePunctuation:
 				case UnicodeCategory.ModifierSymbol:
 					// SPECIAL CASES: // 0xA
-					if (0x2020 <= i && i <= 0x2042)
+					if (0x2020 <= i && i <= 0x2031)
 						continue;
-					AddCharMapGroup ((char) i, 0x7, 1, 0);
+					if (i == 0x3003) // added later
+						continue;
+					AddCharMapGroup2 ((char) i, 0x7, 1, 0);
 					break;
 				default:
-					if (i == 0xA6) // SPECIAL CASE. FIXME: why?
+					if (i == 0xA6 || i == 0x1C3 || i == 0x037A) // SPECIAL CASE. FIXME: why?
 						goto case UnicodeCategory.OtherPunctuation;
 					break;
 				}
 			}
+
 			// Control pictures
 			// FIXME: it should not need to reset level 1, but
 			// it's for easy goal.
 			fillIndex [0x7] = 0xB6;
-			for (int i = 0x2400; i <= 0x2421; i++)
+			for (int i = 0x2400; i <= 0x2424; i++)
 				AddCharMap ((char) i, 0x7, 1, 0);
+
+			// FIXME: what are they?
+			AddCharMap ('\u3003', 0x7, 1);
+			AddCharMap ('\u3006', 0x7, 1);
+			AddCharMap ('\u02D0', 0x7, 1);
+			AddCharMap ('\u10FB', 0x7, 1);
+			AddCharMap ('\u0950', 0x7, 1);
+			AddCharMap ('\u093D', 0x7, 1);
+			AddCharMap ('\u0964', 0x7, 1);
+			AddCharMap ('\u0965', 0x7, 1);
+			AddCharMap ('\u0970', 0x7, 1);
+
+			#endregion
+
+			#region category 08 - symbols
+			fillIndex [0x8] = 2;
+			// Here Windows mapping is not straightforward. It is
+			// not based on computation but seems manual sorting.
+			AddCharMapGroup ('+', 0x8, 1, 0); // plus
+			AddCharMapGroup ('\u2212', 0x8, 1); // minus
+			AddCharMapGroup ('\u229D', 0x8, 1); // minus
+			AddCharMapGroup ('\u2297', 0x8, 1); // mul
+			AddCharMapGroup ('\u2044', 0x8, 1); // div
+			AddCharMapGroup ('\u2215', 0x8, 0); // div
+			AddCharMapGroup ('\u2298', 0x8, 1); // div slash
+			AddCharMapGroup ('\u2217', 0x8, 0); // mul
+			AddCharMapGroup ('\u229B', 0x8, 1); // asterisk oper
+			AddCharMapGroup ('\u2218', 0x8, 0); // ring
+			AddCharMapGroup ('\u229A', 0x8, 1); // ring
+			AddCharMapGroup ('\u2219', 0x8, 0); // bullet
+			AddCharMapGroup ('\u2299', 0x8, 1); // dot oper
+			AddCharMapGroup ('\u2213', 0x8, 1); // minus-or-plus
+			AddCharMapGroup ('\u003C', 0x8, 1); // <
+			AddCharMapGroup ('\u227A', 0x8, 1); // precedes relation
+			AddCharMapGroup ('\u22B0', 0x8, 1); // precedes under relation
+
+			for (int cp = 0; cp < 0x2300; cp++) {
+				if (cp == 0xAC) // SPECIAL CASE: skip
+					continue;
+				if (cp == 0x200) {
+					cp = 0x2200; // skip to 2200
+					fillIndex [0x8] = 0x21;
+				}
+				if (cp == 0x2295)
+					fillIndex [0x8] = 0x3;
+				if (cp == 0x22A2)
+					fillIndex [0x8] = 0xAB;
+				if (cp == 0x22B2)
+					fillIndex [0x8] = 0xB9;
+				if (!map [cp].Defined &&
+//					Char.GetUnicodeCategory ((char) cp) ==
+//					UnicodeCategory.MathSymbol)
+					Char.IsSymbol ((char) cp))
+					AddCharMapGroup ((char) cp, 0x8, 1);
+				// SPECIAL CASES: no idea why Windows sorts as such
+				switch (cp) {
+				case 0x3E:
+					AddCharMap ('\u227B', 0x8, 1, 0);
+					AddCharMap ('\u22B1', 0x8, 1, 0);
+					break;
+				case 0xB1:
+					AddCharMapGroup ('\u00AB', 0x8, 1);
+					AddCharMapGroup ('\u226A', 0x8, 1);
+					AddCharMapGroup ('\u00BB', 0x8, 1);
+					AddCharMapGroup ('\u226B', 0x8, 1);
+					break;
+				case 0xF7:
+					AddCharMap ('\u01C0', 0x8, 1, 0);
+					AddCharMap ('\u01C1', 0x8, 1, 0);
+					AddCharMap ('\u01C2', 0x8, 1, 0);
+					break;
+				}
+			}
 			#endregion
 
-			// FIXME: for 07 xx we need more love.
+			#region Hack!
 
 			// Characters w/ diacritical marks (NFKD)
 			for (int i = 0; i <= char.MaxValue; i++) {
@@ -2685,7 +3370,7 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 
 				int start = decompIndex [i];
 				int primaryChar = decompValues [start];
-				int secondary = 0;
+				int secondary = diacritical [i];
 				bool skip = false;
 				int length = decompLength [i];
 				// special processing for parenthesized ones.
@@ -2714,59 +3399,8 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 				
 			}
 
-			// category 08 - symbols
-			fillIndex [0x8] = 2;
-			// Here Windows mapping is not straightforward. It is
-			// not based on computation but seems manual sorting.
-			AddCharMapGroup ('+', 0x8, 1, 0); // plus
-			AddCharMapGroup ('\u2212', 0x8, 1, 0); // minus
-			AddCharMapGroup ('\u229D', 0x8, 1, 0); // minus
-			AddCharMapGroup ('\u2297', 0x8, 1, 0); // mul
-			AddCharMapGroup ('\u2044', 0x8, 1, 0); // div
-			AddCharMapGroup ('\u2215', 0x8, 1, 0); // div
-			AddCharMapGroup ('\u2217', 0x8, 1, 0); // mul
-			AddCharMapGroup ('\u2218', 0x8, 1, 0); // ring
-			AddCharMapGroup ('\u2219', 0x8, 1, 0); // bullet
-			AddCharMapGroup ('\u2213', 0x8, 1, 0); // minus-or-plus
-			AddCharMapGroup ('\u003C', 0x8, 1, 0); // <
-			AddCharMapGroup ('\u227A', 0x8, 1, 0); // precedes relation
-			AddCharMapGroup ('\u22B0', 0x8, 1, 0); // precedes under relation
+			// Diacritical weight adjustment
 
-			for (int cp = 0; cp < 0x2300; cp++) {
-				if (cp == 0xAC) // SPECIAL CASE: skip
-					continue;
-				if (cp == 0x200) {
-					cp = 0x2200; // skip to 2200
-					fillIndex [0x8] = 0x21;
-				}
-				if (cp == 0x2295)
-					fillIndex [0x8] = 0x3;
-				if (!map [cp].Defined &&
-//					Char.GetUnicodeCategory ((char) cp) ==
-//					UnicodeCategory.MathSymbol)
-					Char.IsSymbol ((char) cp))
-					AddCharMapGroup ((char) cp, 0x8, 1, 0);
-				// SPECIAL CASES: no idea why Windows sorts as such
-				switch (cp) {
-				case 0x3E:
-					AddCharMap ('\u227B', 0x8, 1, 0);
-					AddCharMap ('\u22B1', 0x8, 1, 0);
-					break;
-				case 0xB1:
-					AddCharMapGroup ('\u00AB', 0x8, 1, 0);
-					AddCharMapGroup ('\u226A', 0x8, 1, 0);
-					AddCharMapGroup ('\u00BB', 0x8, 1, 0);
-					AddCharMapGroup ('\u226B', 0x8, 1, 0);
-					break;
-				case 0xF7:
-					AddCharMap ('\u01C0', 0x8, 1, 0);
-					AddCharMap ('\u01C1', 0x8, 1, 0);
-					AddCharMap ('\u01C2', 0x8, 1, 0);
-					break;
-				}
-			}
-
-			#region Level2 adjustment
 			// Arabic Hamzah
 			diacritical [0x624] = 0x5;
 			diacritical [0x626] = 0x7;
@@ -2785,6 +3419,10 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 					mod = diacritical [i];
 					break;
 				case 0x13: // Arabic
+					if (i == 0x0621)
+						break; // 0
+					if (diacritical [i] == 0 && decompLength [i] != 0)
+						diacritical [i] = map [decompValues [decompIndex [i]]].Level2;
 					if (diacritical [i] == 0 && i >= 0xFE8D)
 						mod = 0x8; // default for arabic
 					break;
@@ -2795,25 +3433,79 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 					map [i] = new CharMapEntry (
 						cat, map [i].Level1, mod);
 			}
-			#endregion
 
-			// FIXME: this is hack but those NonSpacingMark 
+			// FIXME: this is half a hack, but those NonSpacingMark
 			// characters and still undefined are likely to
 			// be nonspacing.
-			for (int i = 0; i < char.MaxValue; i++)
-				if (!map [i].Defined &&
-					!IsIgnorable (i) &&
-					Char.GetUnicodeCategory ((char) i) ==
+			for (int i = 0; i < char.MaxValue; i++) {
+				if (map [i].Defined ||
+					IsIgnorable (i))
+					continue;
+				switch (i) {
+				// SPECIAL CASES.
+				case 0x02B9:
+				case 0x02BA:
+					break;
+				default:
+					if (Char.GetUnicodeCategory ((char) i) !=
 					UnicodeCategory.NonSpacingMark)
+						continue;
+					break;
+				}
+				if (diacritical [i] != 0)
+					map [i] = new CharMapEntry (1, 1, diacritical [i]);
+				else
 					AddCharMap ((char) i, 1, 1);
+			}
 
-			// FIXME: this is hack but those Symbol characters
-			// are likely to fall into 0xA category.
-			for (int i = 0; i < char.MaxValue; i++)
-				if (!map [i].Defined &&
-					!IsIgnorable (i) &&
-					Char.IsSymbol ((char) i))
-					AddCharMap ((char) i, 0xA, 1);
+			#endregion
+		}
+
+		TextInfo ti = CultureInfo.InvariantCulture.TextInfo;
+
+		private void FillLetterNFKD (int i, bool checkUpper, bool greekRemap)
+		{
+			if (map [i].Defined)
+				return;
+			int up = (int) ti.ToUpper ((char) i);
+			if (checkUpper && map [up].Category == 0xF) {
+				if (i == up)
+					return;
+				FillLetterNFKD (up, checkUpper, greekRemap);
+				map [i] = new CharMapEntry (0xF,
+					map [up].Level1,
+					map [up].Level2);
+			} else {
+				int idx = decompIndex [i];
+				if (idx == 0)
+					return;
+				int primary = decompValues [decompIndex [i]];
+				FillLetterNFKD (primary, checkUpper, greekRemap);
+
+				int lv2 = map [primary].Level2;
+				byte off = 0;
+				for (int l = 1; l < decompLength [i]; l++) {
+					int tmp = decompValues [idx + l];
+					if (map [tmp].Category != 1)
+						return;
+					if (greekRemap && map [tmp].Level2 == 0xC)
+						off += 3;
+					else
+						off += map [tmp].Level2;
+				}
+				if (off > 0) {
+					if (lv2 == 0)
+						lv2 += 2;
+					lv2 += off;
+				}
+				// ... but override if the value already exists.
+				if (diacritical [i] != 0)
+					lv2 = diacritical [i];
+				map [i] = new CharMapEntry (
+					map [primary].Category,
+					map [primary].Level1,
+					(byte) lv2);
+			}
 		}
 
 		private void IncrementSequentialIndex (ref byte hangulCat)
@@ -2845,32 +3537,32 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 				char c = (char) (i + b);
 				byte arg = (byte) (b > 0 ? b + 2 : 0);
 				// Hiragana
-				AddLetterMapCore (c, 0x22, 0, arg);
+				AddLetterMapCore (c, 0x22, 0, arg, false);
 				// Katakana
-				AddLetterMapCore ((char) (c + 0x60), 0x22, 0, arg);
+				AddLetterMapCore ((char) (c + 0x60), 0x22, 0, arg, false);
 			}
 		}
 
 		private void AddLetterMap (char c, byte category, byte updateCount)
 		{
-			AddLetterMapCore (c, category, updateCount, 0);
+			AddLetterMapCore (c, category, updateCount, 0, true);
 		}
 
-		private void AddLetterMapCore (char c, byte category, byte updateCount, byte level2)
+		private void AddLetterMapCore (char c, byte category, byte updateCount, byte level2, bool deferLevel2)
 		{
 			char c2;
 			// <small> updates index
 			c2 = ToSmallForm (c);
 			if (c2 != c)
-				AddCharMapGroup (c2, category, updateCount, level2);
+				AddCharMapGroup (c2, category, updateCount, level2, deferLevel2);
 			c2 = Char.ToLower (c, CultureInfo.InvariantCulture);
 			if (c2 != c && !map [(int) c2].Defined)
-				AddLetterMapCore (c2, category, 0, level2);
+				AddLetterMapCore (c2, category, 0, level2, deferLevel2);
 			bool doUpdate = true;
 			if (IsIgnorable ((int) c) || map [(int) c].Defined)
 				doUpdate = false;
 			else
-				AddCharMapGroup (c, category, 0, level2);
+				AddCharMapGroup (c, category, 0, level2, deferLevel2);
 			if (doUpdate)
 				fillIndex [category] += updateCount;
 		}
@@ -2891,19 +3583,6 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 			return true;
 		}
 
-		private void AddCharMapGroupTail (char c, byte category, byte updateCount)
-		{
-			char c2 = ToSmallFormTail (c);
-			if (c2 != c)
-				AddCharMap (c2, category, updateCount, 0);
-			// itself
-			AddCharMap (c, category, updateCount, 0);
-			// <full>
-			c2 = ToFullWidthTail (c);
-			if (c2 != c)
-				AddCharMapGroupTail (c2, category, updateCount);
-		}
-
 		//
 		// Adds characters to table in the order below 
 		// (+ increases weight):
@@ -2925,11 +3604,24 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 			DecompositionWide,
 			DecompositionNarrow,
 			};
+		private void AddCharMapGroup (char c, byte category, byte updateCount)
+		{
+			AddCharMapGroup (c, category, updateCount, 0, true);
+		}
+
 		private void AddCharMapGroup (char c, byte category, byte updateCount, byte level2)
+		{
+			AddCharMapGroup (c, category, updateCount, level2, false);
+		}
+
+		private void AddCharMapGroup (char c, byte category, byte updateCount, byte level2, bool deferLevel2)
 		{
 			if (map [(int) c].Defined)
 				return;
 
+			if (deferLevel2)
+				level2 = diacritical [(int) c];
+
 			char small = char.MinValue;
 			char vertical = char.MinValue;
 			Hashtable nfkd = (Hashtable) nfkdMap [(int) c];
@@ -2943,8 +3635,11 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 			}
 
 			// <small> updates index
-			if (small != char.MinValue)
-				AddCharMap (small, category, updateCount);
+			if (small != char.MinValue) {
+				if (level2 == 0 && deferLevel2)
+					level2 = diacritical [small];
+				AddCharMap (small, category, updateCount, level2);
+			}
 
 			// itself
 			AddCharMap (c, category, 0, level2);
@@ -2952,16 +3647,22 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 			if (nfkd != null) {
 				foreach (int weight in sameWeightItems) {
 					object wv = nfkd [(byte) weight];
-					if (wv != null)
+					if (wv != null) {
+						if (deferLevel2)
+							level2 = diacritical [(int) wv];
 						AddCharMap ((char) ((int) wv), category, 0, level2);
+					}
 				}
 			}
 
 			// update index here.
 			fillIndex [category] += updateCount;
 
-			if (vertical != char.MinValue)
+			if (vertical != char.MinValue) {
+				if (level2 == 0 && deferLevel2)
+					level2 = diacritical [vertical];
 				AddCharMap (vertical, category, updateCount, level2);
+			}
 		}
 
 		private void AddCharMapCJK (char c, ref byte category)
@@ -3024,23 +3725,44 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 		// For now it is only for 0x7 category.
 		private void AddCharMapGroup2 (char c, byte category, byte updateCount, byte level2)
 		{
-			char small = char.MinValue;
-			char vertical = char.MinValue;
-			Hashtable nfkd = (Hashtable) nfkdMap [(int) c];
-			if (nfkd != null) {
-				object smv = nfkd [(byte) DecompositionSmall];
-				if (smv != null)
-					small = (char) ((int) smv);
-				object vv = nfkd [(byte) DecompositionVertical];
-				if (vv != null)
-					vertical = (char) ((int) vv);
+			if (map [(int) c].Defined)
+				return;
+
+			bool updateWeight = false;
+			// Process in advance (lower primary weight)
+			for (int c2 = 0; c2 < char.MaxValue; c2++) {
+				if (!map [c2].Defined &&
+					decompLength [c2] == 1 &&
+					(int) (decompValues [decompIndex [c2]]) == (int) c) {
+					switch (decompType [c2]) {
+					case DecompositionSmall:
+						updateWeight = true;
+						AddCharMap ((char) c2, category,
+							0, level2);
+						break;
+					}
+				}
 			}
+			if (updateWeight)
+				fillIndex [category] = (byte)
+					(fillIndex [category] + updateCount);
 
-			// <small> updates index
-			if (small != char.MinValue)
-				// SPECIAL CASE excluded (FIXME: why?)
-				if (small != '\u2024')
-					AddCharMap (small, category, updateCount);
+			// Identical weight
+			for (int c2 = 0; c2 < char.MaxValue; c2++) {
+				if (!map [c2].Defined &&
+					decompLength [c2] == 1 &&
+					(int) (decompValues [decompIndex [c2]]) == (int) c) {
+					switch (decompType [c2]) {
+					case DecompositionSub:
+					case DecompositionSuper:
+					case DecompositionWide:
+					case DecompositionNarrow:
+						AddCharMap ((char) c2, category,
+							0, level2);
+						break;
+					}
+				}
+			}
 
 			// itself
 			AddCharMap (c, category, updateCount, level2);
@@ -3048,28 +3770,26 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 			// Since nfkdMap is problematic to have two or more
 			// NFKD to an identical character, here I iterate all.
 			for (int c2 = 0; c2 < char.MaxValue; c2++) {
-				if (decompLength [c2] == 1 &&
+				if (!map [c2].Defined &&
+					decompLength [c2] == 1 &&
 					(int) (decompValues [decompIndex [c2]]) == (int) c) {
 					switch (decompType [c2]) {
-					case DecompositionCompat:
+					case DecompositionWide:
+					case DecompositionNarrow:
+					case DecompositionSmall:
+					case DecompositionSub:
+					case DecompositionSuper:
+						continue;
+					default:
 						AddCharMap ((char) c2, category, updateCount, level2);
 						break;
 					}
 				}
 			}
-
-			if (vertical != char.MinValue)
-				// SPECIAL CASE excluded (FIXME: why?)
-				if (vertical != '\uFE33' && vertical != '\uFE34')
-					AddCharMap (vertical, category, updateCount, level2);
 		}
 
-		private void AddArabicCharMap (char c)
+		private void AddArabicCharMap (char c, byte category, byte updateCount, byte level2)
 		{
-			byte category = 6;
-			byte updateCount = 1;
-			byte level2 = 0;
-
 			// itself
 			AddCharMap (c, category, 0, level2);
 
@@ -3086,26 +3806,11 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 			fillIndex [category] += updateCount;
 		}
 
-		char ToFullWidth (char c)
-		{
-			return ToDecomposed (c, DecompositionFull, false);
-		}
-
-		char ToFullWidthTail (char c)
-		{
-			return ToDecomposed (c, DecompositionFull, true);
-		}
-
 		char ToSmallForm (char c)
 		{
 			return ToDecomposed (c, DecompositionSmall, false);
 		}
 
-		char ToSmallFormTail (char c)
-		{
-			return ToDecomposed (c, DecompositionSmall, true);
-		}
-
 		char ToDecomposed (char c, byte d, bool tail)
 		{
 			if (decompType [(int) c] != d)
@@ -3139,9 +3844,27 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 			// CJK compat
 			if ('\u3192' <= c && c <= '\u319F')
 				return 0;
-			// Japanese reading marks
-			if (c == '\u3001' || c == '\u3002')
-				return 2;
+
+			// They have <narrow> NFKD mapping, and on Windows
+			// those narrow characters are regarded as "normal",
+			// thus those characters themselves are regarded as
+			// "wide". grep "<narrow>" and you can pick them up
+			// (ignoring Kana, Hangul etc.)
+			switch (c) {
+			case '\u3002':
+			case '\u300C':
+			case '\u300D':
+			case '\u3001':
+			case '\u30FB':
+			case '\u2502':
+			case '\u2190':
+			case '\u2191':
+			case '\u2192':
+			case '\u2193':
+			case '\u25A0':
+			case '\u25CB':
+				return 1;
+			}
 			// Korean
 			if ('\u11A8' <= c && c <= '\u11F9')
 				return 2;
@@ -3164,22 +3887,32 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 			if ('\u2160' <= c && c <= '\u216F')
 				return 0x10;
 			if ('\u2181' <= c && c <= '\u2182')
-				return 0x18;
+				return 0x10;
 			// Arabic
 			if ('\u2135' <= c && c <= '\u2138')
 				return 4;
-			if ('\uFE80' <= c && c < '\uFF00') {
+			// I believe that Windows has a bug in setting the level 3
+			// weight here. NFKD results in different values.
+			if ('\uFE80' < c && c < '\uFF00') {
 				// 2(Isolated)/8(Final)/0x18(Medial)
 				switch (decompType [(int) c]) {
 				case DecompositionIsolated:
-					return 2;
+					return 0; // 2;
 				case DecompositionFinal:
 					return 8;
 				case DecompositionMedial:
 					return 0x18;
+				case DecompositionInitial:
+					return 0x10;
 				}
 			}
 
+			// I have no idea why those symbols have level 3 weight
+			if (c == '\u2104' || c == '\u212B')
+				return 0x18;
+			if ('\u211E' <= c && c <= '\u212B')
+				return 0x10;
+
 			// actually I dunno the reason why they have weights.
 			switch (c) {
 			case '\u01BC':
@@ -3188,17 +3921,23 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 				return 0x20;
 			case '\u06AA':
 				return 0x28;
+			// Gurmukhi
+			case '\u0A39':
+			case '\u0A59':
+			case '\u0A5A':
+			case '\u0A5B':
+			case '\u0A5E':
+				return 0x10;
 			}
 
 			byte ret = 0;
 			switch (c) {
 			case '\u03C2':
-			case '\u2104':
 			case '\u212B':
-				ret |= 8;
+				ret = 8;
 				break;
 			case '\uFE42':
-				ret |= 0xC;
+				ret = 0xA;
 				break;
 			}