X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mcs%2Fclass%2Fcorlib%2FMono.Globalization.Unicode%2Fcreate-mscompat-collation-table.cs;h=c54b9ab8eb936c46967e4952531edbb3207b16a5;hb=ff228e1c801bda9666b6edab3ee962e05edcf480;hp=f6d20851627e7e2c96e1ad81400db9ea61921af8;hpb=967ebd04adb87eaf73970fb297322820abe5393e;p=mono.git

diff --git a/mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs b/mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs
index f6d20851627..c54b9ab8eb9 100644
--- a/mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs
+++ b/mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs
@@ -24,7 +24,6 @@
 //	If there are characters whose primary weight is 0, they are consumed
 //	and considered as a part of the character element.
 //
-#define Binary
 
 using System;
 using System.IO;
@@ -96,25 +95,19 @@ namespace Mono.Globalization.Unicode
 		byte [] diacritical = new byte [char.MaxValue + 1];
 
 		string [] diacritics = new string [] {
-			// LATIN, CYRILLIC etc.
-			"UPTURN", "DOUBLE-STRUCK",
-			"MIDDLE HOOK", "WITH VERTICAL LINE ABOVE;",
-			"WITH GRAVE ACCENT;", "WITH ACUTE ACCENT;", "WITH CIRCUMFLEX ACCENT;",
-			"WITH ACUTE;", "WITH GRAVE;", "WITH DOT ABOVE;", " MIDDLE DOT;",
+			// LATIN
+			"WITH ACUTE;", "WITH GRAVE;", " DOT ABOVE;", " MIDDLE DOT;",
 			"WITH CIRCUMFLEX;", "WITH DIAERESIS;", "WITH CARON;", "WITH BREVE;",
-			" DIALYTIKA AND TONOS;", "WITH MACRON;", "WITH TILDE;", "WITH RING ABOVE;",
-			"WITH OGONEK;", "WITH CEDILLA;",
-			//
+			" DIALYTIKA AND TONOS;", "WITH MACRON;", "WITH TILDE;", " RING ABOVE;",
+			" OGONEK;", " CEDILLA;",
 			" DOUBLE ACUTE;", " ACUTE AND DOT ABOVE;",
-			"WITH STROKE;", " CIRCUMFLEX AND ACUTE;",
-			"STROKE OVERLAY",
+			" STROKE;", " CIRCUMFLEX AND ACUTE;",
 			" DIAERESIS AND ACUTE;", "WITH CIRCUMFLEX AND GRAVE;", " L SLASH;",
 			" DIAERESIS AND GRAVE;",
 			" BREVE AND ACUTE;",
 			" CARON AND DOT ABOVE;", " BREVE AND GRAVE;",
 			" MACRON AND ACUTE;",
 			" MACRON AND GRAVE;",
-			//
 			" DIAERESIS AND CARON", " DOT ABOVE AND MACRON", " TILDE AND ACUTE",
 			" RING ABOVE AND ACUTE",
 			" DIAERESIS AND MACRON", " CEDILLA AND ACUTE", " MACRON AND DIAERESIS",
@@ -124,59 +117,45 @@ namespace Mono.Globalization.Unicode
 			" BREVE AND TILDE",
 			" CEDILLA AND BREVE",
 			" OGONEK AND MACRON",
-			//
-			"WITH OVERLINE",
-			"WITH HOOK;", "LEFT HOOK;", " WITH HOOK ABOVE;",
-			" DOUBLE GRAVE",
+			" HOOK;", "LEFT HOOK;", " WITH HOOK ABOVE;",
+			" DOUBLE GRAVE;",
 			" INVERTED BREVE",
-			"ROMAN NUMERAL",
 			" PRECEDED BY APOSTROPHE",
-			"WITH HORN;",
+			" HORN;",
 			" LINE BELOW;", " CIRCUMFLEX AND HOOK ABOVE",
 			" PALATAL HOOK",
 			" DOT BELOW;",
 			" RETROFLEX;", "DIAERESIS BELOW",
 			" RING BELOW",
-			//
 			" CIRCUMFLEX BELOW", "HORN AND ACUTE",
 			" BREVE BELOW;", " HORN AND GRAVE",
 			" TILDE BELOW",
-			" TOPBAR",
 			" DOT BELOW AND DOT ABOVE",
 			" RIGHT HALF RING", " HORN AND TILDE",
 			" CIRCUMFLEX AND DOT BELOW",
 			" BREVE AND DOT BELOW",
 			" DOT BELOW AND MACRON",
-			" TONE TWO",
 			" HORN AND HOOK ABOVE",
 			" HORN AND DOT",
 			// CIRCLED, PARENTHESIZED and so on
-			"CIRCLED DIGIT", "CIRCLED NUMBER", "CIRCLED LATIN",
-			"CIRCLED KATAKANA", "CIRCLED SANS-SERIF",
+			"CIRCLED DIGIT", "CIRCLED NUMBER", "CIRCLED LATIN", "CIRCLED KATAKANA",
 			"PARENTHESIZED DIGIT", "PARENTHESIZED NUMBER", "PARENTHESIZED LATIN",
 			};
 		byte [] diacriticWeights = new byte [] {
 			// LATIN.
-			3, 3, 5, 5,
-			0xF, 0xE, 0x12,
 			0xE, 0xF, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 			0x17, 0x19, 0x1A, 0x1B, 0x1C,
-			//
-			0x1D, 0x1D, 0x1E, 0x1E, 0x1E, 0x1F, 0x1F, 0x1F,
+			0x1D, 0x1D, 0x1E, 0x1E, 0x1F, 0x1F, 0x1F,
 			0x20, 0x21, 0x22, 0x22, 0x23, 0x24,
-			//
 			0x25, 0x25, 0x25, 0x26, 0x28, 0x28, 0x28,
 			0x29, 0x2A, 0x2B, 0x2C, 0x2F, 0x30,
-			//
-			0x40, 0x43, 0x43, 0x43, 0x44, 0x46, 0x47, 0x48,
+			0x43, 0x43, 0x43, 0x44, 0x46, 0x48,
 			0x52, 0x55, 0x55, 0x57, 0x58, 0x59, 0x59, 0x5A,
-			//
-			0x60, 0x60, 0x61, 0x61, 0x63, 0x68, 0x68,
+			0x60, 0x60, 0x61, 0x61, 0x63, 0x68, 
 			0x69, 0x69, 0x6A, 0x6D, 0x6E,
-			0x87, 0x95, 0xAA,
+			0x95, 0xAA,
 			// CIRCLED, PARENTHESIZED and so on.
-			0xEE, 0xEE, 0xEE, 0xEE, 0xEE,
-			0xF3, 0xF3, 0xF3
+			0xEE, 0xEE, 0xEE, 0xEE, 0xF3, 0xF3, 0xF3, 0xF3
 			};
 
 		int [] numberSecondaryWeightBounds = new int [] {
@@ -187,6 +166,7 @@ namespace Mono.Globalization.Unicode
 			0xE50, 0xE60, 0xED0, 0xEE0
 			};
 
+		char [] orderedCyrillic;
 		char [] orderedGurmukhi;
 		char [] orderedGujarati;
 		char [] orderedGeorgian;
@@ -196,11 +176,11 @@ namespace Mono.Globalization.Unicode
 			// based on traditional Tamil consonants, except for
 			// Grantha (where Microsoft breaks traditionalism).
 			// http://www.angelfire.com/empire/thamizh/padanGaL
-			'\u0B95', '\u0B99', '\u0B9A', '\u0B9E', '\u0B9F',
-			'\u0BA3', '\u0BA4', '\u0BA8', '\u0BAA', '\u0BAE',
-			'\u0BAF', '\u0BB0', '\u0BB2', '\u0BB5', '\u0BB4',
-			'\u0BB3', '\u0BB1', '\u0BA9', '\u0B9C', '\u0BB8',
-			'\u0BB7', '\u0BB9'};
+			'\u0B99', '\u0B9A', '\u0B9E', '\u0B9F', '\u0BA3',
+			'\u0BA4', '\u0BA8', '\u0BAA', '\u0BAE', '\u0BAF',
+			'\u0BB0', '\u0BB2', '\u0BB5', '\u0BB4', '\u0BB3',
+			'\u0BB1', '\u0BA9', '\u0B9C', '\u0BB8', '\u0BB7',
+			'\u0BB9'};
 
 		// cp -> character name (only for some characters)
 		ArrayList sortableCharNames = new ArrayList ();
@@ -226,11 +206,11 @@ namespace Mono.Globalization.Unicode
 		ArrayList jisJapanese = new ArrayList ();
 		ArrayList nonJisJapanese = new ArrayList ();
 
-		ushort [] cjkJA = new ushort [char.MaxValue +1];// - 0x4E00];
-		ushort [] cjkCHS = new ushort [char.MaxValue +1];// - 0x3100];
-		ushort [] cjkCHT = new ushort [char.MaxValue +1];// - 0x4E00];
-		ushort [] cjkKO = new ushort [char.MaxValue +1];// - 0x4E00];
-		byte [] cjkKOlv2 = new byte [char.MaxValue +1];// - 0x4E00];
+		ushort [] cjkJA = new ushort [char.MaxValue - 0x4E00];
+		ushort [] cjkCHS = new ushort [char.MaxValue - 0x3100];
+		ushort [] cjkCHT = new ushort [char.MaxValue - 0x4E00];
+		ushort [] cjkKO = new ushort [char.MaxValue - 0x4E00];
+		byte [] cjkKOlv2 = new byte [char.MaxValue - 0x4E00];
 
 		byte [] ignorableFlags = new byte [char.MaxValue + 1];
 
@@ -272,12 +252,6 @@ sw.Close ();
 				source, typeof (byte), i);
 		}
 
-		ushort [] CompressArray (ushort [] source, CodePointIndexer i)
-		{
-			return (ushort []) CodePointIndexer.CompressArray  (
-				source, typeof (ushort), i);
-		}
-
 		void Serialize ()
 		{
 			// Tailorings
@@ -287,24 +261,19 @@ sw.Close ();
 			byte [] level1 = new byte [map.Length];
 			byte [] level2 = new byte [map.Length];
 			byte [] level3 = new byte [map.Length];
-			ushort [] widthCompat = new ushort [map.Length];
+			int [] widthCompat = new int [map.Length];
 			for (int i = 0; i < map.Length; i++) {
 				categories [i] = map [i].Category;
 				level1 [i] = map [i].Level1;
 				level2 [i] = map [i].Level2;
 				level3 [i] = ComputeLevel3Weight ((char) i);
-				// For Japanese Half-width characters, don't
-				// map widthCompat. It is IgnoreKanaType that
-				// handles those width differences.
-				if (0xFF6D <= i && i <= 0xFF9D)
-					continue;
 				switch (decompType [i]) {
 				case DecompositionNarrow:
 				case DecompositionWide:
 				case DecompositionSuper:
 				case DecompositionSub:
 					// they are always 1 char
-					widthCompat [i] = (ushort) decompValues [decompIndex [i]];
+					widthCompat [i] = decompValues [decompIndex [i]];
 					break;
 				}
 			}
@@ -320,36 +289,18 @@ sw.Close ();
 				MSCompatUnicodeTableUtil.Level2);
 			level3 = CompressArray (level3, 
 				MSCompatUnicodeTableUtil.Level3);
-			widthCompat = (ushort []) CodePointIndexer.CompressArray (
-				widthCompat, typeof (ushort),
+			widthCompat = (int []) CodePointIndexer.CompressArray (
+				widthCompat, typeof (int),
 				MSCompatUnicodeTableUtil.WidthCompat);
-			cjkCHS = CompressArray (cjkCHS,
-				MSCompatUnicodeTableUtil.CjkCHS);
-			cjkCHT = CompressArray (cjkCHT,
-				MSCompatUnicodeTableUtil.Cjk);
-			cjkJA = CompressArray (cjkJA,
-				MSCompatUnicodeTableUtil.Cjk);
-			cjkKO = CompressArray (cjkKO,
-				MSCompatUnicodeTableUtil.Cjk);
-			cjkKOlv2 = CompressArray (cjkKOlv2,
-				MSCompatUnicodeTableUtil.Cjk);
 
 			// Ignorables
-			Result.WriteLine ("internal static readonly byte [] ignorableFlags = new byte [] {");
-#if Binary
-			MemoryStream ms = new MemoryStream ();
-			BinaryWriter binary = new BinaryWriter (ms);
-			binary.Write (ignorableFlags.Length);
-#endif
+			Result.WriteLine ("static byte [] ignorableFlags = new byte [] {");
 			for (int i = 0; i < ignorableFlags.Length; i++) {
 				byte value = ignorableFlags [i];
 				if (value < 10)
 					Result.Write ("{0},", value);
 				else
 					Result.Write ("0x{0:X02},", value);
-#if Binary
-				binary.Write (value);
-#endif
 				if ((i & 0xF) == 0xF)
 					Result.WriteLine ("// {0:X04}", i - 0xF);
 			}
@@ -357,19 +308,13 @@ sw.Close ();
 			Result.WriteLine ();
 
 			// Primary category
-			Result.WriteLine ("internal static readonly byte [] categories = new byte [] {");
-#if Binary
-			binary.Write (categories.Length);
-#endif
+			Result.WriteLine ("static byte [] categories = new byte [] {");
 			for (int i = 0; i < categories.Length; i++) {
 				byte value = categories [i];
 				if (value < 10)
 					Result.Write ("{0},", value);
 				else
 					Result.Write ("0x{0:X02},", value);
-#if Binary
-				binary.Write (value);
-#endif
 				if ((i & 0xF) == 0xF)
 					Result.WriteLine ("// {0:X04}", i - 0xF);
 			}
@@ -377,19 +322,13 @@ sw.Close ();
 			Result.WriteLine ();
 
 			// Primary weight value
-			Result.WriteLine ("internal static readonly byte [] level1 = new byte [] {");
-#if Binary
-			binary.Write (level1.Length);
-#endif
+			Result.WriteLine ("static byte [] level1 = new byte [] {");
 			for (int i = 0; i < level1.Length; i++) {
 				byte value = level1 [i];
 				if (value < 10)
 					Result.Write ("{0},", value);
 				else
 					Result.Write ("0x{0:X02},", value);
-#if Binary
-				binary.Write (value);
-#endif
 				if ((i & 0xF) == 0xF)
 					Result.WriteLine ("// {0:X04}", i - 0xF);
 			}
@@ -397,19 +336,13 @@ sw.Close ();
 			Result.WriteLine ();
 
 			// Secondary weight
-			Result.WriteLine ("internal static readonly byte [] level2 = new byte [] {");
-#if Binary
-			binary.Write (level2.Length);
-#endif
+			Result.WriteLine ("static byte [] level2 = new byte [] {");
 			for (int i = 0; i < level2.Length; i++) {
-				byte value = level2 [i];
+				int value = level2 [i];
 				if (value < 10)
 					Result.Write ("{0},", value);
 				else
 					Result.Write ("0x{0:X02},", value);
-#if Binary
-				binary.Write (value);
-#endif
 				if ((i & 0xF) == 0xF)
 					Result.WriteLine ("// {0:X04}", i - 0xF);
 			}
@@ -417,19 +350,13 @@ sw.Close ();
 			Result.WriteLine ();
 
 			// Thirtiary weight
-			Result.WriteLine ("internal static readonly byte [] level3 = new byte [] {");
-#if Binary
-			binary.Write (level3.Length);
-#endif
+			Result.WriteLine ("static byte [] level3 = new byte [] {");
 			for (int i = 0; i < level3.Length; i++) {
 				byte value = level3 [i];
 				if (value < 10)
 					Result.Write ("{0},", value);
 				else
 					Result.Write ("0x{0:X02},", value);
-#if Binary
-				binary.Write (value);
-#endif
 				if ((i & 0xF) == 0xF)
 					Result.WriteLine ("// {0:X04}", i - 0xF);
 			}
@@ -439,30 +366,18 @@ sw.Close ();
 			// Width insensitivity mappings
 			// (for now it is more lightweight than dumping the
 			// entire NFKD table).
-			Result.WriteLine ("internal static readonly ushort [] widthCompat = new ushort [] {");
-#if Binary
-			binary.Write (widthCompat.Length);
-#endif
+			Result.WriteLine ("static int [] widthCompat = new int [] {");
 			for (int i = 0; i < widthCompat.Length; i++) {
-				ushort value = widthCompat [i];
+				int value = widthCompat [i];
 				if (value < 10)
 					Result.Write ("{0},", value);
 				else
 					Result.Write ("0x{0:X02},", value);
-#if Binary
-				binary.Write (value);
-#endif
 				if ((i & 0xF) == 0xF)
 					Result.WriteLine ("// {0:X04}", i - 0xF);
 			}
 			Result.WriteLine ("};");
 			Result.WriteLine ();
-#if Binary
-			using (FileStream fs = File.Create ("../collation.core.bin")) {
-				byte [] array = ms.ToArray ();
-				fs.Write (array, 0, array.Length);
-			}
-#endif
 
 			// CJK
 			SerializeCJK ("cjkCHS", cjkCHS, char.MaxValue);
@@ -474,13 +389,8 @@ sw.Close ();
 
 		void SerializeCJK (string name, ushort [] cjk, int max)
 		{
-			int offset = 0;//char.MaxValue - cjk.Length;
+			int offset = char.MaxValue - cjk.Length;
 			Result.WriteLine ("static ushort [] {0} = new ushort [] {{", name);
-#if Binary
-			MemoryStream ms = new MemoryStream ();
-			BinaryWriter binary = new BinaryWriter (ms);
-			binary.Write (cjk.Length);
-#endif
 			for (int i = 0; i < cjk.Length; i++) {
 				if (i + offset == max)
 					break;
@@ -489,30 +399,17 @@ sw.Close ();
 					Result.Write ("{0},", value);
 				else
 					Result.Write ("0x{0:X04},", value);
-#if Binary
-				binary.Write (value);
-#endif
 				if ((i & 0xF) == 0xF)
 					Result.WriteLine ("// {0:X04}", i - 0xF + offset);
 			}
 			Result.WriteLine ("};");
 			Result.WriteLine ();
-#if Binary
-			using (FileStream fs = File.Create (String.Format ("../collation.{0}.bin", name))) {
-				byte [] array = ms.ToArray ();
-				fs.Write (array, 0, array.Length);
-			}
-#endif
 		}
 
 		void SerializeCJK (string name, byte [] cjk, int max)
 		{
-			int offset = 0;//char.MaxValue - cjk.Length;
+			int offset = char.MaxValue - cjk.Length;
 			Result.WriteLine ("static byte [] {0} = new byte [] {{", name);
-#if Binary
-			MemoryStream ms = new MemoryStream ();
-			BinaryWriter binary = new BinaryWriter (ms);
-#endif
 			for (int i = 0; i < cjk.Length; i++) {
 				if (i + offset == max)
 					break;
@@ -521,20 +418,11 @@ sw.Close ();
 					Result.Write ("{0},", value);
 				else
 					Result.Write ("0x{0:X02},", value);
-#if Binary
-				binary.Write (value);
-#endif
 				if ((i & 0xF) == 0xF)
 					Result.WriteLine ("// {0:X04}", i - 0xF + offset);
 			}
 			Result.WriteLine ("};");
 			Result.WriteLine ();
-#if Binary
-			using (FileStream fs = File.Create (String.Format ("../collation.{0}.bin", name))) {
-				byte [] array = ms.ToArray ();
-				fs.Write (array, 0, array.Length);
-			}
-#endif
 		}
 
 		void SerializeTailorings ()
@@ -543,10 +431,6 @@ sw.Close ();
 			Hashtable counts = new Hashtable ();
 			Result.WriteLine ("static char [] tailorings = new char [] {");
 			int count = 0;
-#if Binary
-			MemoryStream ms = new MemoryStream ();
-			BinaryWriter binary = new BinaryWriter (ms);
-#endif
 			foreach (Tailoring t in tailorings) {
 				if (t.Alias != 0)
 					continue;
@@ -558,24 +442,15 @@ sw.Close ();
 					Result.Write ("'\\x{0:X}', ", (int) c);
 					if (++count % 16 == 0)
 						Result.WriteLine (" // {0:X04}", count - 16);
-#if Binary
-					binary.Write ((ushort) c);
-#endif
 				}
 			}
 			Result.WriteLine ("};");
 
 			Result.WriteLine ("static TailoringInfo [] tailoringInfos = new TailoringInfo [] {");
-#if Binary
-			byte [] rawdata = ms.ToArray ();
-			ms = new MemoryStream ();
-			binary = new BinaryWriter (ms);
-			binary.Write (tailorings.Count);
-#endif
 			foreach (Tailoring t in tailorings) {
 				int target = t.Alias != 0 ? t.Alias : t.LCID;
 				if (!indexes.ContainsKey (target)) {
-					throw new Exception (String.Format ("WARNING: no corresponding definition for tailoring alias. From {0} to {1}", t.LCID, t.Alias));
+					Console.Error.WriteLine ("WARNING: no corresponding definition for tailoring alias. From {0} to {1}", t.LCID, t.Alias);
 					continue;
 				}
 				int idx = (int) indexes [target];
@@ -586,26 +461,8 @@ sw.Close ();
 						if (t2.LCID == t.LCID)
 							french = t2.FrenchSort;
 				Result.WriteLine ("new TailoringInfo ({0}, 0x{1:X}, {2}, {3}), ", t.LCID, idx, cnt, french ? "true" : "false");
-#if Binary
-				binary.Write (t.LCID);
-				binary.Write (idx);
-				binary.Write (cnt);
-				binary.Write (french);
-#endif
 			}
 			Result.WriteLine ("};");
-#if Binary
-			binary.Write ((byte) 0xFF);
-			binary.Write ((byte) 0xFF);
-			binary.Write (rawdata.Length / 2);
-			binary.Write (rawdata, 0, rawdata.Length);
-
-
-			using (FileStream fs = File.Create ("../collation.tailoring.bin")) {
-				byte [] array = ms.ToArray ();
-				fs.Write (array, 0, array.Length);
-			}
-#endif
 		}
 
 		#region Parse
@@ -632,7 +489,6 @@ sw.Close ();
 
 			ParseJISOrder (cp932); // in prior to ParseUnidata()
 			ParseUnidata (unidata);
-			ModifyUnidata ();
 			ParseDerivedCoreProperties (derivedCoreProps);
 			ParseScripts (scripts);
 			ParseCJK (chXML, jaXML, koXML);
@@ -711,8 +567,8 @@ sw.Close ();
 			if (idx > 0) {
 				string source = s.Substring (0, idx).Trim ();
 				string [] l = s.Substring (idx + 1).Trim ().Split (' ');
-				byte [] b = new byte [4];
-				for (int i = 0; i < 4; i++) {
+				byte [] b = new byte [5];
+				for (int i = 0; i < 5; i++) {
 					if (l [i] == "*")
 						b [i] = 0;
 					else
@@ -756,9 +612,8 @@ sw.Close ();
 					if (cp > char.MaxValue)
 						continue;
 
-					double v = double.Parse (value);
 					for (int i = cp; i <= cpEnd; i++)
-						unicodeAge [i] = v;
+						unicodeAge [i] = double.Parse (value);
 				}
 			}
 			unicodeAge [0] = double.MaxValue; // never be supported
@@ -781,10 +636,7 @@ sw.Close ();
 			this.decompValues = (int [])
 				decompValues.ToArray (typeof (int));
 		}
-
-		char previousLatinTarget = char.MinValue;
-		byte [] diacriticalOffset = new byte ['Z' - 'A' + 1];
-
+		
 		void ProcessUnidataLine (string s, ArrayList decompValues)
 		{
 			int idx = s.IndexOf ('#');
@@ -804,98 +656,31 @@ sw.Close ();
 
 			string name = values [0];
 
-			// SPECIAL CASE: rename some characters for diacritical
-			// remapping. FIXME: why are they different?
-			// FIXME: it's still not working.
-			if (cp == 0x018B || cp == 0x018C)
-				name = name.Replace ("TOPBAR", "STROKE");
-
 			// isSmallCapital
 			if (s.IndexOf ("SMALL CAPITAL") > 0)
 				isSmallCapital [cp] = true;
 
 			// latin mapping by character name
-			if (s.IndexOf ("LATIN") >= 0) {
+			if (s.IndexOf ("LATIN") > 0) {
 				int lidx = s.IndexOf ("LETTER DOTLESS ");
 				int offset = lidx + 15;
 				if (lidx < 0) {
 					lidx = s.IndexOf ("LETTER TURNED ");
 					offset = lidx + 14;
 				}
-				if (lidx < 0) {
-					lidx = s.IndexOf ("LETTER CAPITAL ");
-					offset = lidx + 15;
-				}
-				if (lidx < 0) {
-					lidx = s.IndexOf ("LETTER SCRIPT ");
-					offset = lidx + 14;
-				}
 				if (lidx < 0) {
 					lidx = s.IndexOf ("LETTER ");
 					offset = lidx + 7;
 				}
 				char c = lidx > 0 ? s [offset] : char.MinValue;
-				char n = s [offset + 1];
-				char target = char.MinValue;
 				if ('A' <= c && c <= 'Z' &&
-					(n == ' ') || n == ';') {
-					target = c;
-					// FIXME: After 'Z', I cannot reset this state.
-					previousLatinTarget = c == 'Z' ? char.MinValue : c;
-				}
-
-				if (s.Substring (offset).StartsWith ("ALPHA"))
-					target = 'A';
-				else if (s.Substring (offset).StartsWith ("TONE SIX"))
-					target = 'B';
-				else if (s.Substring (offset).StartsWith ("OPEN O"))
-					target = 'C';
-				else if (s.Substring (offset).StartsWith ("SCHWA"))
-					target = 'E';
-				else if (s.Substring (offset).StartsWith ("ENG"))
-					target = 'N';
-				else if (s.Substring (offset).StartsWith ("OI;")) // 01A2,01A3
-					target = 'O';
-				else if (s.Substring (offset).StartsWith ("YR;")) // 01A2,01A3
-					target = 'R';
-				else if (s.Substring (offset).StartsWith ("TONE TWO"))
-					target = 'S';
-				else if (s.Substring (offset).StartsWith ("ESH"))
-					target = 'S';
-
-				// For remaining IPA chars, direct mapping is
-				// much faster.
-				switch (cp) {
-				case 0x0299: target = 'B'; break;
-				case 0x029A: target = 'E'; break;
-				case 0x029B: target = 'G'; break;
-				case 0x029C: target = 'H'; break;
-				case 0x029D: target = 'J'; break;
-				case 0x029E: target = 'K'; break;
-				case 0x029F: target = 'L'; break;
-				case 0x02A0: target = 'Q'; break;
-				case 0x02A7: target = 'T'; break;
-				case 0x02A8: target = 'T'; break;
-				}
-
-				if (target == char.MinValue)
-					target = previousLatinTarget;
-
-				if (target != char.MinValue) {
-					ArrayList entry = (ArrayList) latinMap [target];
+					(s.Length == offset + 1 || s [offset + 1] == ' ')) {
+					ArrayList entry = (ArrayList) latinMap [c];
 					if (entry == null) {
 						entry = new ArrayList ();
-						latinMap [target] = entry;
+						latinMap [c] = entry;
 					}
 					entry.Add (cp);
-					// FIXME: This secondary weight is hack.
-					// They are here because they must not
-					// be identical to the corresponding
-					// ASCII latins.
-					if (c != target && diacritical [cp] == 0) {
-						diacriticalOffset [c - 'A']++;
-						diacritical [cp] = (byte) (diacriticalOffset [c - 'A'] + 0x7C);
-					}
 				}
 			}
 
@@ -940,7 +725,7 @@ sw.Close ();
 			}
 
 			// Box names
-			if (0x2500 <= cp && cp < 0x2600) {
+			if (0x2500 <= cp && cp < 0x25B0) {
 				int value = 0;
 				// flags:
 				// up:1 down:2 right:4 left:8 vert:16 horiz:32
@@ -965,93 +750,42 @@ sw.Close ();
 					10, 10, 11, 11,
 					12, 12, 13, 13,
 					14, 14, 14, 14};
-				if (s.IndexOf ("BOX DRAWINGS ") >= 0) {
+				if (s.IndexOf ("BOX DRAWINGS ") > 0) {
 					int flag = 0;
-					if (s.IndexOf (" UP") >= 0)
+					if (s.IndexOf (" UP") > 0)
 						flag |= 1;
-					if (s.IndexOf (" DOWN") >= 0)
+					if (s.IndexOf (" DOWN") > 0)
 						flag |= 2;
-					if (s.IndexOf (" RIGHT") >= 0)
+					if (s.IndexOf (" RIGHT") > 0)
 						flag |= 4;
-					if (s.IndexOf (" LEFT") >= 0)
+					if (s.IndexOf (" LEFT") > 0)
 						flag |= 8;
-					if (s.IndexOf (" VERTICAL") >= 0)
+					if (s.IndexOf (" VERTICAL") > 0)
 						flag |= 16;
-					if (s.IndexOf (" HORIZONTAL") >= 0)
+					if (s.IndexOf (" HORIZONTAL") > 0)
 						flag |= 32;
 
 					int fidx = flags.IndexOf (flag);
 					value = fidx < 0 ? fidx : offsets [fidx];
-				} else if (s.IndexOf ("BLOCK") >= 0) {
-					if (s.IndexOf ("ONE EIGHTH") >= 0)
+				} else if (s.IndexOf ("BLOCK") > 0) {
+					if (s.IndexOf ("ONE EIGHTH") > 0)
 						value = 0x12;
-					else if (s.IndexOf ("ONE QUARTER") >= 0)
+					else if (s.IndexOf ("ONE QUARTER") > 0)
 						value = 0x13;
-					else if (s.IndexOf ("THREE EIGHTHS") >= 0)
+					else if (s.IndexOf ("THREE EIGHTHS") > 0)
 						value = 0x14;
-					else if (s.IndexOf ("HALF") >= 0)
+					else if (s.IndexOf ("HALF") > 0)
 						value = 0x15;
-					else if (s.IndexOf ("FIVE EIGHTHS") >= 0)
+					else if (s.IndexOf ("FIVE EIGHTHS") > 0)
 						value = 0x16;
-					else if (s.IndexOf ("THREE QUARTERS") >= 0)
+					else if (s.IndexOf ("THREE QUARTERS") > 0)
 						value = 0x17;
-					else if (s.IndexOf ("SEVEN EIGHTHS") >= 0)
+					else if (s.IndexOf ("SEVEN EIGHTHS") > 0)
 						value = 0x18;
 					else
 						value = 0x19;
 				}
-				else if (s.IndexOf ("SHADE") >= 0)
-					value = 0x19;
-				else if (s.IndexOf ("SQUARE") >= 0)
-					value = 0xBC - 0xE5;
-				else if (s.IndexOf ("VERTICAL RECTANGLE") >= 0)
-					value = 0xBE - 0xE5;
-				else if (s.IndexOf ("RECTANGLE") >= 0)
-					value = 0xBD - 0xE5;
-				else if (s.IndexOf ("PARALLELOGRAM") >= 0)
-					value = 0xBF - 0xE5;
-				else if (s.IndexOf ("TRIANGLE") >= 0) {
-					if (s.IndexOf ("UP-POINTING") >= 0)
-						value = 0xC0 - 0xE5;
-					else if (s.IndexOf ("RIGHT-POINTING") >= 0)
-						value = 0xC1 - 0xE5;
-					else if (s.IndexOf ("DOWN-POINTING") >= 0)
-						value = 0xC2 - 0xE5;
-					else if (s.IndexOf ("LEFT-POINTING") >= 0)
-						value = 0xC3 - 0xE5;
-				}
-				else if (s.IndexOf ("POINTER") >= 0) {
-					if (s.IndexOf ("RIGHT-POINTING") >= 0)
-						value = 0xC4 - 0xE5;
-					else if (s.IndexOf ("LEFT-POINTING") >= 0)
-						value = 0xC5 - 0xE5;
-				}
-				else if (s.IndexOf ("DIAMOND") >= 0)
-					value = 0xC6 - 0xE5;
-				else if (s.IndexOf ("FISHEYE") >= 0)
-					value = 0xC7 - 0xE5;
-				else if (s.IndexOf ("LOZENGE") >= 0)
-					value = 0xC8 - 0xE5;
-				else if (s.IndexOf ("BULLSEYE") >= 0)
-					value = 0xC9 - 0xE5;
-				else if (s.IndexOf ("CIRCLE") >= 0) {
-					if (cp == 0x25D6) // it could be IndexOf ("LEFT HALF BLACK CIRCLE")
-						value = 0xCA - 0xE5;
-					else if (cp == 0x25D7) // it could be IndexOf ("RIGHT HALF BLACK CIRCLE")
-						value = 0xCB - 0xE5;
-					else
-						value = 0xC9 - 0xE5;
-				}
-				if (0x25DA <= cp && cp <= 0x25E5)
-					value = 0xCD + cp - 0x25DA - 0xE5;
-
-				// SPECIAL CASE: BOX DRAWING DIAGONAL patterns
-				switch (cp) {
-				case 0x2571: value = 0xF; break;
-				case 0x2572: value = 0x10; break;
-				case 0x2573: value = 0x11; break;
-				}
-				if (value != 0)
+				if (value >= 0)
 					boxValues.Add (new DictionaryEntry (
 						cp, value));
 			}
@@ -1061,42 +795,15 @@ sw.Close ();
 			if (0x2100 <= cp && cp <= 0x213F &&
 				Char.IsSymbol ((char) cp))
 				sortableCharNames.Add (
-					new DictionaryEntry (cp, name));
+					new DictionaryEntry (cp, values [0]));
 			else if (0x3380 <= cp && cp <= 0x33DD)
 				sortableCharNames.Add (new DictionaryEntry (
-					cp, name.Substring (7)));
-
-			if (Char.GetUnicodeCategory ((char) cp) ==
-				UnicodeCategory.MathSymbol) {
-				if (name.StartsWith ("CIRCLED "))
-					diacritical [cp] = 0xEE;
-				if (name.StartsWith ("SQUARED "))
-					diacritical [cp] = 0xEF;
-			}
+					cp, values [0].Substring (7)));
 
 			// diacritical weights by character name
-if (diacritics.Length != diacriticWeights.Length)
-throw new Exception (String.Format ("Should not happen. weights are {0} while labels are {1}", diacriticWeights.Length, diacritics.Length));
-			for (int d = 0; d < diacritics.Length; d++) {
-				if (s.IndexOf (diacritics [d]) > 0) {
-					diacritical [cp] += diacriticWeights [d];
-					if (s.IndexOf ("COMBINING") >= 0)
-						diacritical [cp] -= (byte) 2;
-					continue;
-				}
-				// also process "COMBINING blah" here
-				// For now it is limited to cp < 0x0370
-//				if (cp < 0x0300 || cp >= 0x0370)
-//					continue;
-				string tmp = diacritics [d].TrimEnd (';');
-				if (tmp.IndexOf ("WITH ") == 0)
-					tmp = tmp.Substring (4);
-				tmp = String.Concat ("COMBINING", (tmp [0] != ' ' ? " " : ""), tmp);
-				if (name == tmp)
-					diacritical [cp] = (byte) (diacriticWeights [d] - 2);
-//if (name == tmp)
-//Console.Error.WriteLine ("======= {2:X04} : '{0}' / '{1}'", name, tmp, cp);
-			}
+			for (int d = 0; d < diacritics.Length; d++)
+				if (s.IndexOf (diacritics [d]) > 0)
+					diacritical [cp] |= diacriticWeights [d];
 			// Two-step grep required for it.
 			if (s.IndexOf ("FULL STOP") > 0 &&
 				(s.IndexOf ("DIGIT") > 0 || s.IndexOf ("NUMBER") > 0))
@@ -1126,8 +833,8 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 						(cp == 0x0640) ?
 						// 0x0640 is special: it does
 						// not start with ARABIC LETTER
-						name :
-						name.Substring (14);
+						values [0] :
+						values [0].Substring (14);
 					int tmpIdx = letterName.IndexOf (' ');
 					letterName = tmpIdx < 0 ? letterName : letterName.Substring (0, tmpIdx);
 //Console.Error.WriteLine ("Arabic name for {0:X04} is {1}", cp, letterName);
@@ -1143,7 +850,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			// Japanese square letter
 			if (0x3300 <= cp && cp <= 0x3357)
 				if (!ExistsJIS (cp))
-					nonJisJapanese.Add (new NonJISCharacter (cp, name));
+					nonJisJapanese.Add (new NonJISCharacter (cp, values [0]));
 
 			// normalizationType
 			string decomp = values [4];
@@ -1317,6 +1024,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 
 		void ParseScripts (string filename)
 		{
+			ArrayList cyrillic = new ArrayList ();
 			ArrayList gurmukhi = new ArrayList ();
 			ArrayList gujarati = new ArrayList ();
 			ArrayList georgian = new ArrayList ();
@@ -1346,6 +1054,11 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 						continue;
 
 					switch (value) {
+					case "Cyrillic":
+						for (int x = cp; x <= cpEnd; x++)
+							if (!IsIgnorable (x))
+								cyrillic.Add ((char) x);
+						break;
 					case "Gurmukhi":
 						for (int x = cp; x <= cpEnd; x++)
 							if (!IsIgnorable (x))
@@ -1369,10 +1082,12 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 					}
 				}
 			}
+			cyrillic.Sort (UCAComparer.Instance);
 			gurmukhi.Sort (UCAComparer.Instance);
 			gujarati.Sort (UCAComparer.Instance);
 			georgian.Sort (UCAComparer.Instance);
 			thaana.Sort (UCAComparer.Instance);
+			orderedCyrillic = (char []) cyrillic.ToArray (typeof (char));
 			orderedGurmukhi = (char []) gurmukhi.ToArray (typeof (char));
 			orderedGujarati = (char []) gujarati.ToArray (typeof (char));
 			orderedGeorgian = (char []) georgian.ToArray (typeof (char));
@@ -1381,37 +1096,26 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 
 		void ParseJISOrder (string filename)
 		{
-			int line = 1;
-			try {
-				using (StreamReader file =
-					new StreamReader (filename)) {
-					for (;file.Peek () >= 0; line++)
-						ProcessJISOrderLine (file.ReadLine ());
+			using (StreamReader file = // reads the mapping file line by line; each usable line holds two "0x"-prefixed hex columns
+				new StreamReader (filename)) {
+				while (file.Peek () >= 0) {
+					string s = file.ReadLine ();
+					int idx = s.IndexOf ('#'); // '#' starts a trailing comment; drop it
+					if (idx >= 0)
+						s = s.Substring (0, idx).Trim ();
+					if (s.Length == 0)
+						continue;
+					idx = s.IndexOfAny (new char [] {'\t', ' '}); // columns may be separated by a tab or a space
+					if (idx < 0)
+						continue; // single column only: nothing to map on this line
+					// Both columns start with "0x": the first column is idx chars long, so its digits are idx - 2 chars (Substring takes a length, not an end index).
+					int jis = int.Parse (s.Substring (2, idx - 2), NumberStyles.HexNumber);
+					int cp = int.Parse (s.Substring (idx).Trim ().Substring (2), NumberStyles.HexNumber); // tolerate any run of separators before the second column
+					jisJapanese.Add (new JISCharacter (cp, jis));
 				}
-			} catch (Exception) {
-				Console.Error.WriteLine ("---- line {0}", line);
-				throw;
 			}
 		}
 
-		char [] ws = new char [] {'\t', ' '};
-
-		void ProcessJISOrderLine (string s)
-		{
-			int idx = s.IndexOf ('#');
-			if (idx >= 0)
-				s = s.Substring (0, idx).Trim ();
-			if (s.Length == 0)
-				return;
-			idx = s.IndexOfAny (ws);
-			if (idx < 0)
-				return;
-			// They start with "0x" so cut them out.
-			int jis = int.Parse (s.Substring (2, idx - 2), NumberStyles.HexNumber);
-			int cp = int.Parse (s.Substring (idx).Trim ().Substring (2), NumberStyles.HexNumber);
-			jisJapanese.Add (new JISCharacter (cp, jis));
-		}
-
 		void ParseCJK (string zhXML, string jaXML, string koXML)
 		{
 			XmlDocument doc = new XmlDocument ();
@@ -1425,7 +1129,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			// Chinese Simplified
 			category = "chs";
 			arr = cjkCHS;
-			offset = 0;//char.MaxValue - arr.Length;
+			offset = char.MaxValue - arr.Length;
 			doc.Load (zhXML);
 			s = doc.SelectSingleNode ("/ldml/collations/collation[@type='pinyin']/rules/pc").InnerText;
 			v = 0x8008;
@@ -1442,7 +1146,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			// Chinese Traditional
 			category = "cht";
 			arr = cjkCHT;
-			offset = 0;//char.MaxValue - arr.Length;
+			offset = char.MaxValue - arr.Length;
 			s = doc.SelectSingleNode ("/ldml/collations/collation[@type='stroke']/rules/pc").InnerText;
 			v = 0x8002;
 			foreach (char c in s) {
@@ -1458,56 +1162,17 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			// Japanese
 			category = "ja";
 			arr = cjkJA;
-			offset = 0;//char.MaxValue - arr.Length;
-
-			// SPECIAL CASES
-			arr [0x4EDD] = 0x8002; // Chinese repetition mark?
-			arr [0x337B] = 0x8004; // Those 4 characters are Gengou
-			arr [0x337E] = 0x8005;
-			arr [0x337D] = 0x8006;
-			arr [0x337C] = 0x8007;
-
+			offset = char.MaxValue - arr.Length;
+			doc.Load (jaXML);
+			s = doc.SelectSingleNode ("/ldml/collations/collation/rules/pc").InnerText;
 			v = 0x8008;
-			foreach (JISCharacter jc in jisJapanese) {
-				if (jc.JIS < 0x8800)
-					continue;
-				char c = (char) jc.CP;
-
+			foreach (char c in s) {
 				if (c < '\u4E00')
-					// Console.Error.WriteLine ("---- warning: for {0} {1:X04} is omitted which should be {2:X04}", category, (int) c, v);
-					continue;
+					Console.Error.WriteLine ("---- warning: for {0} {1:X04} is omitted which should be {2:X04}", category, (int) c, v);
 				else {
 					arr [(int) c - offset] = (ushort) v++;
 					if (v % 256 == 0)
 						v += 2;
-
-					// SPECIAL CASES:
-					if (c == '\u662D') // U+337C
-						continue;
-					if (c == '\u5927') // U+337D
-						continue;
-					if (c == '\u5E73') // U+337B
-						continue;
-					if (c == '\u660E') // U+337E
-						continue;
-					if (c == '\u9686') // U+F9DC
-						continue;
-
-					// FIXME: there are still remaining
-					// characters after U+FA0C.
-//					for (int k = 0; k < char.MaxValue; k++) {
-					for (int k = 0; k < '\uFA0D'; k++) {
-						if (decompIndex [k] == 0 || IsIgnorable (k))
-							continue;
-						if (decompValues [decompIndex [k]] == c /*&&
-							decompLength [k] == 1*/ ||
-							decompLength [k] == 3 &&
-							decompValues [decompIndex [k] + 1] == c) {
-							arr [k - offset] = (ushort) v++;
-							if (v % 256 == 0)
-								v += 2;
-						}
-					}
 				}
 			}
 
@@ -1523,7 +1188,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			//
 			category = "ko";
 			arr = cjkKO;
-			offset = 0;//char.MaxValue - arr.Length;
+			offset = char.MaxValue - arr.Length;
 			doc.Load (koXML);
 			foreach (XmlElement reset in doc.SelectNodes ("/ldml/collations/collation/rules/reset")) {
 				XmlElement sc = (XmlElement) reset.NextSibling;
@@ -1563,42 +1228,6 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			}
 		}
 
-		void ModifyUnidata ()
-		{
-			// Modify some decomposition equivalence
-			decompType [0xFE31] = 0;
-			decompIndex [0xFE31] = 0;
-			decompLength [0xFE31] = 0;
-			decompType [0xFE32] = 0;
-			decompIndex [0xFE32] = 0;
-			decompLength [0xFE32] = 0;
-
-			// Korean parens numbers
-			for (int i = 0x3200; i <= 0x321C; i++)
-				diacritical [i] = 0xA;
-			for (int i = 0x3260; i <= 0x327B; i++)
-				diacritical [i] = 0xC;
-
-			// LAMESPEC: these remapping should not be done.
-			// Windows have incorrect CJK compat mappings.
-			decompValues [decompIndex [0x32A9]] = 0x91AB;
-			decompLength [0x323B] = 1;
-			decompValues [decompIndex [0x323B]] = 0x5B78;
-			decompValues [decompIndex [0x32AB]] = 0x5B78;
-			decompValues [decompIndex [0x32A2]] = 0x5BEB;
-			decompLength [0x3238] = 1;
-			decompValues [decompIndex [0x3238]] = 0x52DE;
-			decompValues [decompIndex [0x3298]] = 0x52DE;
-
-			// LAMESPEC: custom remapping (which is not bugs but not fine, non-standard compliant things)
-			decompIndex [0xFA0C] = decompIndex [0xF929]; // borrow U+F929 room (being empty)
-			decompValues [decompIndex [0xFA0C]] = 0x5140;
-			decompLength [0xFA0C] = 1;
-			decompIndex [0xF929] = decompLength [0xF929] = 0;
-
-			decompValues [decompIndex [0xF92C]] = 0x90DE;
-		}
-
 		void ModifyParsedValues ()
 		{
 			// number, secondary weights
@@ -1609,6 +1238,12 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 					if (Char.IsNumber ((char) cp))
 						diacritical [cp] = weight;
 
+			// Korean parens numbers
+			for (int i = 0x3200; i <= 0x321C; i++)
+				diacritical [i] = 0xA;
+			for (int i = 0x3260; i <= 0x327B; i++)
+				diacritical [i] = 0xC;
+
 			// Update name part of named characters
 			for (int i = 0; i < sortableCharNames.Count; i++) {
 				DictionaryEntry de =
@@ -1675,26 +1310,16 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 
 			// Hyphen/Dash : 06 81 - 06 90
 			for (int i = 0; i < char.MaxValue; i++) {
-				if (!IsIgnorable (i) &&
-					Char.GetUnicodeCategory ((char) i) ==
-					UnicodeCategory.DashPunctuation) {
-					AddCharMapGroup2 ((char) i, 6, 1, 0);
-					if (i == 0x2011) {
-						// SPECIAL: add 2027 and 2043
-						// Maybe they are regarded the 
-						// same hyphens in "central"
-						// position.
-						AddCharMap ('\u2027', 6, 1);
-						AddCharMap ('\u2043', 6, 1);
-					}
-				}
+				if (Char.GetUnicodeCategory ((char) i)
+					== UnicodeCategory.DashPunctuation)
+					AddCharMapGroupTail ((char) i, 6, 1);
 			}
 
 			// Arabic variable weight chars 06 A0 -
 			fillIndex [6] = 0xA0;
 			// vowels
 			for (int i = 0x64B; i <= 0x650; i++)
-				AddArabicCharMap ((char) i);
+				AddCharMapGroupTail ((char) i, 6, 1);
 			// sukun
 			AddCharMapGroup ('\u0652', 6, 1, 0);
 			// shadda
@@ -1738,67 +1363,12 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				if (!IsIgnorable (i))
 					AddCharMap ((char) i, 0x1, 1);
 
-			// FIXME: needs more love here (it should eliminate
-			// all the hacky code above).
-			for (int i = 0x0300; i < 0x0370; i++)
-				if (!IsIgnorable (i) && diacritical [i] != 0
-					/* especiall here*/ && !map [i].Defined)
-					map [i] = new CharMapEntry (
-						0x1, 0x1, diacritical [i]);
-
-			// Cyrillic and Armenian nonspacing mark
-			fillIndex [0x1] = 0x94;
-			for (int i = 0x400; i < 0x580; i++)
-				if (!IsIgnorable (i) &&
-					Char.GetUnicodeCategory ((char) i) ==
-					UnicodeCategory.NonSpacingMark)
-					AddCharMap ((char) i, 1, 1);
-
-			fillIndex [0x1] = 0x8D;
-			// syriac dotted nonspacing marks (1)
-			AddCharMap ('\u0740', 0x1, 1);
-			AddCharMap ('\u0741', 0x1, 1);
-			AddCharMap ('\u0742', 0x1, 1);
-			// syriac oblique nonspacing marks
-			AddCharMap ('\u0747', 0x1, 1);
-			AddCharMap ('\u0748', 0x1, 1);
-			// syriac dotted nonspacing marks (2)
-			fillIndex [0x1] = 0x94; // this reset is mandatory
-			AddCharMap ('\u0732', 0x1, 1);
-			AddCharMap ('\u0735', 0x1, 1);
-			AddCharMap ('\u0738', 0x1, 1);
-			AddCharMap ('\u0739', 0x1, 1);
-			AddCharMap ('\u073C', 0x1, 1);
-			// SPECIAL CASES: superscripts
-			AddCharMap ('\u073F', 0x1, 1);
-			AddCharMap ('\u0711', 0x1, 1);
-			// syriac "DOTS"
-			for (int i = 0x0743; i <= 0x0746; i++)
-				AddCharMap ((char) i, 0x1, 1);
-			for (int i = 0x0730; i <= 0x0780; i++)
-				if (!map [i].Defined &&
-					Char.GetUnicodeCategory ((char) i) ==
-					UnicodeCategory.NonSpacingMark)
-					AddCharMap ((char) i, 0x1, 1);
-
 			// LAMESPEC: It should not stop at '\u20E1'. There are
 			// a few more characters (that however results in 
 			// overflow of level 2 unless we start before 0xDD).
-			fillIndex [0x1] = 0xDD;
+			fillIndex [0x1] = 0xDC;
 			for (int i = 0x20d0; i <= 0x20e1; i++)
 				AddCharMap ((char) i, 0x1, 1);
-
-			// They are not part of Nonspacing marks, but have
-			// only diacritical weight.
-			for (int i = 0x3099; i <= 0x309C; i++)
-				map [i] = new CharMapEntry (1, 1, 1);
-			map [0xFF9E] = new CharMapEntry (1, 1, 1);
-			map [0xFF9F] = new CharMapEntry (1, 1, 2);
-			map [0x309D] = new CharMapEntry (0xFF, 0xFF, 1);
-			map [0x309E] = new CharMapEntry (0xFF, 0xFF, 1);
-			for (int i = 0x30FC; i <= 0x30FE; i++)
-				map [i] = new CharMapEntry (0xFF, 0xFF, 1);
-
 			#endregion
 
 
@@ -1823,7 +1393,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			AddCharMap ('\u2423', 0x7, 1, 0); // open box
 			#endregion
 
-			// category 09 - continued symbols from 08
+			// FIXME: 09 should be more complete.
 			fillIndex [0x9] = 2;
 			// misc tech mark
 			for (int cp = 0x2300; cp <= 0x237A; cp++)
@@ -1846,17 +1416,12 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				boxLv2 [i] = 3;
 			foreach (DictionaryEntry de in boxValues) {
 				int cp = (int) de.Key;
-				int off = (int) de.Value;
+				int idx = (int) de.Value;
 				if (map [cp].Defined)
 					continue;
-				if (off < 0) {
-					fillIndex [0x9] = (byte) (0xE5 + off);
-					AddCharMapGroup ((char) cp, 0x9, 0, boxLv2 [-off]++);
-				}
-				else {
-					fillIndex [0x9] = (byte) (0xE5 + off);
-					AddCharMapGroup ((char) cp, 0x9, 0, boxLv2 [off]++);
-				}
+				fillIndex [0x9] = (byte) (0xE5 + idx);
+				AddCharMapGroup ((char) cp, 0x9, 0, boxLv2 [idx]);
+				boxLv2 [idx]++;
 			}
 			// Some special characters (slanted)
 			fillIndex [0x9] = 0xF4;
@@ -1872,8 +1437,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				uc = Char.GetUnicodeCategory ((char) cp);
 				if (!IsIgnorable (cp) &&
 					uc == UnicodeCategory.CurrencySymbol &&
-					cp != '$' ||
-					cp == 0xAC)
+					cp != '$')
 					AddCharMapGroup ((char) cp, 0xA, 1, 0);
 			}
 			// byte other symbols
@@ -1882,24 +1446,10 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 					continue; // SPECIAL: skip FIXME: why?
 				uc = Char.GetUnicodeCategory ((char) cp);
 				if (!IsIgnorable (cp) &&
-					uc == UnicodeCategory.OtherSymbol ||
-					cp == '\u00B5' || cp == '\u00B7')
+					uc == UnicodeCategory.OtherSymbol)
 					AddCharMapGroup ((char) cp, 0xA, 1, 0);
 			}
-			// U+30FB here
-			AddCharMapGroup ('\u30FB', 0xA, 1, 0);
 
-			for (int cp = 0x2020; cp <= 0x2031; cp++)
-				if (Char.IsPunctuation ((char) cp))
-					AddCharMap ((char) cp, 0xA, 1, 0);
-			// SPECIAL CASES: why?
-			AddCharMap ('\u203B', 0xA, 1, 0);
-			AddCharMap ('\u2040', 0xA, 1, 0);
-			AddCharMap ('\u2041', 0xA, 1, 0);
-			AddCharMap ('\u2042', 0xA, 1, 0);
-
-			for (int cp = 0x20A0; cp <= 0x20AB; cp++)
-				AddCharMap ((char) cp, 0xA, 1, 0);
 			fillIndex [0xA] = 0x2F; // FIXME: it won't be needed
 			for (int cp = 0x2600; cp <= 0x2613; cp++)
 				AddCharMap ((char) cp, 0xA, 1, 0);
@@ -1951,18 +1501,14 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 						fillIndex [0xC]++;
 
 					int xcp;
-					if (currValue <= 10) {
-						xcp = (int) prevValue + 0x2170 - 1;
-						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-						xcp = (int) prevValue + 0x2160 - 1;
-						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-						fillIndex [0xC] += 2;
-						xcp = (int) prevValue + 0x3021 - 1;
-						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-						fillIndex [0xC]++;
-					}
-					else if (currValue == 11)
-						fillIndex [0xC]++;
+					xcp = (int) prevValue + 0x2170 - 1;
+					AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
+					xcp = (int) prevValue + 0x2160 - 1;
+					AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
+					fillIndex [0xC] += 2;
+					xcp = (int) prevValue + 0x3021 - 1;
+					AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
+					fillIndex [0xC]++;
 				}
 				if (prevValue < currValue)
 					prevValue = currValue;
@@ -1980,23 +1526,23 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				else if (cp == 0x3021) // FIXME: why?
 					fillIndex [0xC]++;
 				AddCharMapGroup ((char) cp, 0xC, 0, diacritical [cp]);
+
 				if (addnew || cp <= '9') {
-					int mod = (int) currValue - 1;
 					int xcp;
 					if (1 <= currValue && currValue <= 10) {
-						xcp = mod + 0x2776;
+						xcp = cp - 0x31 + 0x2776;
 						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-						xcp = mod + 0x2780;
+						xcp = cp - 0x31 + 0x2780;
 						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-						xcp = mod + 0x278A;
+						xcp = cp - 0x31 + 0x278A;
 						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
 					}
 					if (1 <= currValue && currValue <= 20) {
-						xcp = mod + 0x2460;
+						xcp = cp - 0x31 + 0x2460;
 						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-						xcp = mod + 0x2474;
+						xcp = cp - 0x31 + 0x2474;
 						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-						xcp = mod + 0x2488;
+						xcp = cp - 0x31 + 0x2488;
 						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
 					}
 				}
@@ -2026,6 +1572,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			for (int i = 0; i < alphabets.Length; i++)
 				AddAlphaMap (alphabets [i], 0xE, alphaWeights [i]);
 
+
 			// non-ASCII Latin alphabets
 			// FIXME: there is no such characters that are placed
 			// *after* "alphabets" array items. This is nothing
@@ -2045,9 +1592,6 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				//   but inside a-to-z range.
 				// 3.there are some expanded characters that
 				//   are not part of Unicode Standard NFKD.
-				// 4. some characters are letter in IsLetter
-				//   but not in sortkeys (maybe unicode version
-				//   difference caused it).
 				switch (i) {
 				// 1. skipping them does not make sense
 //				case 0xD0: case 0xF0: case 0x131: case 0x138:
@@ -2065,12 +1609,11 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				case 0xFE: // Icelandic Thorn
 				case 0xDF: // German ss
 				case 0xFF: // German ss
-				// 4.
-				case 0x1C0: case 0x1C1: case 0x1C2: case 0x1C3:
 				// not classified yet
 //				case 0x1A6: case 0x1A7: case 0x1A8: case 0x1A9:
 //				case 0x1AA: case 0x1B1: case 0x1B7: case 0x1B8:
 //				case 0x1B9: case 0x1BA: case 0x1BB: case 0x1BF:
+//				case 0x1C0: case 0x1C1: case 0x1C2: case 0x1C3:
 //				case 0x1DD:
 					continue;
 				}
@@ -2091,82 +1634,17 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				if (Char.IsLetter ((char) i))
 					AddLetterMap ((char) i, 0xF, 1);
 
-			// Cyrillic.
-			// Cyrillic letters are sorted like Latin letters i.e. 
-			// containing culture-specific letters between the
-			// standard Cyrillic sequence.
-			//
-			// We can't use UCA here; it has different sorting.
-			char [] orderedCyrillic = new char [] {
-				'\u0430', '\u0431', '\u0432', '\u0433', '\u0434',
-				'\u0452', // DJE for Serbocroatian
-				'\u0435',
-				'\u0454', // IE for Ukrainian
-				'\u0436', '\u0437',
-				'\u0455', // DZE
-				'\u0438',
-				'\u0456', // Byelorussian-Ukrainian I
-				'\u0457', // YI
-				'\u0439',
-				'\u0458', // JE
-				'\u043A', '\u043B',
-				'\u0459', // LJE
-				'\u043C', '\u043D',
-				'\u045A', // NJE
-				'\u043E',
-				// 4E9 goes here.
-				'\u043F', '\u0440', '\u0441', '\u0442',
-				'\u045B', // TSHE for Serbocroatian
-				'\u0443',
-				'\u045E', // Short U for Byelorussian
-				'\u04B1', // Straight U w/ stroke (diacritical!)
-				'\u0444', '\u0445', '\u0446', '\u0447',
-				'\u045F', // DZHE
-				'\u0448', '\u0449', '\u044A', '\u044B', '\u044C',
-				'\u044D', '\u044E', '\u044F'};
-
-			// For some characters here is a map to basic cyrillic
-			// letters. See UnicodeData.txt character names for
-			// the sources. Here I simply declare an equiv. array.
-			// The content characters are map from U+490(,491),
-			// skipping small letters.
-			char [] cymap_src = new char [] {
-				'\u0433', '\u0433', '\u0433', '\u0436',
-				'\u0437', '\u043A', '\u043A', '\u043A',
-				'\u043A', '\u043D', '\u043D', '\u043F',
-				'\u0445', '\u0441', '\u0442', '\u0443',
-				'\u0443', '\u0445', '\u0446', '\u0447',
-				'\u0447', '\u0432', '\u0435', '\u0435',
-				'\u0406', '\u0436', '\u043A', '\u043D',
-				'\u0447', '\u0435'};
-
-			fillIndex [0x10] = 0x8D;
-			for (int i = 0x0460; i < 0x0481; i++) {
-				if (Char.IsLetter ((char) i)) {
-					if (i == 0x0476)
-						// U+476/477 have the same
-						// primary weight as U+474/475.
-						fillIndex [0x10] -= 3;
-					AddLetterMap ((char) i, 0x10, 3);
-				}
-			}
-
-			fillIndex [0x10] = 0x6;
+			// Cyrillic - UCA order w/ some modification
+			fillIndex [0x10] = 0x3;
+			// table which is mostly from UCA DUCET.
 			for (int i = 0; i < orderedCyrillic.Length; i++) {
-				char c = Char.ToUpper (orderedCyrillic [i], CultureInfo.InvariantCulture);
-				if (!IsIgnorable ((int) c) &&
-					Char.IsLetter (c) &&
-					!map [c].Defined) {
-					AddLetterMap (c, 0x10, 0);
-					fillIndex [0x10] += 3;
-				}
+				char c = orderedCyrillic [i];
+				if (Char.IsLetter (c))
+					AddLetterMap (c, 0x10, 3);
 			}
-
-			for (int i = 0; i < cymap_src.Length; i++) {
-				char c = cymap_src [i];
-				fillIndex [0x10] = map [c].Level1;
-				AddLetterMap ((char) (0x0490 + i * 2),
-					0x10, 0);
+			for (int i = 0x0460; i < 0x0481; i++) {
+				if (Char.IsLetter ((char) i))
+					AddLetterMap ((char) i, 0x10, 3);
 			}
 
 			// Armenian
@@ -2177,18 +1655,15 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 
 			// Hebrew
 			// -Letters
-			fillIndex [0x12] = 0x2;
+			fillIndex [0x12] = 0x3;
 			for (int i = 0x05D0; i < 0x05FF; i++)
 				if (Char.IsLetter ((char) i))
 					AddLetterMap ((char) i, 0x12, 1);
 			// -Accents
 			fillIndex [0x1] = 0x3;
-			for (int i = 0x0591; i <= 0x05C2; i++) {
-				if (i == 0x05A3 || i == 0x05BB)
-					fillIndex [0x1]++;
+			for (int i = 0x0591; i <= 0x05C2; i++)
 				if (i != 0x05BE)
 					AddCharMap ((char) i, 0x1, 1);
-			}
 
 			// Arabic
 			fillIndex [0x1] = 0x8E;
@@ -2206,18 +1681,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 //					(byte) arabicLetterPrimaryValues [i], 1);
 				fillIndex [0x13] = 
 					(byte) arabicLetterPrimaryValues [i];
-				byte formDiacritical = 8; // default
-				// SPECIAL CASES:
-				switch (i) {
-				case 0x0622: formDiacritical = 9; break;
-				case 0x0623: formDiacritical = 0xA; break;
-				case 0x0624: formDiacritical = 5; break;
-				case 0x0625: formDiacritical = 0xB; break;
-				case 0x0626: formDiacritical = 7; break;
-				case 0x0649: formDiacritical = 5; break;
-				case 0x064A: formDiacritical = 7; break;
-				}
-				AddLetterMapCore ((char) i, 0x13, 1, formDiacritical);
+				AddLetterMap ((char) i, 0x13, 0);
 			}
 			fillIndex [0x13] = 0x84;
 			for (int i = 0x0674; i < 0x06D6; i++)
@@ -2231,26 +1695,10 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				if (!IsIgnorable (i))
 					AddLetterMap ((char) i, 0x14, 2);
 			fillIndex [0x14] = 0xB;
-			for (int i = 0x0905; i < 0x093A; i++) {
-				if (i == 0x0928)
-					AddCharMap ('\u0929', 0x14, 0, 8);
-				if (i == 0x0930)
-					AddCharMap ('\u0931', 0x14, 0, 8);
-				if (i == 0x0933)
-					AddCharMap ('\u0934', 0x14, 0, 8);
+			for (int i = 0x0905; i < 0x093A; i++)
 				if (Char.IsLetter ((char) i))
 					AddLetterMap ((char) i, 0x14, 4);
-				if (i == 0x090B)
-					AddCharMap ('\u0960', 0x14, 4);
-				if (i == 0x090C)
-					AddCharMap ('\u0961', 0x14, 4);
-			}
-			fillIndex [0x14] = 0xDA;
-			for (int i = 0x093E; i < 0x0945; i++)
-				if (!IsIgnorable (i))
-					AddLetterMap ((char) i, 0x14, 2);
-			fillIndex [0x14] = 0xEC;
-			for (int i = 0x0945; i < 0x094F; i++)
+			for (int i = 0x093E; i < 0x094F; i++)
 				if (!IsIgnorable (i))
 					AddLetterMap ((char) i, 0x14, 2);
 
@@ -2279,81 +1727,33 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 
 			// Gurmukhi. orderedGurmukhi is from UCA
 			// FIXME: it does not look equivalent to UCA.
-			fillIndex [0x16] = 04;
-			fillIndex [0x1] = 3;
+			fillIndex [0x1] = 03;
+			fillIndex [0x16] = 02;
 			for (int i = 0; i < orderedGurmukhi.Length; i++) {
 				char c = orderedGurmukhi [i];
 				if (IsIgnorable ((int) c))
 					continue;
-				if (IsIgnorableNonSpacing (c)) {
+				if (!Char.IsLetter (c)) {
 					AddLetterMap (c, 0x1, 1);
 					continue;
 				}
 				if (c == '\u0A3C' || c == '\u0A4D' ||
 					'\u0A66' <= c && c <= '\u0A71')
 					continue;
-				// SPECIAL CASE: U+A38 = U+A36 at primary level (why?)
-				byte shift = 4;
-				if (c == '\u0A36' || c == '\u0A16' || c == '\u0A17' || c == '\u0A5B' || c == '\u0A5E')
-					shift = 0;
-				AddLetterMap (c, 0x16, shift);
+				AddLetterMap (c, 0x16, 4);
 			}
 
 			// Gujarati. orderedGujarati is from UCA
-			fillIndex [0x17] = 0x4;
-			// nonspacing marks
-			map [0x0A4D] = new CharMapEntry (1, 0, 0x3);
-			map [0x0ABD] = new CharMapEntry (1, 0, 0x3);
-			map [0x0A3C] = new CharMapEntry (1, 0, 0x4);
-			map [0x0A71] = new CharMapEntry (1, 0, 0x6);
-			map [0x0ABC] = new CharMapEntry (1, 0, 0xB);
-			map [0x0A70] = new CharMapEntry (1, 0, 0xE);
-			// letters go first.
-			for (int i = 0; i < orderedGujarati.Length; i++) {
-				// SPECIAL CASE
-				char c = orderedGujarati [i];
-				if (Char.IsLetter (c)) {
-					// SPECIAL CASES
-					if (c == '\u0AB3' || c == '\u0A32')
-						continue;
-					if (c == '\u0A33') {
-						AddCharMap ('\u0A32', 0x17, 0);
-						AddCharMap ('\u0A33', 0x17, 4, 4);
-						continue;
-					}
-					if (c == '\u0A8B')
-						AddCharMap ('\u0AE0', 0x17, 0, 5);
-					AddCharMap (c, 0x17, 4);
-
-					if (c == '\u0AB9')
-						AddCharMap ('\u0AB3', 0x17, 6);
-				}
-			}
-			// non-letters
-			byte gujaratiShift = 4;
-			fillIndex [0x17] = 0xC0;
-			for (int i = 0; i < orderedGujarati.Length; i++) {
-				char c = orderedGujarati [i];
-				if (fillIndex [0x17] == 0xCC)
-					gujaratiShift = 3;
-				if (!Char.IsLetter (c)) {
-					// SPECIAL CASES
-					if (c == '\u0A82')
-						AddCharMap ('\u0A81', 0x17, 2);
-					if (c == '\u0AC2')
-						fillIndex [0x17]++;
-					AddLetterMap (c, 0x17, gujaratiShift);
-				}
-			}
+			fillIndex [0x17] = 02;
+			for (int i = 0; i < orderedGujarati.Length; i++)
+				AddLetterMap (orderedGujarati [i], 0x17, 4);
 
 			// Oriya
-			fillIndex [0x1] = 03;
 			fillIndex [0x18] = 02;
 			for (int i = 0x0B00; i < 0x0B7F; i++) {
 				switch (Char.GetUnicodeCategory ((char) i)) {
 				case UnicodeCategory.NonSpacingMark:
 				case UnicodeCategory.DecimalDigitNumber:
-					AddLetterMap ((char) i, 0x1, 1);
 					continue;
 				}
 				AddLetterMap ((char) i, 0x18, 1);
@@ -2364,11 +1764,13 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			AddCharMap ('\u0BD7', 0x19, 0);
 			fillIndex [0x19] = 0xA;
 			// vowels
-			for (int i = 0x0B82; i <= 0x0B94; i++)
-				if (!IsIgnorable ((char) i))
+			for (int i = 0x0BD7; i < 0x0B94; i++)
+				if (Char.IsLetter ((char) i))
 					AddCharMap ((char) i, 0x19, 2);
 			// special vowel
-			fillIndex [0x19] = 0x28;
+			fillIndex [0x19] = 0x24;
+			AddCharMap ('\u0B94', 0x19, 0);
+			fillIndex [0x19] = 0x26;
 			// The array for Tamil consonants is a constant.
 			// Windows have almost similar sequence to TAM from
 			// tamilnet but a bit different in Grantha.
@@ -2400,82 +1802,47 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			for (int i = 0x0C80; i < 0x0CE5; i++) {
 				if (i == 0x0CD5 || i == 0x0CD6)
 					continue; // ignore
-				if (i == 0x0CB1 || i == 0x0CB3 || i == 0x0CDE)
-					continue; // shift after 0xCB9
 				AddCharMap ((char) i, 0x1B, 3);
-				if (i == 0x0CB9) {
-					// SPECIAL CASES: but why?
-					AddCharMap ('\u0CB1', 0x1B, 3); // RRA
-					AddCharMap ('\u0CB3', 0x1B, 3); // LLA
-					AddCharMap ('\u0CDE', 0x1B, 3); // FA
-				}
-				if (i == 0x0CB2)
-					AddCharMap ('\u0CE1', 0x1B, 3); // vocalic LL
 			}
 			
 			// Malayalam
 			fillIndex [0x1C] = 2;
-			fillIndex [0x1] = 3;
-			for (int i = 0x0D02; i < 0x0D61; i++) {
+			for (int i = 0x0D02; i < 0x0D61; i++)
 				// FIXME: I avoided MSCompatUnicodeTable usage
 				// here (it results in recursion). So check if
 				// using NonSpacingMark makes sense or not.
 				if (Char.GetUnicodeCategory ((char) i) != UnicodeCategory.NonSpacingMark)
 //				if (!MSCompatUnicodeTable.IsIgnorable ((char) i))
 					AddCharMap ((char) i, 0x1C, 1);
-				else if (!IsIgnorable ((char) i))
-					AddCharMap ((char) i, 1, 1);
-			}
 
 			// Thai ... note that it breaks 0x1E wall after E2B!
 			// Also, all Thai characters have level 2 value 3.
 			fillIndex [0x1E] = 2;
-			fillIndex [0x1] = 3;
-			for (int i = 0xE40; i <= 0xE44; i++)
+			for (int i = 0xE44; i < 0xE48; i++)
 				AddCharMap ((char) i, 0x1E, 1, 3);
 			for (int i = 0xE01; i < 0xE2B; i++)
-				AddCharMap ((char) i, 0x1E, 6, 3);
+				AddCharMap ((char) i, 0x1E, 6, 0);
 			fillIndex [0x1F] = 5;
 			for (int i = 0xE2B; i < 0xE30; i++)
-				AddCharMap ((char) i, 0x1F, 6, 3);
-			fillIndex [0x1F] = 0x1E;
+				AddCharMap ((char) i, 0x1F, 6, 0);
 			for (int i = 0xE30; i < 0xE3B; i++)
 				AddCharMap ((char) i, 0x1F, 1, 3);
 			// some Thai characters remains.
 			char [] specialThai = new char [] {'\u0E45', '\u0E46',
 				'\u0E4E', '\u0E4F', '\u0E5A', '\u0E5B'};
 			foreach (char c in specialThai)
-				AddCharMap (c, 0x1F, 1, 3);
-
-			for (int i = 0xE00; i < 0xE80; i++)
-				if (Char.GetUnicodeCategory ((char) i) ==
-					UnicodeCategory.NonSpacingMark)
-					AddCharMap ((char) i, 1, 1);
+				AddCharMap (c, 0x1F, 1);
 
 			// Lao
 			fillIndex [0x1F] = 2;
-			fillIndex [0x1] = 3;
-			for (int i = 0xE80; i < 0xEDF; i++) {
-				if (IsIgnorable ((char) i))
-					continue;
-				else if (Char.IsLetter ((char) i))
+			for (int i = 0xE80; i < 0xEDF; i++)
+				if (Char.IsLetter ((char) i))
 					AddCharMap ((char) i, 0x1F, 1);
-				else if (Char.GetUnicodeCategory ((char) i) ==
-					UnicodeCategory.NonSpacingMark)
-					AddCharMap ((char) i, 1, 1);
-			}
 
 			// Georgian. orderedGeorgian is from UCA DUCET.
 			fillIndex [0x21] = 5;
-			for (int i = 0; i < orderedGeorgian.Length; i++) {
-				char c = orderedGeorgian [i];
-				if (map [(int) c].Defined)
-					continue;
-				AddCharMap (c, 0x21, 0);
-				if (c < '\u10F6')
-					AddCharMap ((char) (c - 0x30), 0x21, 0);
-				fillIndex [0x21] += 5;
-			}
+			for (int i = 0; i < orderedGeorgian.Length; i++)
+				AddLetterMap (orderedGeorgian [i], 0x21, 5);
 
 			// Japanese Kana.
 			fillIndex [0x22] = 2;
@@ -2500,16 +1867,6 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 						AddKanaMap (cp, kanaLines [gyo]);
 					fillIndex [0x22]++;
 
-					if (cp == 0x30AB) {
-						// add small 'ka' (before normal one)
-						AddKanaMap (0x30F5, 1);
-						kanaOffset++;
-					}
-					if (cp == 0x30B1) {
-						// add small 'ke' (before normal one)
-						AddKanaMap (0x30F6, 1);
-						kanaOffset++;
-					}
 					if (cp == 0x3061) {
 						// add small 'Tsu' (before normal one)
 						AddKanaMap (0x3063, 1);
@@ -2540,27 +1897,11 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			AddLetterMap ((char) 0x3093, 0x22, 0);
 			AddLetterMap ((char) (0x3093 + 0x60), 0x22, 0);
 
-			map [0x3094] = new CharMapEntry (map [0x30A6].Category,
-				map [0x30A6].Level1, 3);// voiced hiragana U
-			map [0x30F4] = new CharMapEntry (map [0x30A6].Category,
-				map [0x30A6].Level1, 3);// voiced katakana U
-
-			map [0x30F5] = new CharMapEntry (map [0x30AB].Category,
-				map [0x30AB].Level1, 0);// small katakana Ka
-			map [0x30F6] = new CharMapEntry (map [0x30B1].Category,
-				map [0x30B1].Level1, 0);// small katakana Ke
-			// voiced Wa lines
-			for (int i = 0x30F7; i < 0x30FB; i++)
-				map [i] = new CharMapEntry (map [i - 8].Category,
-					map [i - 8].Level1,
-					3);
-
 			// JIS Japanese square chars.
 			fillIndex [0x22] = 0x97;
 			jisJapanese.Sort (JISComparer.Instance);
 			foreach (JISCharacter j in jisJapanese)
-				if (0x3300 <= j.CP && j.CP <= 0x3357)
-					AddCharMap ((char) j.CP, 0x22, 1);
+				AddCharMap ((char) j.CP, 0x22, 1);
 			// non-JIS Japanese square chars.
 			nonJisJapanese.Sort (NonJISComparer.Instance);
 			foreach (NonJISCharacter j in nonJisJapanese)
@@ -2590,19 +1931,14 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				map [cp] = new CharMapEntry (0x24,
 					(byte) (map [cp - 1].Level1 + 2),
 					0);
-			// FIXME: Syriac NonSpacingMark should go here.
 
 			// Thaana
 			// FIXME: it turned out that it does not look like UCA
 			fillIndex [0x24] = 0x6E;
-			fillIndex [0x1] = 0xAC;
 			for (int i = 0; i < orderedThaana.Length; i++) {
-				char c = orderedThaana [i];
-				if (IsIgnorableNonSpacing ((int) c))
-					AddCharMap (c, 1, 1);
-				AddCharMap (c, 0x24, 2);
-				if (c == '\u0782') // SPECIAL CASE: why?
-					fillIndex [0x24] += 2;
+				if (IsIgnorableNonSpacing (i))
+					continue;
+				AddCharMap (orderedThaana [i], 0x24, 2);
 			}
 			#endregion
 
@@ -2641,7 +1977,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			+ "<{\u1113 \u1116}, \u3165,"
 				+ "\u11C5, \u11C6=\u3166,, \u11C7, \u11C8,"
 				+ "\u11AC, \u11C9, \u11AD, \u1103=\u11AE  >"
-			+ "<\u1117, \u11CA, \u1104, \u11CB > \u1105=\u11AF >"
+			+ "<\u1117, \u11CA, \u1104, \u11CB > \u1105 >"
 			+ "<{\u1118 \u111B}, \u11B0, [\u11CC \u11D0], \u11B1,"
 				+ "[\u11D1 \u11D2], \u11B2,"
 				+ "[\u11D3 \u11D5], \u11B3,"
@@ -2649,11 +1985,10 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				+ "\u11B6=\u11D8, \u3140,, \u11D9, \u1106=\u11B7 >"
 			+ "<{\u111C \u111D}, [\u11DA \u11E2], \u1107=\u11B8 >"
 			+ "<{\u111E \u1120}, \u3172,, \u3173, \u11E3, \u1108 >"
-			+ "<{\u1121 \u112C}, \u3144 \u11B9, \u3174, \u3175,,,, "
-				+ "\u3176,, \u3177, [\u11E4 \u11E6] \u3178,"
-				+ "\u3179, \u1109=\u11BA,,, \u3214=\u3274 <>"
-			+ "<{\u112D \u1133}, \u11E7 \u317A, \u317B, \u317C "
-				+ "[\u11E8 \u11E9],, \u11EA \u317D,, \u110A=\u11BB,,, >"
+			+ "<{\u1121 \u112C}, \u11B9,,,,,,,,, [\u11E4 \u11E6],,"
+				+ "\u1109=\u11BA,,, \u3214=\u3274 <>"
+			+ "<{\u112D \u1133}, \u11E7,, [\u11E8 \u11E9],,"
+				+ "\u11EA,, \u110A=\u11BB,,, >"
 			+ "<{\u1134 \u1140}, \u317E,,,,,,, \u11EB,"
 				+ "\u110B=\u11BC, [\u1161 \u11A2], \u1160 >"
 			+ "<{\u1141 \u114C}, \u11EE, \u11EC, \u11ED,,,,, "
@@ -2721,40 +2056,6 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				}
 			}
 
-			// Some Jamo NFKD.
-			for (int i = 0x3200; i < 0x3300; i++) {
-				if (IsIgnorable (i) || map [i].Defined)
-					continue;
-				int ch = 0;
-				// w/ bracket
-				if (decompLength [i] == 4 &&
-					decompValues [decompIndex [i]] == '(')
-					ch = decompIndex [i] + 1;
-				// circled
-				else if (decompLength [i] == 2 &&
-					decompValues [decompIndex [i] + 1] == '\u1161')
-					ch = decompIndex [i];
-				else if (decompLength [i] == 1)
-					ch = decompIndex [i];
-				else
-					continue;
-				ch = decompValues [ch];
-				if (ch < 0x1100 || 0x1200 < ch &&
-					ch < 0xAC00 || 0xD800 < ch)
-					continue;
-
-				// SPECIAL CASE ?
-				int offset = i < 0x3260 ? 1 : 0;
-				if (0x326E <= i && i <= 0x3273)
-					offset = 1;
-
-				map [i] = new CharMapEntry (map [ch].Category,
-					(byte) (map [ch].Level1 + offset),
-					map [ch].Level2);
-//					Console.Error.WriteLine ("Jamo {0:X04} -> {1:X04}", i, decompValues [decompIndex [i] + 1]);
-			}
-
-
 			#endregion
 
 			// Letterlike characters and CJK compatibility square
@@ -2811,8 +2112,10 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				// Insert 3001 after ',' and 3002 after '.'
 				if (i == 0x2C)
 					AddCharMapGroup2 ('\u3001', 0x7, 1, 0);
-				else if (i == 0x2E)
+				else if (i == 0x2E) {
+					fillIndex [0x7]--;
 					AddCharMapGroup2 ('\u3002', 0x7, 1, 0);
+				}
 				else if (i == 0x3A)
 					AddCharMap ('\uFE30', 0x7, 1, 0);
 			}
@@ -2823,18 +2126,10 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				if (IsIgnorable (i))
 					continue;
 
-				// FIXME: actually those reset should not be 
-				// done but here I put for easy goal.
-				if (i == 0x0700)
-					fillIndex [0x7] = 0xE2;
-				if (i == 0x2016)
-					fillIndex [0x7] = 0x77;
-
 				// SPECIAL CASES:
 				switch (i) {
 				case 0xAB: // 08
 				case 0xB7: // 0A
-				case 0xBB: // 08
 				case 0x2329: // 09
 				case 0x232A: // 09
 					continue;
@@ -2848,7 +2143,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				case UnicodeCategory.FinalQuotePunctuation:
 				case UnicodeCategory.ModifierSymbol:
 					// SPECIAL CASES: // 0xA
-					if (0x2020 <= i && i <= 0x2031)
+					if (0x2020 <= i && i <= 0x2042)
 						continue;
 					AddCharMapGroup ((char) i, 0x7, 1, 0);
 					break;
@@ -2859,15 +2154,20 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				}
 			}
 			// Control pictures
-			// FIXME: it should not need to reset level 1, but
-			// it's for easy goal.
-			fillIndex [0x7] = 0xB6;
 			for (int i = 0x2400; i <= 0x2421; i++)
 				AddCharMap ((char) i, 0x7, 1, 0);
 			#endregion
 
 			// FIXME: for 07 xx we need more love.
 
+			// FIXME: category 08 (currently filled only from MathSymbol characters) should be more complete.
+			fillIndex [0x8] = 2;
+			for (int cp = 0; cp < char.MaxValue; cp++)
+				if (!map [cp].Defined &&
+					Char.GetUnicodeCategory ((char) cp) ==
+					UnicodeCategory.MathSymbol)
+					AddCharMapGroup ((char) cp, 0x8, 1, 0);
+
 			// Characters w/ diacritical marks (NFKD)
 			for (int i = 0; i <= char.MaxValue; i++) {
 				if (map [i].Defined || IsIgnorable (i))
@@ -2906,60 +2206,6 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				
 			}
 
-			// category 08 - symbols
-			fillIndex [0x8] = 2;
-			// Here Windows mapping is not straightforward. It is
-			// not based on computation but seems manual sorting.
-			AddCharMapGroup ('+', 0x8, 1, 0); // plus
-			AddCharMapGroup ('\u2212', 0x8, 1, 0); // minus
-			AddCharMapGroup ('\u229D', 0x8, 1, 0); // minus
-			AddCharMapGroup ('\u2297', 0x8, 1, 0); // mul
-			AddCharMapGroup ('\u2044', 0x8, 1, 0); // div
-			AddCharMapGroup ('\u2215', 0x8, 1, 0); // div
-			AddCharMapGroup ('\u2217', 0x8, 1, 0); // mul
-			AddCharMapGroup ('\u2218', 0x8, 1, 0); // ring
-			AddCharMapGroup ('\u2219', 0x8, 1, 0); // bullet
-			AddCharMapGroup ('\u2213', 0x8, 1, 0); // minus-or-plus
-			AddCharMapGroup ('\u003C', 0x8, 1, 0); // <
-			AddCharMapGroup ('\u227A', 0x8, 1, 0); // precedes relation
-			AddCharMapGroup ('\u22B0', 0x8, 1, 0); // precedes under relation
-
-			for (int cp = 0; cp < 0x2300; cp++) {
-				if (cp == 0xAC) // SPECIAL CASE: skip
-					continue;
-				if (cp == 0x200) {
-					cp = 0x2200; // skip to 2200
-					fillIndex [0x8] = 0x21;
-				}
-				if (cp == 0x2295)
-					fillIndex [0x8] = 0x3;
-				if (cp == 0x22B2)
-					fillIndex [0x8] = 0xB9;
-				if (!map [cp].Defined &&
-//					Char.GetUnicodeCategory ((char) cp) ==
-//					UnicodeCategory.MathSymbol)
-					Char.IsSymbol ((char) cp))
-					AddCharMapGroup ((char) cp, 0x8, 1, diacritical [cp]);
-				// SPECIAL CASES: no idea why Windows sorts as such
-				switch (cp) {
-				case 0x3E:
-					AddCharMap ('\u227B', 0x8, 1, 0);
-					AddCharMap ('\u22B1', 0x8, 1, 0);
-					break;
-				case 0xB1:
-					AddCharMapGroup ('\u00AB', 0x8, 1, 0);
-					AddCharMapGroup ('\u226A', 0x8, 1, 0);
-					AddCharMapGroup ('\u00BB', 0x8, 1, 0);
-					AddCharMapGroup ('\u226B', 0x8, 1, 0);
-					break;
-				case 0xF7:
-					AddCharMap ('\u01C0', 0x8, 1, 0);
-					AddCharMap ('\u01C1', 0x8, 1, 0);
-					AddCharMap ('\u01C2', 0x8, 1, 0);
-					break;
-				}
-			}
-
 			#region Level2 adjustment
 			// Arabic Hamzah
 			diacritical [0x624] = 0x5;
@@ -2970,6 +2216,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			diacritical [0x649] = 0x5; // 'alif maqs.uurah
 			diacritical [0x64A] = 0x7; // Yaa'
 
+
 			for (int i = 0; i < char.MaxValue; i++) {
 				byte mod = 0;
 				byte cat = map [i].Category;
@@ -2979,7 +2226,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 					mod = diacritical [i];
 					break;
 				case 0x13: // Arabic
-					if (diacritical [i] == 0 && i >= 0xFE8D)
+					if (diacritical [i] == 0)
 						mod = 0x8; // default for arabic
 					break;
 				}
@@ -2991,23 +2238,15 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			}
 			#endregion
 
-			// FIXME: this is hack but those NonSpacingMark 
-			// characters and still undefined are likely to
-			// be nonspacing.
+			// FIXME: this is a hack: characters whose category
+			// is NonSpacingMark and which are still undefined
+			// are likely to be nonspacing, so add them to category 1.
 			for (int i = 0; i < char.MaxValue; i++)
 				if (!map [i].Defined &&
 					!IsIgnorable (i) &&
 					Char.GetUnicodeCategory ((char) i) ==
 					UnicodeCategory.NonSpacingMark)
 					AddCharMap ((char) i, 1, 1);
-
-			// FIXME: this is hack but those Symbol characters
-			// are likely to fall into 0xA category.
-			for (int i = 0; i < char.MaxValue; i++)
-				if (!map [i].Defined &&
-					!IsIgnorable (i) &&
-					Char.IsSymbol ((char) i))
-					AddCharMap ((char) i, 0xA, 1);
 		}
 
 		private void IncrementSequentialIndex (ref byte hangulCat)
@@ -3085,6 +2324,19 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			return true;
 		}
 
+		private void AddCharMapGroupTail (char c, byte category, byte updateCount) // adds c and its "tail" small/full-width variants to category
+		{
+			char c2 = ToSmallFormTail (c); // ToDecomposed lookup for DecompositionSmall with tail = true
+			if (c2 != c) // only add when the lookup returned something other than c
+				AddCharMap (c2, category, updateCount, 0);
+			// the character itself, added after its small-form variant (if any)
+			AddCharMap (c, category, updateCount, 0);
+			// <full>: ToDecomposed lookup for DecompositionFull with tail = true; recurse so the result's own variants are added too
+			c2 = ToFullWidthTail (c);
+			if (c2 != c)
+				AddCharMapGroupTail (c2, category, updateCount);
+		}
+
 		//
 		// Adds characters to table in the order below 
 		// (+ increases weight):
@@ -3160,10 +2412,6 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			AddCharMapCJK (c, ref category);
 
 			// LAMESPEC: see below.
-			if (c == '\u5B78') {
-				AddCharMapCJK ('\u32AB', ref category);
-				AddCharMapCJK ('\u323B', ref category);
-			}
 			if (c == '\u52DE') {
 				AddCharMapCJK ('\u3298', ref category);
 				AddCharMapCJK ('\u3238', ref category);
@@ -3193,8 +2441,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				// mix Chinise and Japanese Kanji when
 				// ordering those characters.
 				switch (w) {
-				case 0x32A2: case 0x3298: case 0x3238:
-				case 0x32A9: case 0x323B: case 0x32AB:
+				case 0x32A2: case 0x3298: case 0x3238: case 0x32A9:
 					continue;
 				}
 
@@ -3245,26 +2492,14 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 					AddCharMap (vertical, category, updateCount, level2);
 		}
 
-		private void AddArabicCharMap (char c)
+		char ToFullWidth (char c) // ToDecomposed lookup for DecompositionFull, tail = false
 		{
-			byte category = 6;
-			byte updateCount = 1;
-			byte level2 = 0;
-
-			// itself
-			AddCharMap (c, category, 0, level2);
+			return ToDecomposed (c, DecompositionFull, false);
+		}
 
-			// Since nfkdMap is problematic to have two or more
-			// NFKD to an identical character, here I iterate all.
-			for (int c2 = 0; c2 < char.MaxValue; c2++) {
-				if (decompLength [c2] == 0)
-					continue;
-				int idx = decompIndex [c2] + decompLength [c2] - 1;
-				if ((int) (decompValues [idx]) == (int) c)
-					AddCharMap ((char) c2, category,
-						0, level2);
-			}
-			fillIndex [category] += updateCount;
+		char ToFullWidthTail (char c) // same lookup as ToFullWidth, but passes tail = true
+		{
+			return ToDecomposed (c, DecompositionFull, true);
 		}
 
 		char ToSmallForm (char c)
@@ -3272,6 +2507,11 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			return ToDecomposed (c, DecompositionSmall, false);
 		}
 
+		char ToSmallFormTail (char c) // same lookup as ToSmallForm, but passes tail = true
+		{
+			return ToDecomposed (c, DecompositionSmall, true); // DecompositionSmall selects the decomposition type to match
+		}
+
 		char ToDecomposed (char c, byte d, bool tail)
 		{
 			if (decompType [(int) c] != d)
@@ -3302,30 +2542,6 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 
 		private byte ComputeLevel3WeightRaw (char c) // add 2 for sortkey value
 		{
-			// CJK compat
-			if ('\u3192' <= c && c <= '\u319F')
-				return 0;
-
-			// They have <narrow> NFKD mapping, and on Windows
-			// those narrow characters are regarded as "normal",
-			// thus those characters themselves are regarded as
-			// "wide". grep "<narrow>" and you can pick them up
-			// (ignoring Kana, Hangul etc.)
-			switch (c) {
-			case '\u3002':
-			case '\u300C':
-			case '\u300D':
-			case '\u3001':
-			case '\u30FB':
-			case '\u2502':
-			case '\u2190':
-			case '\u2191':
-			case '\u2192':
-			case '\u2193':
-			case '\u25A0':
-			case '\u25CB':
-				return 1;
-			}
 			// Korean
 			if ('\u11A8' <= c && c <= '\u11F9')
 				return 2;
@@ -3333,11 +2549,6 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				return 4;
 			if ('\u3130' <= c && c <= '\u3164')
 				return 5;
-			if ('\u3165' <= c && c <= '\u318E')
-				return 4;
-			// Georgian Capital letters
-			if ('\u10A0' <= c && c <= '\u10C5')
-				return 0x10;
 			// numbers
 			if ('\u2776' <= c && c <= '\u277F')
 				return 4;
@@ -3346,13 +2557,13 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			if ('\u2776' <= c && c <= '\u2793')
 				return 0xC;
 			if ('\u2160' <= c && c <= '\u216F')
-				return 0x10;
+				return 0x18;
 			if ('\u2181' <= c && c <= '\u2182')
 				return 0x18;
 			// Arabic
 			if ('\u2135' <= c && c <= '\u2138')
 				return 4;
-			if ('\uFE80' <= c && c < '\uFF00') {
+			if ('\uFE80' <= c && c < '\uFE8E') {
 				// 2(Isolated)/8(Final)/0x18(Medial)
 				switch (decompType [(int) c]) {
 				case DecompositionIsolated:
@@ -3453,7 +2664,6 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 			// those ranges.
 			case 0x4d8: case 0x4d9:
 			case 0x4e8: case 0x4e9:
-			case 0x70F:
 			case 0x3036: case 0x303f:
 			case 0x337b: case 0xfb1e:
 				return false;
@@ -3812,7 +3022,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 		{
 			JISCharacter j1 = (JISCharacter) o1;
 			JISCharacter j2 = (JISCharacter) o2;
-			return j1.JIS - j2.JIS;
+			return j2.JIS - j1.JIS;
 		}
 	}
 
@@ -4022,7 +3232,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 				for (int i = 0; i < Source.Length; i++)
 					ret [i + 1] = Source [i];
 				// null terminate
-				for (int i = 0; i < 4; i++)
+				for (int i = 0; i < 5; i++)
 					ret [i + Source.Length + 2] = (char) SortKey [i];
 				return ret;
 			}