2007-11-30 Zoltan Varga <vargaz@gmail.com>
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / create-mscompat-collation-table.cs
index 52457c54c1a09758b1f317b7b67ad31ba344bf79..e255c938eb6644f3e33736909e4deb87dea273a2 100644 (file)
@@ -1,4 +1,31 @@
 //
+// create-mscompat-collation-table.cs : generates Windows-like sortkey tables.
+//
+// Author:
+//     Atsushi Enomoto  <atsushi@ximian.com>
+//
+// Copyright (C) 2005 Novell, Inc (http://www.novell.com)
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the
+// "Software"), to deal in the Software without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to
+// the following conditions:
+// 
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
 //
 // There are two kinds of sort keys: those which are computed and those which are laid out
 // as an indexed array. Computed sort keys are:
@@ -6,24 +33,9 @@
 //     - Surrogate
 //     - PrivateUse
 //
-// Also, for composite characters it should prepare different index table.
-//
 // Though it is possible to "compute" level 3 weights, they are still dumped
 // to an array to avoid execution cost.
 //
-
-//
-// * sortkey getter signature
-//
-//     int GetSortKey (string s, int index, SortKeyBuffer buf)
-//     Stores sort key for corresponding character element into buf and
-//     returns the length of the consumed _source_ character element in s.
-//
-// * character length to consume
-//
-//     If there are characters whose primary weight is 0, they are consumed
-//     and considered as a part of the character element.
-//
 #define Binary
 
 using System;
@@ -33,6 +45,8 @@ using System.Globalization;
 using System.Text;
 using System.Xml;
 
+using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
+
 namespace Mono.Globalization.Unicode
 {
        internal class MSCompatSortKeyTableGenerator
@@ -61,7 +75,8 @@ namespace Mono.Globalization.Unicode
                const int DecompositionCompat = 0x11;
                const int DecompositionCanonical = 0x12;
 
-               TextWriter Result = Console.Out;
+               TextWriter CSResult = Console.Out;
+               TextWriter CResult = TextWriter.Null;
 
                byte [] fillIndex = new byte [256]; // by category
                CharMapEntry [] map = new CharMapEntry [char.MaxValue + 1];
@@ -97,7 +112,8 @@ namespace Mono.Globalization.Unicode
 
                string [] diacritics = new string [] {
                        // LATIN, CYRILLIC etc.
-                       "UPTURN", "DOUBLE-STRUCK",
+                       "VERTICAL LINE ABOVE", "UPTURN", "DOUBLE-STRUCK",
+                       "ABKHASIAN",
                        "MIDDLE HOOK", "WITH VERTICAL LINE ABOVE;", "WITH TONOS",
                        "WITH ACUTE ACCENT;", "WITH GRAVE ACCENT;",
                        "WITH ACUTE;", "WITH GRAVE;",
@@ -106,7 +122,9 @@ namespace Mono.Globalization.Unicode
                        "WITH CIRCUMFLEX ACCENT;", "WITH CIRCUMFLEX;",
                        "WITH DIALYTIKA;",
                        "WITH DIAERESIS;", "WITH CARON;", "WITH BREVE;",
-                       "DIALYTIKA TONOS", "DIALYTIKA AND TONOS", "WITH MACRON;", "WITH TILDE;", "WITH RING ABOVE;",
+                       "DIALYTIKA TONOS", "DIALYTIKA AND TONOS",
+                       "ABKHASIAN CHE WITH DESCENDER",
+                       "WITH MACRON;", "WITH TILDE;", "WITH RING ABOVE;",
                        "WITH OGONEK;", "WITH CEDILLA;",
                        //
                        " DOUBLE ACUTE;", " ACUTE AND DOT ABOVE;",
@@ -128,8 +146,8 @@ namespace Mono.Globalization.Unicode
                        " BREVE AND TILDE",
                        " CEDILLA AND BREVE",
                        " OGONEK AND MACRON",
-                       //
-                       "WITH OVERLINE",
+                       // 0x40
+                       "WITH OVERLINE", "DOUBLE VERTICAL LINE ABOVE",
                        "WITH HOOK;", "LEFT HOOK;", " WITH HOOK ABOVE;",
                        " DOUBLE GRAVE",
                        " INVERTED BREVE",
@@ -139,11 +157,12 @@ namespace Mono.Globalization.Unicode
                        " LINE BELOW;", " CIRCUMFLEX AND HOOK ABOVE",
                        " PALATAL HOOK",
                        " DOT BELOW;",
-                       " RETROFLEX;", "DIAERESIS BELOW",
-                       " RING BELOW",
+                       " RETROFLEX;", "DIAERESIS BELOW", "RETROFLEX HOOK",
+                       " RING BELOW", "LOW VERTICAL LINE",
                        //
                        " CIRCUMFLEX BELOW", "HORN AND ACUTE",
                        " BREVE BELOW;", " HORN AND GRAVE",
+                       " LOW MACRON",
                        " TILDE BELOW",
                        " TOPBAR",
                        " DOT BELOW AND DOT ABOVE",
@@ -161,12 +180,12 @@ namespace Mono.Globalization.Unicode
                        };
                byte [] diacriticWeights = new byte [] {
                        // LATIN.
-                       3, 3, 5, 5, 5,
+                       3, 3, 3, 5, 5, 5, 5,
                        0xE, 0xF,
                        0xE, 0xF,
                        //
                        0x10, 0x11, 0x12, 0x12, 0x13, 0x13, 0x14, 0x15, 0x16,
-                       0x16, 0x17, 0x19, 0x1A, 0x1B, 0x1C,
+                       0x16, 0x17, 0x17, 0x19, 0x1A, 0x1B, 0x1C,
                        //
                        0x1D, 0x1D, 0x1E, 0x1E, 0x1E, 0x1F, 0x1F, 0x1F,
                        0x20, 0x21, 0x22, 0x22, 0x23, 0x24,
@@ -174,10 +193,11 @@ namespace Mono.Globalization.Unicode
                        0x25, 0x25, 0x25, 0x26, 0x28, 0x28, 0x28,
                        0x29, 0x2A, 0x2B, 0x2C, 0x2F, 0x30,
                        //
-                       0x40, 0x43, 0x43, 0x43, 0x44, 0x46, 0x47, 0x48,
-                       0x52, 0x55, 0x55, 0x57, 0x58, 0x59, 0x59, 0x5A,
+                       0x40, 0x41, 0x43, 0x43, 0x43, 0x44, 0x46, 0x47, 0x48,
+                       0x52, 0x55, 0x55, 0x57, 0x58, 0x59, 0x59, 0x59,
+                       0x5A, 0x5A,
                        //
-                       0x60, 0x60, 0x61, 0x61, 0x63, 0x68, 0x68,
+                       0x60, 0x60, 0x61, 0x61, 0x62, 0x63, 0x68, 0x68,
                        0x69, 0x69, 0x6A, 0x6D, 0x6E,
                        0x87, 0x95, 0xAA,
                        // CIRCLED, PARENTHESIZED and so on.
@@ -253,7 +273,9 @@ namespace Mono.Globalization.Unicode
                        ModifyParsedValues ();
                        GenerateCore ();
                        Console.Error.WriteLine ("generation done.");
+                       CResult = new StreamWriter ("collation-tables.h", false);
                        Serialize ();
+                       CResult.Close ();
                        Console.Error.WriteLine ("serialization done.");
 /*
 StreamWriter sw = new StreamWriter ("agelog.txt");
@@ -284,6 +306,11 @@ sw.Close ();
                                source, typeof (ushort), i);
                }
 
+               void WriteByte (byte value)
+               {
+                       // Empty body: 'value' is ignored and nothing is written.
+               }
+
                void Serialize ()
                {
                        // Tailorings
@@ -293,12 +320,16 @@ sw.Close ();
                        byte [] level1 = new byte [map.Length];
                        byte [] level2 = new byte [map.Length];
                        byte [] level3 = new byte [map.Length];
-                       ushort [] widthCompat = new ushort [map.Length];
+// widthCompat is now removed from the mapping table.
+// If it turns out that it is still required, grep this source and uncomment
+// widthCompat related lines. FIXME: remove those lines in the future.
+//                     ushort [] widthCompat = new ushort [map.Length];
                        for (int i = 0; i < map.Length; i++) {
                                categories [i] = map [i].Category;
                                level1 [i] = map [i].Level1;
                                level2 [i] = map [i].Level2;
                                level3 [i] = ComputeLevel3Weight ((char) i);
+/*
                                // For Japanese Half-width characters, don't
                                // map widthCompat. It is IgnoreKanaType that
                                // handles those width differences.
@@ -313,158 +344,189 @@ sw.Close ();
                                        widthCompat [i] = (ushort) decompValues [decompIndex [i]];
                                        break;
                                }
+*/
                        }
 
                        // compress
                        ignorableFlags = CompressArray (ignorableFlags,
-                               MSCompatUnicodeTableUtil.Ignorable);
-                       categories = CompressArray (categories,
-                               MSCompatUnicodeTableUtil.Category);
-                       level1 = CompressArray (level1, 
-                               MSCompatUnicodeTableUtil.Level1);
-                       level2 = CompressArray (level2, 
-                               MSCompatUnicodeTableUtil.Level2);
-                       level3 = CompressArray (level3, 
-                               MSCompatUnicodeTableUtil.Level3);
-                       widthCompat = (ushort []) CodePointIndexer.CompressArray (
-                               widthCompat, typeof (ushort),
-                               MSCompatUnicodeTableUtil.WidthCompat);
-                       cjkCHS = CompressArray (cjkCHS,
-                               MSCompatUnicodeTableUtil.CjkCHS);
-                       cjkCHT = CompressArray (cjkCHT,
-                               MSCompatUnicodeTableUtil.Cjk);
-                       cjkJA = CompressArray (cjkJA,
-                               MSCompatUnicodeTableUtil.Cjk);
-                       cjkKO = CompressArray (cjkKO,
-                               MSCompatUnicodeTableUtil.Cjk);
-                       cjkKOlv2 = CompressArray (cjkKOlv2,
-                               MSCompatUnicodeTableUtil.Cjk);
+                               UUtil.Ignorable);
+                       categories = CompressArray (categories, UUtil.Category);
+                       level1 = CompressArray (level1, UUtil.Level1);
+                       level2 = CompressArray (level2, UUtil.Level2);
+                       level3 = CompressArray (level3, UUtil.Level3);
+//                     widthCompat = (ushort []) CodePointIndexer.CompressArray (
+//                             widthCompat, typeof (ushort), UUtil.WidthCompat);
+                       cjkCHS = CompressArray (cjkCHS, UUtil.CjkCHS);
+                       cjkCHT = CompressArray (cjkCHT,UUtil.Cjk);
+                       cjkJA = CompressArray (cjkJA, UUtil.Cjk);
+                       cjkKO = CompressArray (cjkKO, UUtil.Cjk);
+                       cjkKOlv2 = CompressArray (cjkKOlv2, UUtil.Cjk);
 
                        // Ignorables
-                       Result.WriteLine ("internal static readonly byte [] ignorableFlags = new byte [] {");
+                       CResult.WriteLine ("static const guint8  collation_table_ignorableFlags [] = {");
+                       CSResult.WriteLine ("static readonly byte [] ignorableFlagsArr = new byte [] {");
 #if Binary
                        MemoryStream ms = new MemoryStream ();
                        BinaryWriter binary = new BinaryWriter (ms);
+                       binary.Write (UUtil.ResourceVersion);
                        binary.Write (ignorableFlags.Length);
 #endif
                        for (int i = 0; i < ignorableFlags.Length; i++) {
                                byte value = ignorableFlags [i];
                                if (value < 10)
-                                       Result.Write ("{0},", value);
+                                       CSResult.Write ("{0},", value);
                                else
-                                       Result.Write ("0x{0:X02},", value);
+                                       CSResult.Write ("0x{0:X02},", value);
+                               CResult.Write ("{0},", value);
 #if Binary
                                binary.Write (value);
 #endif
-                               if ((i & 0xF) == 0xF)
-                                       Result.WriteLine ("// {0:X04}", i - 0xF);
+                               if ((i & 0xF) == 0xF) {
+                                       CSResult.WriteLine ("// {0:X04}",
+                                               UUtil.Ignorable.ToCodePoint (i - 0xF));
+                                       CResult.WriteLine ();
+                               }
                        }
-                       Result.WriteLine ("};");
-                       Result.WriteLine ();
+                       CResult.WriteLine ("0};");
+                       CSResult.WriteLine ("};");
+                       CSResult.WriteLine ();
 
                        // Primary category
-                       Result.WriteLine ("internal static readonly byte [] categories = new byte [] {");
+                       CResult.WriteLine ("static const guint8 collation_table_category [] = {");
+                       CSResult.WriteLine ("static readonly byte [] categoriesArr = new byte [] {");
 #if Binary
                        binary.Write (categories.Length);
 #endif
                        for (int i = 0; i < categories.Length; i++) {
                                byte value = categories [i];
                                if (value < 10)
-                                       Result.Write ("{0},", value);
+                                       CSResult.Write ("{0},", value);
                                else
-                                       Result.Write ("0x{0:X02},", value);
+                                       CSResult.Write ("0x{0:X02},", value);
+                               CResult.Write ("{0},", value);
 #if Binary
                                binary.Write (value);
 #endif
-                               if ((i & 0xF) == 0xF)
-                                       Result.WriteLine ("// {0:X04}", i - 0xF);
+                               if ((i & 0xF) == 0xF) {
+                                       CSResult.WriteLine ("// {0:X04}",
+                                               UUtil.Category.ToCodePoint (i - 0xF));
+                                       CResult.WriteLine ();
+                               }
                        }
-                       Result.WriteLine ("};");
-                       Result.WriteLine ();
+                       CResult.WriteLine ("};");
+                       CSResult.WriteLine ("};");
+                       CSResult.WriteLine ();
 
                        // Primary weight value
-                       Result.WriteLine ("internal static readonly byte [] level1 = new byte [] {");
+                       CResult.WriteLine ("static const guint8 collation_table_level1 [] = {");
+                       CSResult.WriteLine ("static readonly byte [] level1Arr = new byte [] {");
 #if Binary
                        binary.Write (level1.Length);
 #endif
                        for (int i = 0; i < level1.Length; i++) {
                                byte value = level1 [i];
                                if (value < 10)
-                                       Result.Write ("{0},", value);
+                                       CSResult.Write ("{0},", value);
                                else
-                                       Result.Write ("0x{0:X02},", value);
+                                       CSResult.Write ("0x{0:X02},", value);
+                               CResult.Write ("{0},", value);
 #if Binary
                                binary.Write (value);
 #endif
-                               if ((i & 0xF) == 0xF)
-                                       Result.WriteLine ("// {0:X04}", i - 0xF);
+                               if ((i & 0xF) == 0xF) {
+                                       CSResult.WriteLine ("// {0:X04}",
+                                               UUtil.Level1.ToCodePoint (i - 0xF));
+                                       CResult.WriteLine ();
+                               }
                        }
-                       Result.WriteLine ("};");
-                       Result.WriteLine ();
+                       CResult.WriteLine ("0};");
+                       CSResult.WriteLine ("};");
+                       CSResult.WriteLine ();
 
                        // Secondary weight
-                       Result.WriteLine ("internal static readonly byte [] level2 = new byte [] {");
+                       CResult.WriteLine ("static const guint8 collation_table_level2 [] = {");
+                       CSResult.WriteLine ("static readonly byte [] level2Arr = new byte [] {");
 #if Binary
                        binary.Write (level2.Length);
 #endif
                        for (int i = 0; i < level2.Length; i++) {
                                byte value = level2 [i];
                                if (value < 10)
-                                       Result.Write ("{0},", value);
+                                       CSResult.Write ("{0},", value);
                                else
-                                       Result.Write ("0x{0:X02},", value);
+                                       CSResult.Write ("0x{0:X02},", value);
+                               CResult.Write ("{0},", value);
 #if Binary
                                binary.Write (value);
 #endif
-                               if ((i & 0xF) == 0xF)
-                                       Result.WriteLine ("// {0:X04}", i - 0xF);
+                               if ((i & 0xF) == 0xF) {
+                                       CSResult.WriteLine ("// {0:X04}",
+                                               UUtil.Level2.ToCodePoint (i - 0xF));
+                                       CResult.WriteLine ();
+                               }
                        }
-                       Result.WriteLine ("};");
-                       Result.WriteLine ();
+                       CResult.WriteLine ("0};");
+                       CSResult.WriteLine ("};");
+                       CSResult.WriteLine ();
 
                        // Tertiary weight
-                       Result.WriteLine ("internal static readonly byte [] level3 = new byte [] {");
+                       CResult.WriteLine ("static const guint8 collation_table_level3 [] = {");
+                       CSResult.WriteLine ("static readonly byte [] level3Arr = new byte [] {");
 #if Binary
                        binary.Write (level3.Length);
 #endif
                        for (int i = 0; i < level3.Length; i++) {
                                byte value = level3 [i];
                                if (value < 10)
-                                       Result.Write ("{0},", value);
+                                       CSResult.Write ("{0},", value);
                                else
-                                       Result.Write ("0x{0:X02},", value);
+                                       CSResult.Write ("0x{0:X02},", value);
+                               CResult.Write ("{0},", value);
 #if Binary
                                binary.Write (value);
 #endif
-                               if ((i & 0xF) == 0xF)
-                                       Result.WriteLine ("// {0:X04}", i - 0xF);
+                               if ((i & 0xF) == 0xF) {
+                                       CSResult.WriteLine ("// {0:X04}",
+                                               UUtil.Level3.ToCodePoint (i - 0xF));
+                                       CResult.WriteLine ();
+                               }
                        }
-                       Result.WriteLine ("};");
-                       Result.WriteLine ();
+                       CResult.WriteLine ("0};");
+                       CSResult.WriteLine ("};");
+                       CSResult.WriteLine ();
 
+/*
                        // Width insensitivity mappings
                        // (for now it is more lightweight than dumping the
                        // entire NFKD table).
-                       Result.WriteLine ("internal static readonly ushort [] widthCompat = new ushort [] {");
+                       CResult.WriteLine ("static const guint16* widthCompat [] = {");
+                       CSResult.WriteLine ("static readonly ushort [] widthCompatArr = new ushort [] {");
 #if Binary
                        binary.Write (widthCompat.Length);
 #endif
                        for (int i = 0; i < widthCompat.Length; i++) {
                                ushort value = widthCompat [i];
                                if (value < 10)
-                                       Result.Write ("{0},", value);
+                                       CSResult.Write ("{0},", value);
                                else
-                                       Result.Write ("0x{0:X02},", value);
+                                       CSResult.Write ("0x{0:X02},", value);
+                               CResult.Write ("{0},", value);
 #if Binary
                                binary.Write (value);
 #endif
-                               if ((i & 0xF) == 0xF)
-                                       Result.WriteLine ("// {0:X04}", i - 0xF);
+                               if ((i & 0xF) == 0xF) {
+                                       CSResult.WriteLine ("// {0:X04}",
+                                               UUtil.WidthCompat.ToCodePoint (i - 0xF));
+                                       CResult.WriteLine ();
+                               }
                        }
-                       Result.WriteLine ("};");
-                       Result.WriteLine ();
+                       CResult.WriteLine ("0};");
+                       CSResult.WriteLine ("};");
+                       CSResult.WriteLine ();
+*/
+
 #if Binary
-                       using (FileStream fs = File.Create ("../collation.core.bin")) {
+                       using (FileStream fs = File.Create ("../resources/collation.core.bin")) {
                                byte [] array = ms.ToArray ();
                                fs.Write (array, 0, array.Length);
                        }
@@ -478,33 +540,70 @@ sw.Close ();
                        SerializeCJK ("cjkKOlv2", cjkKOlv2, 0x9FB0);
                }
 
-               void SerializeCJK (string name, ushort [] cjk, int max)
+               void SerializeCJK (string name, ushort [] cjk, int max_unused)
                {
-                       int offset = 0;//char.MaxValue - cjk.Length;
-                       Result.WriteLine ("static ushort [] {0} = new ushort [] {{", name);
+//                     CResult.WriteLine ("static const int collation_table_collation_cjk_{0}_size [] = {1};", name, cjk.Length);
+                       CSResult.WriteLine ("const int {0}ArrLength = {1};", name, cjk.Length);
+
+                       int len = cjk.Length;
+                       CResult.WriteLine ("static const guint8 collation_table_collation_cjk_{0} [] = {{", name);
+                       CSResult.WriteLine ("static byte [] {0}Arr = new byte [] {{", name);
+                       // the actual length is *2
+                       for (int i = 0; i < 4; i++, len /= 256) {
+                               CResult.Write ("{0},", len & 0xFF);
+                               CSResult.Write ("0x{0:X04},", len & 0xFF);
+                       }
+                       CResult.WriteLine ();
+                       CSResult.WriteLine ();
 #if Binary
                        MemoryStream ms = new MemoryStream ();
                        BinaryWriter binary = new BinaryWriter (ms);
-                       binary.Write (cjk.Length);
+                       binary.Write (UUtil.ResourceVersion);
+                       binary.Write (cjk.Length); // the actual size is *2.
 #endif
+                       // category
                        for (int i = 0; i < cjk.Length; i++) {
-                               if (i + offset == max)
-                                       break;
-                               ushort value = cjk [i];
+//                             if (i == max)
+//                                     break;
+                               byte value = (byte) (cjk [i] >> 8);
                                if (value < 10)
-                                       Result.Write ("{0},", value);
+                                       CSResult.Write ("{0},", value);
                                else
-                                       Result.Write ("0x{0:X04},", value);
+                                       CSResult.Write ("0x{0:X02},", value);
+                               CResult.Write ("{0},", value);
 #if Binary
                                binary.Write (value);
 #endif
-                               if ((i & 0xF) == 0xF)
-                                       Result.WriteLine ("// {0:X04}", i - 0xF + offset);
+                               if ((i & 0xF) == 0xF) {
+                                       CSResult.WriteLine ("// {0:X04}", i - 0xF);
+                                       CResult.WriteLine ();
+                               }
                        }
-                       Result.WriteLine ("};");
-                       Result.WriteLine ();
+
+                       // level 1
+                       for (int i = 0; i < cjk.Length; i++) {
+//                             if (i == max)
+//                                     break;
+                               byte value = (byte) (cjk [i] & 0xFF);
+                               if (value < 10)
+                                       CSResult.Write ("{0},", value);
+                               else
+                                       CSResult.Write ("0x{0:X02},", value);
+                               CResult.Write ("{0},", value);
 #if Binary
-                       using (FileStream fs = File.Create (String.Format ("../collation.{0}.bin", name))) {
+                               binary.Write (value);
+#endif
+                               if ((i & 0xF) == 0xF) {
+                                       CSResult.WriteLine ("// {0:X04}", i - 0xF);
+                                       CResult.WriteLine ();
+                               }
+                       }
+
+                       CResult.WriteLine ("0};");
+                       CSResult.WriteLine ("};");
+                       CSResult.WriteLine ();
+#if Binary
+                       using (FileStream fs = File.Create (String.Format ("../resources/collation.{0}.bin", name))) {
                                byte [] array = ms.ToArray ();
                                fs.Write (array, 0, array.Length);
                        }
@@ -513,30 +612,35 @@ sw.Close ();
 
                void SerializeCJK (string name, byte [] cjk, int max)
                {
-                       int offset = 0;//char.MaxValue - cjk.Length;
-                       Result.WriteLine ("static byte [] {0} = new byte [] {{", name);
+                       CResult.WriteLine ("static const guint8 collation_table_collation_cjk_{0} [] = {{", name);
+                       CSResult.WriteLine ("static byte [] {0}Arr = new byte [] {{", name);
 #if Binary
                        MemoryStream ms = new MemoryStream ();
                        BinaryWriter binary = new BinaryWriter (ms);
+                       binary.Write (UUtil.ResourceVersion);
 #endif
                        for (int i = 0; i < cjk.Length; i++) {
-                               if (i + offset == max)
+                               if (i == max)
                                        break;
                                byte value = cjk [i];
                                if (value < 10)
-                                       Result.Write ("{0},", value);
+                                       CSResult.Write ("{0},", value);
                                else
-                                       Result.Write ("0x{0:X02},", value);
+                                       CSResult.Write ("0x{0:X02},", value);
+                               CResult.Write ("{0},", value);
 #if Binary
                                binary.Write (value);
 #endif
-                               if ((i & 0xF) == 0xF)
-                                       Result.WriteLine ("// {0:X04}", i - 0xF + offset);
+                               if ((i & 0xF) == 0xF) {
+                                       CSResult.WriteLine ("// {0:X04}", i - 0xF);
+                                       CResult.WriteLine ();
+                               }
                        }
-                       Result.WriteLine ("};");
-                       Result.WriteLine ();
+                       CResult.WriteLine ("0};");
+                       CSResult.WriteLine ("};");
+                       CSResult.WriteLine ();
 #if Binary
-                       using (FileStream fs = File.Create (String.Format ("../collation.{0}.bin", name))) {
+                       using (FileStream fs = File.Create (String.Format ("../resources/collation.{0}.bin", name))) {
                                byte [] array = ms.ToArray ();
                                fs.Write (array, 0, array.Length);
                        }
@@ -547,35 +651,46 @@ sw.Close ();
                {
                        Hashtable indexes = new Hashtable ();
                        Hashtable counts = new Hashtable ();
-                       Result.WriteLine ("static char [] tailorings = new char [] {");
+                       CResult.WriteLine ("static const guint16 collation_table_tailoring [] = {");
+                       CSResult.WriteLine ("static char [] tailoringArr = new char [] {");
                        int count = 0;
 #if Binary
                        MemoryStream ms = new MemoryStream ();
                        BinaryWriter binary = new BinaryWriter (ms);
+                       // Here we don't need to output resource version.
+                       // This is cached.
 #endif
                        foreach (Tailoring t in tailorings) {
                                if (t.Alias != 0)
                                        continue;
-                               Result.Write ("/*{0}*/", t.LCID);
+                               CResult.Write ("/*{0}*/", t.LCID);
+                               CSResult.Write ("/*{0}*/", t.LCID);
                                indexes.Add (t.LCID, count);
                                char [] values = t.ItemToCharArray ();
                                counts.Add (t.LCID, values.Length);
                                foreach (char c in values) {
-                                       Result.Write ("'\\x{0:X}', ", (int) c);
-                                       if (++count % 16 == 0)
-                                               Result.WriteLine (" // {0:X04}", count - 16);
+                                       CSResult.Write ("'\\x{0:X}', ", (int) c);
+                                       CResult.Write ("{0},", (int) c);
+                                       if (++count % 16 == 0) {
+                                               CSResult.WriteLine (" // {0:X04}", count - 16);
+                                               CResult.WriteLine ();
+                                       }
 #if Binary
                                        binary.Write ((ushort) c);
 #endif
                                }
                        }
-                       Result.WriteLine ("};");
+                       CResult.WriteLine ("0};");
+                       CSResult.WriteLine ("};");
 
-                       Result.WriteLine ("static TailoringInfo [] tailoringInfos = new TailoringInfo [] {");
+                       CResult.WriteLine ("static const guint32 collation_table_tailoring_infos [] = {");
+                       CResult.WriteLine ("{0}, /*count*/", tailorings.Count);
+                       CSResult.WriteLine ("static TailoringInfo [] tailoringInfos = new TailoringInfo [] {");
 #if Binary
                        byte [] rawdata = ms.ToArray ();
                        ms = new MemoryStream ();
                        binary = new BinaryWriter (ms);
+                       binary.Write (UUtil.ResourceVersion);
                        binary.Write (tailorings.Count);
 #endif
                        foreach (Tailoring t in tailorings) {
@@ -591,7 +706,8 @@ sw.Close ();
                                        foreach (Tailoring t2 in tailorings)
                                                if (t2.LCID == t.LCID)
                                                        french = t2.FrenchSort;
-                               Result.WriteLine ("new TailoringInfo ({0}, 0x{1:X}, {2}, {3}), ", t.LCID, idx, cnt, french ? "true" : "false");
+                               CSResult.WriteLine ("new TailoringInfo ({0}, 0x{1:X}, {2}, {3}), ", t.LCID, idx, cnt, french ? "true" : "false");
+                               CResult.WriteLine ("{0},{1},{2},{3},", t.LCID, idx, cnt, french ? 1 : 0);
 #if Binary
                                binary.Write (t.LCID);
                                binary.Write (idx);
@@ -599,7 +715,8 @@ sw.Close ();
                                binary.Write (french);
 #endif
                        }
-                       Result.WriteLine ("};");
+                       CResult.WriteLine ("0};");
+                       CSResult.WriteLine ("};");
 #if Binary
                        binary.Write ((byte) 0xFF);
                        binary.Write ((byte) 0xFF);
@@ -607,7 +724,7 @@ sw.Close ();
                        binary.Write (rawdata, 0, rawdata.Length);
 
 
-                       using (FileStream fs = File.Create ("../collation.tailoring.bin")) {
+                       using (FileStream fs = File.Create ("../resources/collation.tailoring.bin")) {
                                byte [] array = ms.ToArray ();
                                fs.Write (array, 0, array.Length);
                        }
@@ -669,14 +786,17 @@ sw.Close ();
                {
                        StringBuilder sb = new StringBuilder ();
                        for (int i = 0; i < s.Length; i++) {
-                               if (s.StartsWith ("\\u")) {
-                                       sb.Append ((char) int.Parse (
-                                               s.Substring (2, 4), NumberStyles.HexNumber),
+                               if (i + 5 < s.Length &&
+                                       s [i] == '\\' && s [i + 1] == 'u') {
+                                       sb.Append (
+                                               (char) int.Parse (
+                                                       s.Substring (i + 2, 4),
+                                                       NumberStyles.HexNumber),
                                                1);
                                        i += 5;
                                }
-                       else
-                               sb.Append (s [i]);
+                               else
+                                       sb.Append (s [i]);
                        }
                        return sb.ToString ();
                }
@@ -856,10 +976,10 @@ sw.Close ();
                                        target = 'B';
                                else if (s.Substring (offset).StartsWith ("OPEN O"))
                                        target = 'C';
+                               else if (s.Substring (offset).StartsWith ("ETH"))
+                                       target = 'D';
                                else if (s.Substring (offset).StartsWith ("SCHWA"))
                                        target = 'E';
-                               else if (s.Substring (offset).StartsWith ("ENG"))
-                                       target = 'N';
                                else if (s.Substring (offset).StartsWith ("OI;")) // 01A2,01A3
                                        target = 'O';
                                else if (s.Substring (offset).StartsWith ("YR;")) // 01A2,01A3
@@ -868,10 +988,15 @@ sw.Close ();
                                        target = 'S';
                                else if (s.Substring (offset).StartsWith ("ESH"))
                                        target = 'S';
+                               else if (s.Substring (offset).StartsWith ("OUNCE"))
+                                       target = 'Z';
 
                                // For remaining IPA chars, direct mapping is
                                // much faster.
                                switch (cp) {
+                               case 0x0166: case 0x0167:
+                                       // Though they are 'T', they have a different weight
+                                       target = char.MinValue; break;
                                case 0x0299: target = 'B'; break;
                                case 0x029A: target = 'E'; break;
                                case 0x029B: target = 'G'; break;
@@ -935,20 +1060,23 @@ sw.Close ();
                                        "LEFT RIGHT",
                                        "UP DOWN",
                                        };
+                               if (s.IndexOf ("RIGHTWARDS") >= 0 &&
+                                       s.IndexOf ("LEFTWARDS") >= 0)
+                                       value = 0xE1 - 0xD8;
+                               else if (s.IndexOf ("UPWARDS") >= 0 &&
+                                       s.IndexOf ("DOWNWARDS") >= 0)
+                                       value = 0xE2 - 0xD8;
+                               else if (s.IndexOf ("ARROW") >= 0 &&
+                                       s.IndexOf ("COMBINING") < 0 &&
+                                       s.IndexOf ("CLOCKWISE") >= 0)
+                                       value = s.IndexOf ("ANTICLOCKWISE") >= 0 ? 0xE4 - 0xD8 : 0xE3 - 0xD8;
                                if (value == 0)
-                                       for (int i = 1; value == 0 && i < arrowTargets.Length; i++) {
+                                       for (int i = 1; value == 0 && i < arrowTargets.Length; i++)
                                                if (s.IndexOf (arrowTargets [i]) > 0 &&
                                                        s.IndexOf ("BARB " + arrowTargets [i]) < 0 &&
                                                        s.IndexOf (" OVER") < 0
                                                )
                                                        value = i;
-                                               else if (s.IndexOf ("RIGHTWARDS") > 0 &&
-                                                       s.IndexOf ("LEFTWARDS") > 0)
-                                                       value = 0xE1 - 0xD8;
-                                               else if (s.IndexOf ("UPWARDS") > 0 &&
-                                                       s.IndexOf ("DOWNWARDS") > 0)
-                                                       value = 0xE2 - 0xD8;
-                                       }
                                if (value > 0)
                                        arrowValues.Add (new DictionaryEntry (
                                                cp, value));
@@ -956,7 +1084,7 @@ sw.Close ();
 
                        // Box names
                        if (0x2500 <= cp && cp < 0x2600) {
-                               int value = 0;
+                               int value = int.MinValue;
                                // flags:
                                // up:1 down:2 right:4 left:8 vert:16 horiz:32
                                // [h,rl] [r] [l]
@@ -996,7 +1124,8 @@ sw.Close ();
                                                flag |= 32;
 
                                        int fidx = flags.IndexOf (flag);
-                                       value = fidx < 0 ? fidx : offsets [fidx];
+                                       if (fidx >= 0)
+                                               value = offsets [fidx];
                                } else if (s.IndexOf ("BLOCK") >= 0) {
                                        if (s.IndexOf ("ONE EIGHTH") >= 0)
                                                value = 0x12;
@@ -1057,6 +1186,8 @@ sw.Close ();
                                        else
                                                value = 0xC9 - 0xE5;
                                }
+                               else if (s.IndexOf ("BULLET") >= 0)
+                                       value = 0xCC - 0xE5;
                                if (0x25DA <= cp && cp <= 0x25E5)
                                        value = 0xCD + cp - 0x25DA - 0xE5;
 
@@ -1066,7 +1197,7 @@ sw.Close ();
                                case 0x2572: value = 0x10; break;
                                case 0x2573: value = 0x11; break;
                                }
-                               if (value != 0)
+                               if (value != int.MinValue)
                                        boxValues.Add (new DictionaryEntry (
                                                cp, value));
                        }
@@ -1092,12 +1223,12 @@ sw.Close ();
                        // diacritical weights by character name
 if (diacritics.Length != diacriticWeights.Length)
 throw new Exception (String.Format ("Should not happen. weights are {0} while labels are {1}", diacriticWeights.Length, diacritics.Length));
-                       for (int d = 0; d < diacritics.Length; d++) {
+                       for (int d = diacritics.Length - 1; d >= 0; d--) {
                                if (s.IndexOf (diacritics [d]) > 0) {
                                        diacritical [cp] += diacriticWeights [d];
                                        if (s.IndexOf ("COMBINING") >= 0)
                                                diacritical [cp] -= (byte) 2;
-                                       continue;
+                                       break;
                                }
                                // also process "COMBINING blah" here
                                // For now it is limited to cp < 0x0370
@@ -1585,14 +1716,27 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 
                void ModifyUnidata ()
                {
+                       ArrayList decompValues = new ArrayList (this.decompValues);
+
+                       // Hebrew uppercase letters.
+                       foreach (int i in new int []
+                               {0x05DB, 0x05DE, 0x05E0, 0x05E4, 0x05E6})
+                               isUppercase [i] = true;
+
+
                        // Modify some decomposition equivalence
-                       decompType [0xFE31] = 0;
-                       decompIndex [0xFE31] = 0;
-                       decompLength [0xFE31] = 0;
-                       decompType [0xFE32] = 0;
-                       decompIndex [0xFE32] = 0;
-                       decompLength [0xFE32] = 0;
+                       for (int i = 0xFE31; i <= 0xFE34; i++) {
+                               decompType [i] = 0;
+                               decompIndex [i] = 0;
+                               decompLength [i] = 0;
+                       }
+                       decompType [0x037E] = 0;
+                       decompIndex [0x037E] = 0;
+                       decompLength [0x037E] = 0;
 
+                       // Hangzhou numbers
+                       for (int i = 0x3021; i <= 0x3029; i++)
+                               diacritical [i] = 0x4E;
                        // Korean parens numbers
                        for (int i = 0x3200; i <= 0x321C; i++)
                                diacritical [i] = 0xA;
@@ -1611,19 +1755,65 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        decompValues [decompIndex [0x3298]] = 0x52DE;
 
                        // LAMESPEC: custom remapping (which is not bugs but not fine, non-standard compliant things)
-                       decompIndex [0xFA0C] = decompIndex [0xF929]; // borrow U+F929 room (being empty)
-                       decompValues [decompIndex [0xFA0C]] = 0x5140;
+                       decompIndex [0xFA0C] = decompValues.Count;
+                       decompValues.Add ((int) 0x5140);
                        decompLength [0xFA0C] = 1;
                        decompIndex [0xF929] = decompLength [0xF929] = 0;
 
                        decompValues [decompIndex [0xF92C]] = 0x90DE;
+
+                       decompIndex [0x2125] = decompValues.Count;
+                       decompValues.Add ((int) 0x005A);
+                       decompLength [0x2125] = 1;
+                       decompType [0x2125] = DecompositionFont;
+
+                       this.decompValues = decompValues.ToArray (typeof (int)) as int [];
                }
 
                void ModifyParsedValues ()
                {
+                       // STROKE sometimes does not work well
+                       diacritical [0xD8] = diacritical [0xF8] = 0x21;
+                       diacritical [0x141] = diacritical [0x142] = 0x1F;
+                       // FIXME: why?
+                       diacritical [0xAA] = diacritical [0xBA] = 3;
+                       diacritical [0xD0] = diacritical [0xF0] = 0x68;
+                       diacritical [0x131] = 3;
+                       diacritical [0x138] = 3;
+                       // TOPBAR does not work as an identifier for the weight
+                       diacritical [0x182] = diacritical [0x183] = 0x68; // B
+                       diacritical [0x18B] = diacritical [0x18C] = 0x1E; // D
+                       // TONE TWO
+                       diacritical [0x1A7] = diacritical [0x1A8] = 0x87;
+                       // TONE SIX
+                       diacritical [0x184] = diacritical [0x185] = 0x87;
+                       // OPEN E
+                       diacritical [0x190] = diacritical [0x25B] = 0x7B;
+                       // There are many letters w/ diacritical weight 0x7B
+                       diacritical [0x0192] = diacritical [0x0194] =
+                       diacritical [0x0195] = diacritical [0x0196] =
+                       diacritical [0x019C] = diacritical [0x019E] =
+                       diacritical [0x01A6] = diacritical [0x01B1] =
+                       diacritical [0x01B2] = diacritical [0x01BF] = 0x7B;
+                       // ... as well as 0x7C
+                       diacritical [0x01A2] = diacritical [0x01A3] = 0x7C;
+
+                       // <font> NFKD characters seem to have diacritical
+                       // weights 3, 4, 5..., but the order does not appear to
+                       // follow codepoints and I have no idea how they are sorted.
+                       diacritical [0x210E] = 3;
+                       diacritical [0x210F] = 0x68;
+                       diacritical [0x2110] = 4;
+                       diacritical [0x2111] = 5;
+                       diacritical [0x2112] = 4;
+                       diacritical [0x2113] = 4;
+                       diacritical [0x211B] = 4;
+                       diacritical [0x211C] = 5;
+
                        // some cyrillic diacritical weight. They seem to be
                        // based on old character names, so it's quicker to
                        // set them directly here.
+                       // FIXME: the reasons for these values are mostly unknown
                        diacritical [0x0496] = diacritical [0x0497] = 7;
                        diacritical [0x0498] = diacritical [0x0499] = 0x1A;
                        diacritical [0x049A] = diacritical [0x049B] = 0x17;
@@ -1632,6 +1822,15 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        diacritical [0x04A0] = diacritical [0x04A1] = 0xA;
                        diacritical [0x04A2] = diacritical [0x04A3] = 7;
                        diacritical [0x04A4] = diacritical [0x04A5] = 8;
+                       diacritical [0x04AA] = diacritical [0x04AB] = 0x1A; // ES CEDILLA?
+                       diacritical [0x04AC] = diacritical [0x04AD] = 7; // RIGHT DESCENDER? but U+4B2
+                       diacritical [0x04AE] = diacritical [0x04AF] = 0xB; // STRAIGHT U?
+                       diacritical [0x04B2] = diacritical [0x04B3] = 0x17; // RIGHT DESCENDER? but U+4AC
+                       diacritical [0x04B4] = diacritical [0x04B5] = 3;
+                       diacritical [0x04B6] = 8;
+                       diacritical [0x04B7] = 7;
+                       diacritical [0x04B8] = diacritical [0x04B9] = 9;
+                       diacritical [0x04BA] = diacritical [0x04BB] = 9;
 
                        // number, secondary weights
                        byte weight = 0x38;
@@ -1641,6 +1840,13 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                        if (Char.IsNumber ((char) cp))
                                                diacritical [cp] = weight;
 
+                       // Gurmukhi special letters' diacritical weight
+                       for (int i = 0x0A50; i < 0x0A60; i++)
+                               diacritical [i] = 4;
+                       // Oriya special letters' diacritical weight
+                       for (int i = 0x0B5C; i < 0x0B60; i++)
+                               diacritical [i] = 6;
+
                        // Update name part of named characters
                        for (int i = 0; i < sortableCharNames.Count; i++) {
                                DictionaryEntry de =
@@ -1746,7 +1952,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        fillIndex [6] = 0xA0;
                        // vowels
                        for (int i = 0x64B; i <= 0x650; i++)
-                               AddArabicCharMap ((char) i);
+                               AddArabicCharMap ((char) i, 6, 1, 0);
                        // sukun
                        AddCharMapGroup ('\u0652', 6, 1, 0);
                        // shadda
@@ -1792,11 +1998,12 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                if (!IsIgnorable (i))
                                        AddCharMap ((char) i, 0x1, 1);
 
+
                        // FIXME: needs more love here (it should eliminate
                        // all the hacky code above).
                        for (int i = 0x0300; i < 0x0370; i++)
                                if (!IsIgnorable (i) && diacritical [i] != 0
-                                       /* especiall here*/ && !map [i].Defined)
+                                       && !map [i].Defined)
                                        map [i] = new CharMapEntry (
                                                0x1, 0x1, diacritical [i]);
 
@@ -1844,9 +2051,15 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        fillIndex [0x1] = 0xEC;
                        for (int i = 0x20DD; i <= 0x20E1; i++)
                                AddCharMap ((char) i, 0x1, 1);
-                       fillIndex [0x1] = 0x7;
+                       fillIndex [0x1] = 0x4;
+                       AddCharMap ('\u0CD5', 0x1, 1);
+                       AddCharMap ('\u0CD6', 0x1, 1);
+                       AddCharMap ('\u093C', 0x1, 1);
                        for (int i = 0x302A; i <= 0x302D; i++)
                                AddCharMap ((char) i, 0x1, 1);
+                       AddCharMap ('\u0C55', 0x1, 1);
+                       AddCharMap ('\u0C56', 0x1, 1);
+
                        fillIndex [0x1] = 0x50; // I wonder how they are sorted
                        for (int i = 0x02D4; i <= 0x02D7; i++)
                                AddCharMap ((char) i, 0x1, 1);
@@ -1862,6 +2075,10 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        for (int i = 0x30FC; i <= 0x30FE; i++)
                                map [i] = new CharMapEntry (0xFF, 0xFF, 1);
 
+                       fillIndex [0x1] = 0xA;
+                       for (int i = 0x0951; i <= 0x0954; i++)
+                               AddCharMap ((char) i, 0x1, 2);
+
                        #endregion
 
 
@@ -1884,6 +2101,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        // while they aren't.
                        AddCharMap ('\u2422', 0x7, 1, 0); // blank symbol
                        AddCharMap ('\u2423', 0x7, 1, 0); // open box
+
                        #endregion
 
                        // category 09 - continued symbols from 08
@@ -1893,7 +2111,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                AddCharMap ((char) cp, 0x9, 1, 0);
 
                        // arrows
-                       byte [] arrowLv2 = new byte [] {0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3};
+                       byte [] arrowLv2 = new byte [] {0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3};
                        foreach (DictionaryEntry de in arrowValues) {
                                int idx = (int) de.Value;
                                int cp = (int) de.Key;
@@ -1905,6 +2123,8 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        }
                        // boxes
                        byte [] boxLv2 = new byte [128];
+                       // Indexes 0-63 are used for positive offsets,
+                       // and 64-127 for negative ones.
                        for (int i = 0; i < boxLv2.Length; i++)
                                boxLv2 [i] = 3;
                        foreach (DictionaryEntry de in boxValues) {
@@ -1914,7 +2134,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                        continue;
                                if (off < 0) {
                                        fillIndex [0x9] = (byte) (0xE5 + off);
-                                       AddCharMapGroup ((char) cp, 0x9, 0, boxLv2 [-off]++);
+                                       AddCharMapGroup ((char) cp, 0x9, 0, boxLv2 [128 + off]++);
                                }
                                else {
                                        fillIndex [0x9] = (byte) (0xE5 + off);
@@ -1935,8 +2155,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                uc = Char.GetUnicodeCategory ((char) cp);
                                if (!IsIgnorable (cp) &&
                                        uc == UnicodeCategory.CurrencySymbol &&
-                                       cp != '$' ||
-                                       cp == 0xAC)
+                                       cp != '$')
                                        AddCharMapGroup ((char) cp, 0xA, 1, 0);
                        }
                        // byte other symbols
@@ -1946,7 +2165,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                uc = Char.GetUnicodeCategory ((char) cp);
                                if (!IsIgnorable (cp) &&
                                        uc == UnicodeCategory.OtherSymbol ||
-                                       cp == '\u00B5' || cp == '\u00B7')
+                                       cp == '\u00AC' || cp == '\u00B5' || cp == '\u00B7')
                                        AddCharMapGroup ((char) cp, 0xA, 1, 0);
                        }
                        // U+30FB here
@@ -1963,7 +2182,15 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 
                        for (int cp = 0x20A0; cp <= 0x20AB; cp++)
                                AddCharMap ((char) cp, 0xA, 1, 0);
-                       fillIndex [0xA] = 0x2F; // FIXME: it won't be needed
+
+                       // 3004 is skipped at first...
+                       for (int cp = 0x3010; cp <= 0x3040; cp++)
+                               if (Char.IsSymbol ((char) cp))
+                                       AddCharMap ((char) cp, 0xA, 1, 0);
+                       // SPECIAL CASES: added here
+                       AddCharMap ('\u3004', 0xA, 1, 0);
+                       AddCharMap ('\u327F', 0xA, 1, 0);
+
                        for (int cp = 0x2600; cp <= 0x2613; cp++)
                                AddCharMap ((char) cp, 0xA, 1, 0);
                        // Dingbats
@@ -1974,6 +2201,10 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        for (int i = 0x2440; i < 0x2460; i++)
                                AddCharMap ((char) i, 0xA, 1, 0);
 
+                       // SPECIAL CASES: why?
+                       AddCharMap ('\u0E3F', 0xA, 1, 0);
+                       AddCharMap ('\u2117', 0xA, 1, 0);
+                       AddCharMap ('\u20AC', 0xA, 1, 0);
                        #endregion
 
                        #region Numbers // 0C 02 - 0C E1
@@ -1992,11 +2223,15 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        ArrayList numberValues = new ArrayList ();
                        foreach (int i in numbers)
                                numberValues.Add (new DictionaryEntry (i, decimalValue [(char) i]));
+                       // SPECIAL CASE: Cyrillic Thousand sign
+                       numberValues.Add (new DictionaryEntry (0x0482, 1000m));
                        numberValues.Sort (DecimalDictionaryValueComparer.Instance);
 
 //foreach (DictionaryEntry de in numberValues)
 //Console.Error.WriteLine ("****** number {0:X04} : {1} {2}", de.Key, de.Value, decompType [(int) de.Key]);
 
+                       // FIXME: the fillIndex adjustment lines are too
+                       // complicated. They should be simplified.
                        decimal prevValue = -1;
                        foreach (DictionaryEntry de in numberValues) {
                                int cp = (int) de.Key;
@@ -2014,18 +2249,25 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                                fillIndex [0xC]++;
 
                                        int xcp;
-                                       if (currValue <= 10) {
-                                               xcp = (int) prevValue + 0x2170 - 1;
-                                               AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
+                                       if (currValue <= 13) {
+                                               if (currValue == 4)
+                                                       fillIndex [0xC]++;
+                                               // SPECIAL CASE
+                                               if (currValue == 11)
+                                                       AddCharMap ('\u0BF0', 0xC, 1);
                                                xcp = (int) prevValue + 0x2160 - 1;
                                                AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-                                               fillIndex [0xC] += 2;
-                                               xcp = (int) prevValue + 0x3021 - 1;
+                                               xcp = (int) prevValue + 0x2170 - 1;
                                                AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
                                                fillIndex [0xC]++;
                                        }
-                                       else if (currValue == 11)
+                                       if (currValue < 12)
+                                               fillIndex [0xC]++;
+                                       if (currValue <= 10) {
+                                               xcp = (int) prevValue + 0x3021 - 1;
+                                               AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
                                                fillIndex [0xC]++;
+                                       }
                                }
                                if (prevValue < currValue)
                                        prevValue = currValue;
@@ -2033,20 +2275,19 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                        continue;
                                // HangZhou and Roman are add later 
                                // (code is above)
-                               else if (0x3021 <= cp && cp < 0x302A
-                                       || 0x2160 <= cp && cp < 0x216A
-                                       || 0x2170 <= cp && cp < 0x217A)
+                               if (0x3021 <= cp && cp < 0x302A
+                                       || 0x2160 <= cp && cp < 0x216C
+                                       || 0x2170 <= cp && cp < 0x217C)
                                        continue;
 
-                               if (cp ==  0x215B) // FIXME: why?
+                               if (cp == 0x215B) // FIXME: why?
                                        fillIndex [0xC] += 2;
                                else if (cp == 0x3021) // FIXME: why?
                                        fillIndex [0xC]++;
-                               AddCharMapGroup ((char) cp, 0xC, 0, diacritical [cp], true);
                                if (addnew || cp <= '9') {
                                        int mod = (int) currValue - 1;
                                        int xcp;
-                                       if (1 <= currValue && currValue <= 10) {
+                                       if (1 <= currValue && currValue <= 11) {
                                                xcp = mod + 0x2776;
                                                AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
                                                xcp = mod + 0x2780;
@@ -2063,9 +2304,27 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                                AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
                                        }
                                }
+                               if (addnew && currValue >= 10 && currValue < 13 || cp == 0x09F9)
+                                       fillIndex [0xC]++;
+                               AddCharMapGroup ((char) cp, 0xC, 0, diacritical [cp], true);
 
-                               if (cp != 0x09E7 && cp != 0x09EA)
+                               switch (cp) {
+                               // Maybe Bengali digit numbers do not increase
+                               // indexes, but 0x09E6 does.
+                               case 0x09E7: case 0x09E8: case 0x09E9:
+                               case 0x09EA:
+                               // SPECIAL CASES
+                               case 0x0BF0: case 0x2180: case 0x2181:
+                                       break;
+                               // SPECIAL CASE
+                               case 0x0BF1:
                                        fillIndex [0xC]++;
+                                       break;
+                               default:
+                                       if (currValue < 11 || currValue == 1000)
+                                               fillIndex [0xC]++;
+                                       break;
+                               }
 
                                // Add special cases that are not regarded as 
                                // numbers in UnicodeCategory speak.
@@ -2074,7 +2333,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                        AddCharMapGroup ('\u01BD', 0xC, 0, 0);
                                        AddCharMapGroup ('\u01BC', 0xC, 1, 0);
                                }
-                               else if (cp == '6') // FIXME: why?
+                               else if (cp == '2' || cp == '6') // FIXME: why?
                                        fillIndex [0xC]++;
                        }
 
@@ -2140,19 +2399,63 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                AddCharMapGroup ((char) i, 0xE, 1, 0);
                        }
 
-                       // Greek and Coptic
-                       fillIndex [0xF] = 02;
-                       for (int i = 0x0380; i < 0x0390; i++)
+                       // IPA extensions
+                       // FIXME: the resulting values are not equivalent to
+                       // those on Windows, but they are safer for comparison.
+                       char [] ipaArray = new char [0x300 - 0x250 + 0x20];
+                       for (int i = 0x40; i < 0x60; i++)
                                if (Char.IsLetter ((char) i))
-                                       AddLetterMap ((char) i, 0xF, 1);
-                       fillIndex [0xF] = 02;
-                       for (int i = 0x0391; i < 0x03CF; i++)
+                                       ipaArray [i - 0x40] = (char) (i);
+                       for (int i = 0x250; i < 0x300; i++)
                                if (Char.IsLetter ((char) i))
-                                       AddLetterMap ((char) i, 0xF, 1);
+                                       ipaArray [i - 0x250 + 0x20] = (char) i;
+                       Array.Sort (ipaArray, UCAComparer.Instance);
+                       int targetASCII = 0;
+                       byte latinDiacritical = 0x7B;
+                       foreach (char c in ipaArray) {
+                               if (c <= 'Z') {
+                                       targetASCII = c;
+                                       latinDiacritical = 0x7B;
+                               }
+                               else
+                                       map [(int) c] = new CharMapEntry (
+                                               0xE,
+                                               map [targetASCII].Level1,
+                                               latinDiacritical++);
+                       }
+
+                       // Greek and Coptic
+
+                       // FIXME: this is (mysterious and) incomplete.
+                       for (int i = 0x0380; i < 0x0400; i++)
+                               if (diacritical [i] == 0 &&
+                                       decompLength [i] == 1 &&
+                                       decompType [i] == DecompositionCompat)
+                                       diacritical [i] = 3;
+
+                       fillIndex [0xF] = 2;
+                       for (int i = 0x0391; i < 0x03AA; i++)
+                               if (i != 0x03A2)
+                                       AddCharMap ((char) i, 0xF, 1,
+                                               diacritical [i]);
+                       fillIndex [0xF] = 2;
+                       for (int i = 0x03B1; i < 0x03CA; i++)
+                               if (i != 0x03C2)
+                                       AddCharMap ((char) i, 0xF, 1,
+                                               diacritical [i]);
+                       // Final Sigma
+                       map [0x03C2] = new CharMapEntry (0xF,
+                               map [0x03C3].Level1, map [0x03C3].Level2);
+
                        fillIndex [0xF] = 0x40;
-                       for (int i = 0x03D0; i < 0x0400; i++)
-                               if (Char.IsLetter ((char) i))
-                                       AddLetterMap ((char) i, 0xF, 1);
+                       for (int i = 0x03DA; i < 0x03F0; i++)
+                               AddCharMap ((char) i, 0xF,
+                                       (byte) (i % 2 == 0 ? 0 : 2),
+                                       diacritical [i]);
+
+                       // NFKD
+                       for (int i = 0x0386; i <= 0x0400; i++)
+                               FillLetterNFKD (i, true, true);
 
                        // Cyrillic.
                        // Cyrillic letters are sorted like Latin letters i.e. 
@@ -2225,6 +2528,10 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                }
                        }
 
+                       // NFKD
+                       for (int i = 0x0401; i <= 0x045F; i++)
+                               FillLetterNFKD (i, false, false);
+
                        for (int i = 0; i < cymap_src.Length; i++) {
                                char c = cymap_src [i];
                                fillIndex [0x10] = map [c].Level1;
@@ -2246,8 +2553,14 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        // -Letters
                        fillIndex [0x12] = 0x2;
                        for (int i = 0x05D0; i < 0x05FF; i++)
-                               if (Char.IsLetter ((char) i))
-                                       AddLetterMap ((char) i, 0x12, 1);
+                               if (Char.IsLetter ((char) i)) {
+                                       if (isUppercase [i]) {
+                                               fillIndex [0x12]--;
+                                               AddLetterMap ((char) i, 0x12, 2);
+                                       }
+                                       else
+                                               AddLetterMap ((char) i, 0x12, 1);
+                               }
                        // -Accents
                        fillIndex [0x1] = 0x3;
                        for (int i = 0x0591; i <= 0x05C2; i++) {
@@ -2284,7 +2597,8 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                case 0x0649: formDiacritical = 5; break;
                                case 0x064A: formDiacritical = 7; break;
                                }
-                               AddLetterMapCore ((char) i, 0x13, 1, formDiacritical, false);
+//                             AddLetterMapCore ((char) i, 0x13, 1, formDiacritical, false);
+                               AddArabicCharMap ((char) i, 0x13, 1, formDiacritical);
                        }
                        for (int i = 0x0670; i < 0x0673; i++)
                                map [i] = new CharMapEntry (0x13, 0xB, (byte) (0xC + i - 0x670));
@@ -2436,7 +2750,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                        AddLetterMap ((char) i, 0x1, 1);
                                        continue;
                                }
-                               AddLetterMap ((char) i, 0x18, 1);
+                               AddLetterMapCore ((char) i, 0x18, 1, 0, true);
                        }
 
                        // Tamil
@@ -2716,7 +3030,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        //
 
                        string hangulSequence =
-                       + "\u1100=\u11A8 > \u1101=\u11A9 >"
+                         "\u1100=\u11A8 > \u1101=\u11A9 >"
                        + "\u11C3, \u11AA, \u11C4, \u1102=\u11AB >"
                        + "<{\u1113 \u1116}, \u3165,"
                                + "\u11C5, \u11C6=\u3166,, \u11C7, \u11C8,"
@@ -2874,10 +3188,23 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        #region 07 - ASCII non-alphanumeric + 3001, 3002 // 07
                        // non-alphanumeric ASCII except for: + - < = > '
                        for (int i = 0x21; i < 0x7F; i++) {
+                               // SPECIAL CASE: 02C6 seems to be regarded as
+                               // equivalent to '^', which does not conform
+                               // to the Unicode character database.
+                               if (i == 0x005B)
+                                       AddCharMap ('\u2045', 0x7, 0, 0x1C);
+                               if (i == 0x005D)
+                                       AddCharMap ('\u2046', 0x7, 0, 0x1C);
+                               if (i == 0x005E)
+                                       AddCharMap ('\u02C6', 0x7, 0, 3);
+                               if (i == 0x0060)
+                                       AddCharMap ('\u02CB', 0x7, 0, 3);
+
                                if (Char.IsLetterOrDigit ((char) i)
                                        || "+-<=>'".IndexOf ((char) i) >= 0)
                                        continue; // they are not added here.
-                                       AddCharMapGroup2 ((char) i, 0x7, 1, 0);
+
+                               AddCharMapGroup2 ((char) i, 0x7, 1, 0);
                                // Insert 3001 after ',' and 3002 after '.'
                                if (i == 0x2C)
                                        AddCharMapGroup2 ('\u3001', 0x7, 1, 0);
@@ -2895,10 +3222,27 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 
                                // FIXME: actually those reset should not be 
                                // done but here I put for easy goal.
+                               if (i == 0x05C3)
+                                       fillIndex [0x7]++;
                                if (i == 0x0700)
                                        fillIndex [0x7] = 0xE2;
                                if (i == 0x2016)
                                        fillIndex [0x7] = 0x77;
+                               if (i == 0x3008)
+                                       fillIndex [0x7] = 0x93;
+
+                               if (0x02C8 <= i && i <= 0x02CD)
+                                       continue; // nonspacing marks
+
+                               // SPECIAL CASE: maybe they could be given a
+                               // dummy NFKD mapping, so that no special
+                               // processing would be required here.
+                               if (i == 0x00AF)
+                                       AddCharMap ('\u02C9', 0x7, 0, 3);
+                               if (i == 0x00B4)
+                                       AddCharMap ('\u02CA', 0x7, 0, 3);
+                               if (i == 0x02C7)
+                                       AddCharMap ('\u02D8', 0x7, 0, 3);
 
                                // SPECIAL CASES:
                                switch (i) {
@@ -2923,68 +3267,99 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                        // SPECIAL CASES: // 0xA
                                        if (0x2020 <= i && i <= 0x2031)
                                                continue;
-                                       AddCharMapGroup ((char) i, 0x7, 1, 0);
+                                       if (i == 0x3003) // added later
+                                               continue;
+                                       AddCharMapGroup2 ((char) i, 0x7, 1, 0);
                                        break;
                                default:
-                                       if (i == 0xA6 || i == 0x1C3) // SPECIAL CASE. FIXME: why?
+                                       if (i == 0xA6 || i == 0x1C3 || i == 0x037A) // SPECIAL CASE. FIXME: why?
                                                goto case UnicodeCategory.OtherPunctuation;
                                        break;
                                }
                        }
+
                        // Control pictures
                        // FIXME: it should not need to reset level 1, but
                        // it's for easy goal.
                        fillIndex [0x7] = 0xB6;
-                       for (int i = 0x2400; i <= 0x2421; i++)
+                       for (int i = 0x2400; i <= 0x2424; i++)
                                AddCharMap ((char) i, 0x7, 1, 0);
 
-                       // Actually 3008-301F and FE33-FE5D are mixed, so
-                       // it's somewhat countable, but not as a whole. Thus
-                       // manual remapping is quicker.
-                       fillIndex [0x7] = 0x8D;
-                       int [] cjkCompatMarks1 = new int [] {
-                               0xFE33, 0xFE49, 0xFE4A, 0xFE4B, 0xFE4C};
-                       int [] cjkCompatMarks2 = new int [] {
-                               0xFE34, 0xFE3F, 0xFE40, 0xFE3D, 0xFE3E, 0xFE41,
-                               0xFE42, 0xFE43, 0xFE44, 0xFE3B, 0xFE3C/*FE5D*/,
-                               0xFE39/*FE5E*/, 0xFE3A};
-                       for (int i = 0; i < cjkCompatMarks1.Length; i++)
-                               map [cjkCompatMarks1 [i]] = new CharMapEntry (
-                                       0x7, fillIndex [0x7]++, 0);
-                       for (int i = 0; i < cjkCompatMarks2.Length; i++) {
-                               map [cjkCompatMarks2 [i]] = new CharMapEntry (
-                                       0x7, fillIndex [0x7], 0);
-                               fillIndex [0x7] += 2;
-                               switch (cjkCompatMarks2 [i]) {
-                               case 0xFE3C:
-                                       map [0xFE5D] = new CharMapEntry (
-                                               0x7, fillIndex [0x7]++, 0);
+                       // FIXME: what are they?
+                       AddCharMap ('\u3003', 0x7, 1);
+                       AddCharMap ('\u3006', 0x7, 1);
+                       AddCharMap ('\u02D0', 0x7, 1);
+                       AddCharMap ('\u10FB', 0x7, 1);
+                       AddCharMap ('\u0950', 0x7, 1);
+                       AddCharMap ('\u093D', 0x7, 1);
+                       AddCharMap ('\u0964', 0x7, 1);
+                       AddCharMap ('\u0965', 0x7, 1);
+                       AddCharMap ('\u0970', 0x7, 1);
+
+                       #endregion
+
+                       #region category 08 - symbols
+                       fillIndex [0x8] = 2;
+                       // Here Windows mapping is not straightforward. It is
+                       // not based on computation but seems manual sorting.
+                       AddCharMapGroup ('+', 0x8, 1, 0); // plus
+                       AddCharMapGroup ('\u2212', 0x8, 1); // minus
+                       AddCharMapGroup ('\u229D', 0x8, 1); // minus
+                       AddCharMapGroup ('\u2297', 0x8, 1); // mul
+                       AddCharMapGroup ('\u2044', 0x8, 1); // div
+                       AddCharMapGroup ('\u2215', 0x8, 0); // div
+                       AddCharMapGroup ('\u2298', 0x8, 1); // div slash
+                       AddCharMapGroup ('\u2217', 0x8, 0); // mul
+                       AddCharMapGroup ('\u229B', 0x8, 1); // asterisk oper
+                       AddCharMapGroup ('\u2218', 0x8, 0); // ring
+                       AddCharMapGroup ('\u229A', 0x8, 1); // ring
+                       AddCharMapGroup ('\u2219', 0x8, 0); // bullet
+                       AddCharMapGroup ('\u2299', 0x8, 1); // dot oper
+                       AddCharMapGroup ('\u2213', 0x8, 1); // minus-or-plus
+                       AddCharMapGroup ('\u003C', 0x8, 1); // <
+                       AddCharMapGroup ('\u227A', 0x8, 1); // precedes relation
+                       AddCharMapGroup ('\u22B0', 0x8, 1); // precedes under relation
+
+                       for (int cp = 0; cp < 0x2300; cp++) {
+                               if (cp == 0xAC) // SPECIAL CASE: skip
+                                       continue;
+                               if (cp == 0x200) {
+                                       cp = 0x2200; // skip to 2200
+                                       fillIndex [0x8] = 0x21;
+                               }
+                               if (cp == 0x2295)
+                                       fillIndex [0x8] = 0x3;
+                               if (cp == 0x22A2)
+                                       fillIndex [0x8] = 0xAB;
+                               if (cp == 0x22B2)
+                                       fillIndex [0x8] = 0xB9;
+                               if (!map [cp].Defined &&
+//                                     Char.GetUnicodeCategory ((char) cp) ==
+//                                     UnicodeCategory.MathSymbol)
+                                       Char.IsSymbol ((char) cp))
+                                       AddCharMapGroup ((char) cp, 0x8, 1);
+                               // SPECIAL CASES: no idea why Windows sorts as such
+                               switch (cp) {
+                               case 0x3E:
+                                       AddCharMap ('\u227B', 0x8, 1, 0);
+                                       AddCharMap ('\u22B1', 0x8, 1, 0);
                                        break;
-                               case 0xFE39:
-                                       map [0xFE5D] = new CharMapEntry (
-                                               0x7, fillIndex [0x7]++, 0);
+                               case 0xB1:
+                                       AddCharMapGroup ('\u00AB', 0x8, 1);
+                                       AddCharMapGroup ('\u226A', 0x8, 1);
+                                       AddCharMapGroup ('\u00BB', 0x8, 1);
+                                       AddCharMapGroup ('\u226B', 0x8, 1);
+                                       break;
+                               case 0xF7:
+                                       AddCharMap ('\u01C0', 0x8, 1, 0);
+                                       AddCharMap ('\u01C1', 0x8, 1, 0);
+                                       AddCharMap ('\u01C2', 0x8, 1, 0);
                                        break;
                                }
                        }
-
-                       fillIndex [0x7] = 0x93;
-                       for (int i = 0x3008; i <= 0x3011; i++) {
-                               map [i] = new CharMapEntry (0x7,
-                                       fillIndex [0x7], 0);
-                               fillIndex [0x7] += 2;
-                       }
-                       fillIndex [0x7] += 3;
-                       map [0x3014] = new CharMapEntry (0x7, fillIndex [0x7], 0);
-                       fillIndex [0x7] += 3;
-                       map [0x3015] = new CharMapEntry (0x7, fillIndex [0x7], 0);
-                       fillIndex [0x7] += 2;
-                       for (int i = 0x3016; i < 0x301F; i++)
-                               map [i] = new CharMapEntry (0x7,
-                                       fillIndex [0x7]++, 0);
-
                        #endregion
 
-                       // FIXME: for 07 xx we need more love.
+                       #region Hack!
 
                        // Characters w/ diacritical marks (NFKD)
                        for (int i = 0; i <= char.MaxValue; i++) {
@@ -3024,61 +3399,8 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                
                        }
 
-                       // category 08 - symbols
-                       fillIndex [0x8] = 2;
-                       // Here Windows mapping is not straightforward. It is
-                       // not based on computation but seems manual sorting.
-                       AddCharMapGroup ('+', 0x8, 1, 0); // plus
-                       AddCharMapGroup ('\u2212', 0x8, 1, 0); // minus
-                       AddCharMapGroup ('\u229D', 0x8, 1, 0); // minus
-                       AddCharMapGroup ('\u2297', 0x8, 1, 0); // mul
-                       AddCharMapGroup ('\u2044', 0x8, 1, 0); // div
-                       AddCharMapGroup ('\u2215', 0x8, 1, 0); // div
-                       AddCharMapGroup ('\u2217', 0x8, 1, 0); // mul
-                       AddCharMapGroup ('\u2218', 0x8, 1, 0); // ring
-                       AddCharMapGroup ('\u2219', 0x8, 1, 0); // bullet
-                       AddCharMapGroup ('\u2213', 0x8, 1, 0); // minus-or-plus
-                       AddCharMapGroup ('\u003C', 0x8, 1, 0); // <
-                       AddCharMapGroup ('\u227A', 0x8, 1, 0); // precedes relation
-                       AddCharMapGroup ('\u22B0', 0x8, 1, 0); // precedes under relation
-
-                       for (int cp = 0; cp < 0x2300; cp++) {
-                               if (cp == 0xAC) // SPECIAL CASE: skip
-                                       continue;
-                               if (cp == 0x200) {
-                                       cp = 0x2200; // skip to 2200
-                                       fillIndex [0x8] = 0x21;
-                               }
-                               if (cp == 0x2295)
-                                       fillIndex [0x8] = 0x3;
-                               if (cp == 0x22B2)
-                                       fillIndex [0x8] = 0xB9;
-                               if (!map [cp].Defined &&
-//                                     Char.GetUnicodeCategory ((char) cp) ==
-//                                     UnicodeCategory.MathSymbol)
-                                       Char.IsSymbol ((char) cp))
-                                       AddCharMapGroup ((char) cp, 0x8, 1, diacritical [cp]);
-                               // SPECIAL CASES: no idea why Windows sorts as such
-                               switch (cp) {
-                               case 0x3E:
-                                       AddCharMap ('\u227B', 0x8, 1, 0);
-                                       AddCharMap ('\u22B1', 0x8, 1, 0);
-                                       break;
-                               case 0xB1:
-                                       AddCharMapGroup ('\u00AB', 0x8, 1, 0);
-                                       AddCharMapGroup ('\u226A', 0x8, 1, 0);
-                                       AddCharMapGroup ('\u00BB', 0x8, 1, 0);
-                                       AddCharMapGroup ('\u226B', 0x8, 1, 0);
-                                       break;
-                               case 0xF7:
-                                       AddCharMap ('\u01C0', 0x8, 1, 0);
-                                       AddCharMap ('\u01C1', 0x8, 1, 0);
-                                       AddCharMap ('\u01C2', 0x8, 1, 0);
-                                       break;
-                               }
-                       }
+                       // Diacritical weight adjustment
 
-                       #region Level2 adjustment
                        // Arabic Hamzah
                        diacritical [0x624] = 0x5;
                        diacritical [0x626] = 0x7;
@@ -3097,6 +3419,10 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                        mod = diacritical [i];
                                        break;
                                case 0x13: // Arabic
+                                       if (i == 0x0621)
+                                               break; // 0
+                                       if (diacritical [i] == 0 && decompLength [i] != 0)
+                                               diacritical [i] = map [decompValues [decompIndex [i]]].Level2;
                                        if (diacritical [i] == 0 && i >= 0xFE8D)
                                                mod = 0x8; // default for arabic
                                        break;
@@ -3107,7 +3433,6 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                        map [i] = new CharMapEntry (
                                                cat, map [i].Level1, mod);
                        }
-                       #endregion
 
                        // FIXME: this is halfly hack but those NonSpacingMark 
                        // characters and still undefined are likely to
@@ -3133,13 +3458,54 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                        AddCharMap ((char) i, 1, 1);
                        }
 
-                       // FIXME: this is hack but those Symbol characters
-                       // are likely to fall into 0xA category.
-                       for (int i = 0; i < char.MaxValue; i++)
-                               if (!map [i].Defined &&
-                                       !IsIgnorable (i) &&
-                                       Char.IsSymbol ((char) i))
-                                       AddCharMap ((char) i, 0xA, 1);
+                       #endregion
+               }
+
+               TextInfo ti = CultureInfo.InvariantCulture.TextInfo;
+
+               private void FillLetterNFKD (int i, bool checkUpper, bool greekRemap) // Derives map [i] from the uppercase form of i or from its decomposition; leaves it untouched when neither applies.
+               {
+                       if (map [i].Defined) // already has an entry: keep it
+                               return;
+                       int up = (int) ti.ToUpper ((char) i); // uppercase via the invariant-culture TextInfo
+                       if (checkUpper && map [up].Category == 0xF) { // uppercase form is in category 0xF
+                               if (i == up) // i is its own uppercase: nothing to copy from
+                                       return;
+                               FillLetterNFKD (up, checkUpper, greekRemap); // process the uppercase form first
+                               map [i] = new CharMapEntry (0xF, // share the uppercase form's level 1 and level 2
+                                       map [up].Level1,
+                                       map [up].Level2);
+                       } else {
+                               int idx = decompIndex [i]; // start of i's decomposition in decompValues
+                               if (idx == 0) // NOTE(review): 0 apparently means "no decomposition" -- confirm
+                                       return;
+                               int primary = decompValues [decompIndex [i]]; // first decomposition element, used as the base character
+                               FillLetterNFKD (primary, checkUpper, greekRemap); // process the base first
+
+                               int lv2 = map [primary].Level2; // start from the base's level 2
+                               byte off = 0; // accumulated level 2 of the trailing elements
+                               for (int l = 1; l < decompLength [i]; l++) {
+                                       int tmp = decompValues [idx + l];
+                                       if (map [tmp].Category != 1) // a trailing element outside category 1: give up, map [i] stays as it was
+                                               return;
+                                       if (greekRemap && map [tmp].Level2 == 0xC) // with greekRemap, a level 2 of 0xC counts as 3
+                                               off += 3;
+                                       else
+                                               off += map [tmp].Level2;
+                               }
+                               if (off > 0) {
+                                       if (lv2 == 0) // base had a level 2 of 0: add 2 before the offset
+                                               lv2 += 2;
+                                       lv2 += off;
+                               }
+                               // ... but a non-zero diacritical [i] overrides the computed value.
+                               if (diacritical [i] != 0)
+                                       lv2 = diacritical [i];
+                               map [i] = new CharMapEntry ( // category and level 1 come from the base character
+                                       map [primary].Category,
+                                       map [primary].Level1,
+                                       (byte) lv2);
+                       }
                 }
 
                private void IncrementSequentialIndex (ref byte hangulCat)
@@ -3238,6 +3604,11 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        DecompositionWide,
                        DecompositionNarrow,
                        };
+               private void AddCharMapGroup (char c, byte category, byte updateCount) // Overload without level2: forwards with level2 0 and true as the last argument (the four-argument overload passes false).
+               {
+                       AddCharMapGroup (c, category, updateCount, 0, true);
+               }
+
                private void AddCharMapGroup (char c, byte category, byte updateCount, byte level2)
                {
                        AddCharMapGroup (c, category, updateCount, level2, false);
@@ -3354,23 +3725,44 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                // For now it is only for 0x7 category.
                private void AddCharMapGroup2 (char c, byte category, byte updateCount, byte level2)
                {
-                       char small = char.MinValue;
-                       char vertical = char.MinValue;
-                       Hashtable nfkd = (Hashtable) nfkdMap [(int) c];
-                       if (nfkd != null) {
-                               object smv = nfkd [(byte) DecompositionSmall];
-                               if (smv != null)
-                                       small = (char) ((int) smv);
-                               object vv = nfkd [(byte) DecompositionVertical];
-                               if (vv != null)
-                                       vertical = (char) ((int) vv);
+                       if (map [(int) c].Defined)
+                               return;
+
+                       bool updateWeight = false;
+                       // Process in advance (lower primary weight)
+                       for (int c2 = 0; c2 < char.MaxValue; c2++) {
+                               if (!map [c2].Defined &&
+                                       decompLength [c2] == 1 &&
+                                       (int) (decompValues [decompIndex [c2]]) == (int) c) {
+                                       switch (decompType [c2]) {
+                                       case DecompositionSmall:
+                                               updateWeight = true;
+                                               AddCharMap ((char) c2, category,
+                                                       0, level2);
+                                               break;
+                                       }
+                               }
                        }
+                       if (updateWeight)
+                               fillIndex [category] = (byte)
+                                       (fillIndex [category] + updateCount);
 
-                       // <small> updates index
-                       if (small != char.MinValue)
-                               // SPECIAL CASE excluded (FIXME: why?)
-                               if (small != '\u2024')
-                                       AddCharMap (small, category, updateCount);
+                       // Identical weight
+                       for (int c2 = 0; c2 < char.MaxValue; c2++) {
+                               if (!map [c2].Defined &&
+                                       decompLength [c2] == 1 &&
+                                       (int) (decompValues [decompIndex [c2]]) == (int) c) {
+                                       switch (decompType [c2]) {
+                                       case DecompositionSub:
+                                       case DecompositionSuper:
+                                       case DecompositionWide:
+                                       case DecompositionNarrow:
+                                               AddCharMap ((char) c2, category,
+                                                       0, level2);
+                                               break;
+                                       }
+                               }
+                       }
 
                        // itself
                        AddCharMap (c, category, updateCount, level2);
@@ -3378,28 +3770,26 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        // Since nfkdMap is problematic to have two or more
                        // NFKD to an identical character, here I iterate all.
                        for (int c2 = 0; c2 < char.MaxValue; c2++) {
-                               if (decompLength [c2] == 1 &&
+                               if (!map [c2].Defined &&
+                                       decompLength [c2] == 1 &&
                                        (int) (decompValues [decompIndex [c2]]) == (int) c) {
                                        switch (decompType [c2]) {
-                                       case DecompositionCompat:
+                                       case DecompositionWide:
+                                       case DecompositionNarrow:
+                                       case DecompositionSmall:
+                                       case DecompositionSub:
+                                       case DecompositionSuper:
+                                               continue;
+                                       default:
                                                AddCharMap ((char) c2, category, updateCount, level2);
                                                break;
                                        }
                                }
                        }
-
-                       if (vertical != char.MinValue)
-                               // SPECIAL CASE excluded (FIXME: why?)
-                               if (vertical != '\uFE33' && vertical != '\uFE34')
-                                       AddCharMap (vertical, category, updateCount, level2);
                }
 
-               private void AddArabicCharMap (char c)
+               private void AddArabicCharMap (char c, byte category, byte updateCount, byte level2)
                {
-                       byte category = 6;
-                       byte updateCount = 1;
-                       byte level2 = 0;
-
                        // itself
                        AddCharMap (c, category, 0, level2);
 
@@ -3497,22 +3887,32 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        if ('\u2160' <= c && c <= '\u216F')
                                return 0x10;
                        if ('\u2181' <= c && c <= '\u2182')
-                               return 0x18;
+                               return 0x10;
                        // Arabic
                        if ('\u2135' <= c && c <= '\u2138')
                                return 4;
-                       if ('\uFE80' <= c && c < '\uFF00') {
+                       // I believe that Windows has a bug in how it sets the level 3
+                       // weight here: NFKD results in different values.
+                       if ('\uFE80' < c && c < '\uFF00') {
                                // 2(Isolated)/8(Final)/0x18(Medial)
                                switch (decompType [(int) c]) {
                                case DecompositionIsolated:
-                                       return 2;
+                                       return 0; // 2;
                                case DecompositionFinal:
                                        return 8;
                                case DecompositionMedial:
                                        return 0x18;
+                               case DecompositionInitial:
+                                       return 0x10;
                                }
                        }
 
+                       // I have no idea why those symbols have level 3 weight
+                       if (c == '\u2104' || c == '\u212B')
+                               return 0x18;
+                       if ('\u211E' <= c && c <= '\u212B')
+                               return 0x10;
+
                        // actually I dunno the reason why they have weights.
                        switch (c) {
                        case '\u01BC':
@@ -3533,12 +3933,11 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        byte ret = 0;
                        switch (c) {
                        case '\u03C2':
-                       case '\u2104':
                        case '\u212B':
-                               ret |= 8;
+                               ret = 8;
                                break;
                        case '\uFE42':
-                               ret |= 0xC;
+                               ret = 0xA;
                                break;
                        }