svn path=/branches/mono-1-1-9/mcs/; revision=51212
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / create-mscompat-collation-table.cs
index 2e7a3ec5ac6b181c54fa826b2ba4bf3a7dd82bb1..02bee38d373c50a0221d9d970d25cbfcb98d3590 100644 (file)
@@ -1,4 +1,31 @@
 //
+// create-mscompat-collation-table.cs : generates Windows-like sortkey tables.
+//
+// Author:
+//     Atsushi Enomoto  <atsushi@ximian.com>
+//
+// Copyright (C) 2005 Novell, Inc (http://www.novell.com)
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the
+// "Software"), to deal in the Software without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to
+// the following conditions:
+// 
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
 //
 // There are two kind of sort keys : which are computed and which are laid out
 // as an indexed array. Computed sort keys are:
 //     - Surrogate
 //     - PrivateUse
 //
-// Also, for composite characters it should prepare different index table.
-//
 // Though it is possible to "compute" level 3 weights, they are still dumped
 // to an array to avoid execution cost.
 //
-
-//
-// * sortkey getter signature
-//
-//     int GetSortKey (string s, int index, SortKeyBuffer buf)
-//     Stores sort key for corresponding character element into buf and
-//     returns the length of the consumed _source_ character element in s.
-//
-// * character length to consume
-//
-//     If there are characters whose primary weight is 0, they are consumed
-//     and considered as a part of the character element.
-//
+#define Binary
 
 using System;
 using System.IO;
 using System.Collections;
 using System.Globalization;
+using System.Text;
 using System.Xml;
 
+using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
+
 namespace Mono.Globalization.Unicode
 {
        internal class MSCompatSortKeyTableGenerator
@@ -59,7 +75,8 @@ namespace Mono.Globalization.Unicode
                const int DecompositionCompat = 0x11;
                const int DecompositionCanonical = 0x12;
 
-               TextWriter Result = Console.Out;
+               TextWriter CSResult = Console.Out;
+               TextWriter CResult = TextWriter.Null;
 
                byte [] fillIndex = new byte [256]; // by category
                CharMapEntry [] map = new CharMapEntry [char.MaxValue + 1];
@@ -94,19 +111,32 @@ namespace Mono.Globalization.Unicode
                byte [] diacritical = new byte [char.MaxValue + 1];
 
                string [] diacritics = new string [] {
-                       // LATIN
-                       "WITH ACUTE;", "WITH GRAVE;", " DOT ABOVE;", " MIDDLE DOT;",
-                       "WITH CIRCUMFLEX;", "WITH DIAERESIS;", "WITH CARON;", "WITH BREVE;",
-                       " DIALYTIKA AND TONOS;", "WITH MACRON;", "WITH TILDE;", " RING ABOVE;",
-                       " OGONEK;", " CEDILLA;",
+                       // LATIN, CYRILLIC etc.
+                       "VERTICAL LINE ABOVE", "UPTURN", "DOUBLE-STRUCK",
+                       "ABKHASIAN",
+                       "MIDDLE HOOK", "WITH VERTICAL LINE ABOVE;", "WITH TONOS",
+                       "WITH ACUTE ACCENT;", "WITH GRAVE ACCENT;",
+                       "WITH ACUTE;", "WITH GRAVE;",
+                       //
+                       "WITH DOT ABOVE;", " MIDDLE DOT;",
+                       "WITH CIRCUMFLEX ACCENT;", "WITH CIRCUMFLEX;",
+                       "WITH DIALYTIKA;",
+                       "WITH DIAERESIS;", "WITH CARON;", "WITH BREVE;",
+                       "DIALYTIKA TONOS", "DIALYTIKA AND TONOS",
+                       "ABKHASIAN CHE WITH DESCENDER",
+                       "WITH MACRON;", "WITH TILDE;", "WITH RING ABOVE;",
+                       "WITH OGONEK;", "WITH CEDILLA;",
+                       //
                        " DOUBLE ACUTE;", " ACUTE AND DOT ABOVE;",
-                       " STROKE;", " CIRCUMFLEX AND ACUTE;",
+                       "WITH STROKE;", " CIRCUMFLEX AND ACUTE;",
+                       "STROKE OVERLAY",
                        " DIAERESIS AND ACUTE;", "WITH CIRCUMFLEX AND GRAVE;", " L SLASH;",
                        " DIAERESIS AND GRAVE;",
                        " BREVE AND ACUTE;",
                        " CARON AND DOT ABOVE;", " BREVE AND GRAVE;",
                        " MACRON AND ACUTE;",
                        " MACRON AND GRAVE;",
+                       //
                        " DIAERESIS AND CARON", " DOT ABOVE AND MACRON", " TILDE AND ACUTE",
                        " RING ABOVE AND ACUTE",
                        " DIAERESIS AND MACRON", " CEDILLA AND ACUTE", " MACRON AND DIAERESIS",
@@ -116,45 +146,63 @@ namespace Mono.Globalization.Unicode
                        " BREVE AND TILDE",
                        " CEDILLA AND BREVE",
                        " OGONEK AND MACRON",
-                       " HOOK;", "LEFT HOOK;", " WITH HOOK ABOVE;",
-                       " DOUBLE GRAVE;",
+                       // 0x40
+                       "WITH OVERLINE", "DOUBLE VERTICAL LINE ABOVE",
+                       "WITH HOOK;", "LEFT HOOK;", " WITH HOOK ABOVE;",
+                       " DOUBLE GRAVE",
                        " INVERTED BREVE",
+                       "ROMAN NUMERAL",
                        " PRECEDED BY APOSTROPHE",
-                       " HORN;",
+                       "WITH HORN;",
                        " LINE BELOW;", " CIRCUMFLEX AND HOOK ABOVE",
                        " PALATAL HOOK",
                        " DOT BELOW;",
-                       " RETROFLEX;", "DIAERESIS BELOW",
-                       " RING BELOW",
+                       " RETROFLEX;", "DIAERESIS BELOW", "RETROFLEX HOOK",
+                       " RING BELOW", "LOW VERTICAL LINE",
+                       //
                        " CIRCUMFLEX BELOW", "HORN AND ACUTE",
                        " BREVE BELOW;", " HORN AND GRAVE",
+                       " LOW MACRON",
                        " TILDE BELOW",
+                       " TOPBAR",
                        " DOT BELOW AND DOT ABOVE",
                        " RIGHT HALF RING", " HORN AND TILDE",
                        " CIRCUMFLEX AND DOT BELOW",
                        " BREVE AND DOT BELOW",
                        " DOT BELOW AND MACRON",
+                       " TONE TWO",
                        " HORN AND HOOK ABOVE",
                        " HORN AND DOT",
                        // CIRCLED, PARENTHESIZED and so on
-                       "CIRCLED DIGIT", "CIRCLED NUMBER", "CIRCLED LATIN", "CIRCLED KATAKANA",
+                       "CIRCLED DIGIT", "CIRCLED NUMBER", "CIRCLED LATIN",
+                       "CIRCLED KATAKANA", "CIRCLED SANS-SERIF",
                        "PARENTHESIZED DIGIT", "PARENTHESIZED NUMBER", "PARENTHESIZED LATIN",
                        };
                byte [] diacriticWeights = new byte [] {
                        // LATIN.
-                       0xE, 0xF, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
-                       0x17, 0x19, 0x1A, 0x1B, 0x1C,
-                       0x1D, 0x1D, 0x1E, 0x1E, 0x1F, 0x1F, 0x1F,
+                       3, 3, 3, 5, 5, 5, 5,
+                       0xE, 0xF,
+                       0xE, 0xF,
+                       //
+                       0x10, 0x11, 0x12, 0x12, 0x13, 0x13, 0x14, 0x15, 0x16,
+                       0x16, 0x17, 0x17, 0x19, 0x1A, 0x1B, 0x1C,
+                       //
+                       0x1D, 0x1D, 0x1E, 0x1E, 0x1E, 0x1F, 0x1F, 0x1F,
                        0x20, 0x21, 0x22, 0x22, 0x23, 0x24,
+                       //
                        0x25, 0x25, 0x25, 0x26, 0x28, 0x28, 0x28,
                        0x29, 0x2A, 0x2B, 0x2C, 0x2F, 0x30,
-                       0x43, 0x43, 0x43, 0x44, 0x46, 0x48,
-                       0x52, 0x55, 0x55, 0x57, 0x58, 0x59, 0x59, 0x5A,
-                       0x60, 0x60, 0x61, 0x61, 0x63, 0x68, 
+                       //
+                       0x40, 0x41, 0x43, 0x43, 0x43, 0x44, 0x46, 0x47, 0x48,
+                       0x52, 0x55, 0x55, 0x57, 0x58, 0x59, 0x59, 0x59,
+                       0x5A, 0x5A,
+                       //
+                       0x60, 0x60, 0x61, 0x61, 0x62, 0x63, 0x68, 0x68,
                        0x69, 0x69, 0x6A, 0x6D, 0x6E,
-                       0x95, 0xAA,
+                       0x87, 0x95, 0xAA,
                        // CIRCLED, PARENTHESIZED and so on.
-                       0xEE, 0xEE, 0xEE, 0xEE, 0xF3, 0xF3, 0xF3, 0xF3
+                       0xEE, 0xEE, 0xEE, 0xEE, 0xEE,
+                       0xF3, 0xF3, 0xF3
                        };
 
                int [] numberSecondaryWeightBounds = new int [] {
@@ -165,7 +213,6 @@ namespace Mono.Globalization.Unicode
                        0xE50, 0xE60, 0xED0, 0xEE0
                        };
 
-               char [] orderedCyrillic;
                char [] orderedGurmukhi;
                char [] orderedGujarati;
                char [] orderedGeorgian;
@@ -175,18 +222,21 @@ namespace Mono.Globalization.Unicode
                        // based on traditional Tamil consonants, except for
                        // Grantha (where Microsoft breaks traditionalism).
                        // http://www.angelfire.com/empire/thamizh/padanGaL
-                       '\u0B99', '\u0B9A', '\u0B9E', '\u0B9F', '\u0BA3',
-                       '\u0BA4', '\u0BA8', '\u0BAA', '\u0BAE', '\u0BAF',
-                       '\u0BB0', '\u0BB2', '\u0BB5', '\u0BB4', '\u0BB3',
-                       '\u0BB1', '\u0BA9', '\u0B9C', '\u0BB8', '\u0BB7',
-                       '\u0BB9'};
+                       '\u0B95', '\u0B99', '\u0B9A', '\u0B9E', '\u0B9F',
+                       '\u0BA3', '\u0BA4', '\u0BA8', '\u0BAA', '\u0BAE',
+                       '\u0BAF', '\u0BB0', '\u0BB2', '\u0BB5', '\u0BB4',
+                       '\u0BB3', '\u0BB1', '\u0BA9', '\u0B9C', '\u0BB8',
+                       '\u0BB7', '\u0BB9'};
 
                // cp -> character name (only for some characters)
                ArrayList sortableCharNames = new ArrayList ();
 
-               // cp -> arrow value 0:UPWARDS 1:RIGHTWARDS 2:DOWNWARDS 3:LEFTWARDS
+               // cp -> arrow value (int)
                ArrayList arrowValues = new ArrayList ();
 
+               // cp -> box value (int)
+               ArrayList boxValues = new ArrayList ();
+
                // cp -> level1 value
                Hashtable arabicLetterPrimaryValues = new Hashtable ();
 
@@ -202,127 +252,285 @@ namespace Mono.Globalization.Unicode
                ArrayList jisJapanese = new ArrayList ();
                ArrayList nonJisJapanese = new ArrayList ();
 
-               ushort [] cjkJA = new ushort [char.MaxValue - 0x4E00];
-               ushort [] cjkCHS = new ushort [char.MaxValue - 0x3100];
-               ushort [] cjkCHT = new ushort [char.MaxValue - 0x4E00];
-               ushort [] cjkKO = new ushort [char.MaxValue - 0x4E00];
-               byte [] cjkKOlv2 = new byte [char.MaxValue - 0x4E00];
+               ushort [] cjkJA = new ushort [char.MaxValue +1];// - 0x4E00];
+               ushort [] cjkCHS = new ushort [char.MaxValue +1];// - 0x3100];
+               ushort [] cjkCHT = new ushort [char.MaxValue +1];// - 0x4E00];
+               ushort [] cjkKO = new ushort [char.MaxValue +1];// - 0x4E00];
+               byte [] cjkKOlv2 = new byte [char.MaxValue +1];// - 0x4E00];
 
                byte [] ignorableFlags = new byte [char.MaxValue + 1];
 
-               double [] unicodeAge = new double [char.MaxValue + 1];
+               static double [] unicodeAge = new double [char.MaxValue + 1];
+
+               ArrayList tailorings = new ArrayList ();
 
                void Run (string [] args)
                {
                        string dirname = args.Length == 0 ? "downloaded" : args [0];
-                       FillIgnorables ();
-
                        ParseSources (dirname);
                        Console.Error.WriteLine ("parse done.");
 
                        ModifyParsedValues ();
                        GenerateCore ();
                        Console.Error.WriteLine ("generation done.");
+                       CResult = new StreamWriter ("collation-tables.h", false);
                        Serialize ();
+                       CResult.Close ();
                        Console.Error.WriteLine ("serialization done.");
+/*
+StreamWriter sw = new StreamWriter ("agelog.txt");
+for (int i = 0; i < char.MaxValue; i++) {
+bool shouldBe = false;
+switch (Char.GetUnicodeCategory ((char) i)) {
+case UnicodeCategory.Format: case UnicodeCategory.OtherNotAssigned:
+       shouldBe = true; break;
+}
+if (unicodeAge [i] >= 3.1)
+       shouldBe = true;
+//if (IsIgnorable (i) != shouldBe)
+sw.WriteLine ("{1} {2} {3} {0:X04} {4} {5}", i, unicodeAge [i], IsIgnorable (i), IsIgnorableSymbol (i), char.GetUnicodeCategory ((char) i), IsIgnorable (i) != shouldBe ? '!' : ' ');
+}
+sw.Close ();
+*/
+               }
+
+               byte [] CompressArray (byte [] source, CodePointIndexer i)
+               {
+                       return (byte []) CodePointIndexer.CompressArray  (
+                               source, typeof (byte), i);
+               }
+
+               ushort [] CompressArray (ushort [] source, CodePointIndexer i)
+               {
+                       return (ushort []) CodePointIndexer.CompressArray  (
+                               source, typeof (ushort), i);
+               }
+
+               void WriteByte (byte value)
+               {
+                       
                }
 
                void Serialize ()
                {
+                       // Tailorings
+                       SerializeTailorings ();
+
+                       byte [] categories = new byte [map.Length];
+                       byte [] level1 = new byte [map.Length];
+                       byte [] level2 = new byte [map.Length];
+                       byte [] level3 = new byte [map.Length];
+// widthCompat is now removed from the mapping table.
+// If it turned out that it is still required, grep this source and uncomment
+// widthCompat related lines. FIXME: remove those lines in the future.
+//                     ushort [] widthCompat = new ushort [map.Length];
+                       for (int i = 0; i < map.Length; i++) {
+                               categories [i] = map [i].Category;
+                               level1 [i] = map [i].Level1;
+                               level2 [i] = map [i].Level2;
+                               level3 [i] = ComputeLevel3Weight ((char) i);
+/*
+                               // For Japanese Half-width characters, don't
+                               // map widthCompat. It is IgnoreKanaType that
+                               // handles those width differences.
+                               if (0xFF6D <= i && i <= 0xFF9D)
+                                       continue;
+                               switch (decompType [i]) {
+                               case DecompositionNarrow:
+                               case DecompositionWide:
+                               case DecompositionSuper:
+                               case DecompositionSub:
+                                       // they are always 1 char
+                                       widthCompat [i] = (ushort) decompValues [decompIndex [i]];
+                                       break;
+                               }
+*/
+                       }
+
+                       // compress
+                       ignorableFlags = CompressArray (ignorableFlags,
+                               UUtil.Ignorable);
+                       categories = CompressArray (categories, UUtil.Category);
+                       level1 = CompressArray (level1, UUtil.Level1);
+                       level2 = CompressArray (level2, UUtil.Level2);
+                       level3 = CompressArray (level3, UUtil.Level3);
+//                     widthCompat = (ushort []) CodePointIndexer.CompressArray (
+//                             widthCompat, typeof (ushort), UUtil.WidthCompat);
+                       cjkCHS = CompressArray (cjkCHS, UUtil.CjkCHS);
+                       cjkCHT = CompressArray (cjkCHT,UUtil.Cjk);
+                       cjkJA = CompressArray (cjkJA, UUtil.Cjk);
+                       cjkKO = CompressArray (cjkKO, UUtil.Cjk);
+                       cjkKOlv2 = CompressArray (cjkKOlv2, UUtil.Cjk);
+
                        // Ignorables
-                       Result.WriteLine ("static byte [] ignorableFlags = new byte [] {");
-                       for (int i = 0; i <= char.MaxValue; i++) {
+                       CResult.WriteLine ("static const guint8  collation_table_ignorableFlags [] = {");
+                       CSResult.WriteLine ("static readonly byte [] ignorableFlagsArr = new byte [] {");
+#if Binary
+                       MemoryStream ms = new MemoryStream ();
+                       BinaryWriter binary = new BinaryWriter (ms);
+                       binary.Write (UUtil.ResourceVersion);
+                       binary.Write (ignorableFlags.Length);
+#endif
+                       for (int i = 0; i < ignorableFlags.Length; i++) {
                                byte value = ignorableFlags [i];
                                if (value < 10)
-                                       Result.Write ("{0},", value);
+                                       CSResult.Write ("{0},", value);
                                else
-                                       Result.Write ("0x{0:X02},", value);
-                               if ((i & 0xF) == 0xF)
-                                       Result.WriteLine ("// {0:X04}", i - 0xF);
+                                       CSResult.Write ("0x{0:X02},", value);
+                               CResult.Write ("{0},", value);
+#if Binary
+                               binary.Write (value);
+#endif
+                               if ((i & 0xF) == 0xF) {
+                                       CSResult.WriteLine ("// {0:X04}",
+                                               UUtil.Ignorable.ToCodePoint (i - 0xF));
+                                       CResult.WriteLine ();
+                               }
                        }
-                       Result.WriteLine ("};");
-                       Result.WriteLine ();
+                       CResult.WriteLine ("0};");
+                       CSResult.WriteLine ("};");
+                       CSResult.WriteLine ();
 
                        // Primary category
-                       Result.WriteLine ("static byte [] categories = new byte [] {");
-                       for (int i = 0; i < map.Length; i++) {
-                               byte value = map [i].Category;
+                       CResult.WriteLine ("static const guint8 collation_table_category [] = {");
+                       CSResult.WriteLine ("static readonly byte [] categoriesArr = new byte [] {");
+#if Binary
+                       binary.Write (categories.Length);
+#endif
+                       for (int i = 0; i < categories.Length; i++) {
+                               byte value = categories [i];
                                if (value < 10)
-                                       Result.Write ("{0},", value);
+                                       CSResult.Write ("{0},", value);
                                else
-                                       Result.Write ("0x{0:X02},", value);
-                               if ((i & 0xF) == 0xF)
-                                       Result.WriteLine ("// {0:X04}", i - 0xF);
+                                       CSResult.Write ("0x{0:X02},", value);
+                               CResult.Write ("{0},", value);
+#if Binary
+                               binary.Write (value);
+#endif
+                               if ((i & 0xF) == 0xF) {
+                                       CSResult.WriteLine ("// {0:X04}",
+                                               UUtil.Category.ToCodePoint (i - 0xF));
+                                       CResult.WriteLine ();
+                               }
                        }
-                       Result.WriteLine ("};");
-                       Result.WriteLine ();
+                       CResult.WriteLine ("};");
+                       CSResult.WriteLine ("};");
+                       CSResult.WriteLine ();
 
                        // Primary weight value
-                       Result.WriteLine ("static byte [] level1 = new byte [] {");
-                       for (int i = 0; i < map.Length; i++) {
-                               byte value = map [i].Level1;
+                       CResult.WriteLine ("static const guint8 collation_table_level1 [] = {");
+                       CSResult.WriteLine ("static readonly byte [] level1Arr = new byte [] {");
+#if Binary
+                       binary.Write (level1.Length);
+#endif
+                       for (int i = 0; i < level1.Length; i++) {
+                               byte value = level1 [i];
                                if (value < 10)
-                                       Result.Write ("{0},", value);
+                                       CSResult.Write ("{0},", value);
                                else
-                                       Result.Write ("0x{0:X02},", value);
-                               if ((i & 0xF) == 0xF)
-                                       Result.WriteLine ("// {0:X04}", i - 0xF);
+                                       CSResult.Write ("0x{0:X02},", value);
+                               CResult.Write ("{0},", value);
+#if Binary
+                               binary.Write (value);
+#endif
+                               if ((i & 0xF) == 0xF) {
+                                       CSResult.WriteLine ("// {0:X04}",
+                                               UUtil.Level1.ToCodePoint (i - 0xF));
+                                       CResult.WriteLine ();
+                               }
                        }
-                       Result.WriteLine ("};");
-                       Result.WriteLine ();
+                       CResult.WriteLine ("0};");
+                       CSResult.WriteLine ("};");
+                       CSResult.WriteLine ();
 
                        // Secondary weight
-                       Result.WriteLine ("static byte [] level2 = new byte [] {");
-                       for (int i = 0; i < map.Length; i++) {
-                               int value = map [i].Level2;
+                       CResult.WriteLine ("static const guint8 collation_table_level2 [] = {");
+                       CSResult.WriteLine ("static readonly byte [] level2Arr = new byte [] {");
+#if Binary
+                       binary.Write (level2.Length);
+#endif
+                       for (int i = 0; i < level2.Length; i++) {
+                               byte value = level2 [i];
                                if (value < 10)
-                                       Result.Write ("{0},", value);
+                                       CSResult.Write ("{0},", value);
                                else
-                                       Result.Write ("0x{0:X02},", value);
-                               if ((i & 0xF) == 0xF)
-                                       Result.WriteLine ("// {0:X04}", i - 0xF);
+                                       CSResult.Write ("0x{0:X02},", value);
+                               CResult.Write ("{0},", value);
+#if Binary
+                               binary.Write (value);
+#endif
+                               if ((i & 0xF) == 0xF) {
+                                       CSResult.WriteLine ("// {0:X04}",
+                                               UUtil.Level2.ToCodePoint (i - 0xF));
+                                       CResult.WriteLine ();
+                               }
                        }
-                       Result.WriteLine ("};");
-                       Result.WriteLine ();
+                       CResult.WriteLine ("0};");
+                       CSResult.WriteLine ("};");
+                       CSResult.WriteLine ();
 
                        // Thirtiary weight
-                       Result.WriteLine ("static byte [] level3 = new byte [] {");
-                       for (int i = 0; i < map.Length; i++) {
-                               byte value = ComputeLevel3Weight ((char) i);
+                       CResult.WriteLine ("static const guint8 collation_table_level3 [] = {");
+                       CSResult.WriteLine ("static readonly byte [] level3Arr = new byte [] {");
+#if Binary
+                       binary.Write (level3.Length);
+#endif
+                       for (int i = 0; i < level3.Length; i++) {
+                               byte value = level3 [i];
                                if (value < 10)
-                                       Result.Write ("{0},", value);
+                                       CSResult.Write ("{0},", value);
                                else
-                                       Result.Write ("0x{0:X02},", value);
-                               if ((i & 0xF) == 0xF)
-                                       Result.WriteLine ("// {0:X04}", i - 0xF);
+                                       CSResult.Write ("0x{0:X02},", value);
+                               CResult.Write ("{0},", value);
+#if Binary
+                               binary.Write (value);
+#endif
+                               if ((i & 0xF) == 0xF) {
+                                       CSResult.WriteLine ("// {0:X04}",
+                                               UUtil.Level3.ToCodePoint (i - 0xF));
+                                       CResult.WriteLine ();
+                               }
                        }
-                       Result.WriteLine ("};");
-                       Result.WriteLine ();
+                       CResult.WriteLine ("0};");
+                       CSResult.WriteLine ("};");
+                       CSResult.WriteLine ();
 
+/*
                        // Width insensitivity mappings
                        // (for now it is more lightweight than dumping the
                        // entire NFKD table).
-                       Result.WriteLine ("static int [] widthCompat = new int [] {");
-                       for (int i = 0; i < char.MaxValue; i++) {
-                               int value = 0;
-                               switch (decompType [i]) {
-                               case DecompositionNarrow:
-                               case DecompositionWide:
-                               case DecompositionSuper:
-                               case DecompositionSub:
-                                       // they are always 1 char
-                                       value = decompValues [decompIndex [i]];
-                                       break;
-                               }
+                       CResult.WriteLine ("static const guint16* widthCompat [] = {");
+                       CSResult.WriteLine ("static readonly ushort [] widthCompatArr = new ushort [] {");
+#if Binary
+                       binary.Write (widthCompat.Length);
+#endif
+                       for (int i = 0; i < widthCompat.Length; i++) {
+                               ushort value = widthCompat [i];
                                if (value < 10)
-                                       Result.Write ("{0},", value);
+                                       CSResult.Write ("{0},", value);
                                else
-                                       Result.Write ("0x{0:X04},", value);
-                               if ((i & 0xF) == 0xF)
-                                       Result.WriteLine ("// {0:X04}", i - 0xF);
+                                       CSResult.Write ("0x{0:X02},", value);
+                               CResult.Write ("{0},", value);
+#if Binary
+                               binary.Write (value);
+#endif
+                               if ((i & 0xF) == 0xF) {
+                                       CSResult.WriteLine ("// {0:X04}",
+                                               UUtil.WidthCompat.ToCodePoint (i - 0xF));
+                                       CResult.WriteLine ();
+                               }
+                       }
+                       CResult.WriteLine ("0};");
+                       CSResult.WriteLine ("};");
+                       CSResult.WriteLine ();
+*/
+
+#if Binary
+                       using (FileStream fs = File.Create ("../resources/collation.core.bin")) {
+                               byte [] array = ms.ToArray ();
+                               fs.Write (array, 0, array.Length);
                        }
-                       Result.WriteLine ("};");
-                       Result.WriteLine ();
+#endif
 
                        // CJK
                        SerializeCJK ("cjkCHS", cjkCHS, char.MaxValue);
@@ -332,42 +540,195 @@ namespace Mono.Globalization.Unicode
                        SerializeCJK ("cjkKOlv2", cjkKOlv2, 0x9FB0);
                }
 
-               void SerializeCJK (string name, ushort [] cjk, int max)
+               void SerializeCJK (string name, ushort [] cjk, int max_unused)
                {
-                       int offset = char.MaxValue - cjk.Length;
-                       Result.WriteLine ("static ushort [] {0} = new ushort [] {{", name);
+//                     CResult.WriteLine ("static const int collation_table_collation_cjk_{0}_size [] = {1};", name, cjk.Length);
+                       CSResult.WriteLine ("const int {0}ArrLength = {1};", name, cjk.Length);
+
+                       int len = cjk.Length;
+                       CResult.WriteLine ("static const guint8 collation_table_collation_cjk_{0} [] = {{", name);
+                       CSResult.WriteLine ("static byte [] {0}Arr = new byte [] {{", name);
+                       // the actual length is *2
+                       for (int i = 0; i < 4; i++, len /= 256) {
+                               CResult.Write ("{0},", len & 0xFF);
+                               CSResult.Write ("0x{0:X04},", len & 0xFF);
+                       }
+                       CResult.WriteLine ();
+                       CSResult.WriteLine ();
+#if Binary
+                       MemoryStream ms = new MemoryStream ();
+                       BinaryWriter binary = new BinaryWriter (ms);
+                       binary.Write (UUtil.ResourceVersion);
+                       binary.Write (cjk.Length); // the actual size is *2.
+#endif
+                       // category
                        for (int i = 0; i < cjk.Length; i++) {
-                               if (i + offset == max)
-                                       break;
-                               ushort value = cjk [i];
+//                             if (i == max)
+//                                     break;
+                               byte value = (byte) (cjk [i] >> 8);
+                               if (value < 10)
+                                       CSResult.Write ("{0},", value);
+                               else
+                                       CSResult.Write ("0x{0:X02},", value);
+                               CResult.Write ("{0},", value);
+#if Binary
+                               binary.Write (value);
+#endif
+                               if ((i & 0xF) == 0xF) {
+                                       CSResult.WriteLine ("// {0:X04}", i - 0xF);
+                                       CResult.WriteLine ();
+                               }
+                       }
+
+                       // level 1
+                       for (int i = 0; i < cjk.Length; i++) {
+//                             if (i == max)
+//                                     break;
+                               byte value = (byte) (cjk [i] & 0xFF);
                                if (value < 10)
-                                       Result.Write ("{0},", value);
+                                       CSResult.Write ("{0},", value);
                                else
-                                       Result.Write ("0x{0:X04},", value);
-                               if ((i & 0xF) == 0xF)
-                                       Result.WriteLine ("// {0:X04}", i - 0xF + offset);
+                                       CSResult.Write ("0x{0:X02},", value);
+                               CResult.Write ("{0},", value);
+#if Binary
+                               binary.Write (value);
+#endif
+                               if ((i & 0xF) == 0xF) {
+                                       CSResult.WriteLine ("// {0:X04}", i - 0xF);
+                                       CResult.WriteLine ();
+                               }
+                       }
+
+                       CResult.WriteLine ("0};");
+                       CSResult.WriteLine ("};");
+                       CSResult.WriteLine ();
+#if Binary
+                       using (FileStream fs = File.Create (String.Format ("../resources/collation.{0}.bin", name))) {
+                               byte [] array = ms.ToArray ();
+                               fs.Write (array, 0, array.Length);
                        }
-                       Result.WriteLine ("};");
-                       Result.WriteLine ();
+#endif
                }
 
                void SerializeCJK (string name, byte [] cjk, int max)
                {
-                       int offset = char.MaxValue - cjk.Length;
-                       Result.WriteLine ("static byte [] {0} = new byte [] {{", name);
+                       CResult.WriteLine ("static const guint8 collation_table_collation_cjk_{0} [] = {{", name);
+                       CSResult.WriteLine ("static byte [] {0}Arr = new byte [] {{", name);
+#if Binary
+                       MemoryStream ms = new MemoryStream ();
+                       BinaryWriter binary = new BinaryWriter (ms);
+                       binary.Write (UUtil.ResourceVersion);
+#endif
                        for (int i = 0; i < cjk.Length; i++) {
-                               if (i + offset == max)
+                               if (i == max)
                                        break;
                                byte value = cjk [i];
                                if (value < 10)
-                                       Result.Write ("{0},", value);
+                                       CSResult.Write ("{0},", value);
                                else
-                                       Result.Write ("0x{0:X02},", value);
-                               if ((i & 0xF) == 0xF)
-                                       Result.WriteLine ("// {0:X04}", i - 0xF + offset);
+                                       CSResult.Write ("0x{0:X02},", value);
+                               CResult.Write ("{0},", value);
+#if Binary
+                               binary.Write (value);
+#endif
+                               if ((i & 0xF) == 0xF) {
+                                       CSResult.WriteLine ("// {0:X04}", i - 0xF);
+                                       CResult.WriteLine ();
+                               }
+                       }
+                       CResult.WriteLine ("0};");
+                       CSResult.WriteLine ("};");
+                       CSResult.WriteLine ();
+#if Binary
+                       using (FileStream fs = File.Create (String.Format ("../resources/collation.{0}.bin", name))) {
+                               byte [] array = ms.ToArray ();
+                               fs.Write (array, 0, array.Length);
                        }
-                       Result.WriteLine ("};");
-                       Result.WriteLine ();
+#endif
+               }
+
+               void SerializeTailorings ()
+               {
+                       Hashtable indexes = new Hashtable ();
+                       Hashtable counts = new Hashtable ();
+                       CResult.WriteLine ("static const guint16 collation_table_tailoring [] = {");
+                       CSResult.WriteLine ("static char [] tailoringArr = new char [] {");
+                       int count = 0;
+#if Binary
+                       MemoryStream ms = new MemoryStream ();
+                       BinaryWriter binary = new BinaryWriter (ms);
+                       // Here we don't need to output resource version.
+                       // This is cached.
+#endif
+                       foreach (Tailoring t in tailorings) {
+                               if (t.Alias != 0)
+                                       continue;
+                               CResult.Write ("/*{0}*/", t.LCID);
+                               CSResult.Write ("/*{0}*/", t.LCID);
+                               indexes.Add (t.LCID, count);
+                               char [] values = t.ItemToCharArray ();
+                               counts.Add (t.LCID, values.Length);
+                               foreach (char c in values) {
+                                       CSResult.Write ("'\\x{0:X}', ", (int) c);
+                                       CResult.Write ("{0},", (int) c);
+                                       if (++count % 16 == 0) {
+                                               CSResult.WriteLine (" // {0:X04}", count - 16);
+                                               CResult.WriteLine ();
+                                       }
+#if Binary
+                                       binary.Write ((ushort) c);
+#endif
+                               }
+                       }
+                       CResult.WriteLine ("0};");
+                       CSResult.WriteLine ("};");
+
+                       CResult.WriteLine ("static const guint32 collation_table_tailoring_infos [] = {");
+                       CResult.WriteLine ("{0}, /*count*/", tailorings.Count);
+                       CSResult.WriteLine ("static TailoringInfo [] tailoringInfos = new TailoringInfo [] {");
+#if Binary
+                       byte [] rawdata = ms.ToArray ();
+                       ms = new MemoryStream ();
+                       binary = new BinaryWriter (ms);
+                       binary.Write (UUtil.ResourceVersion);
+                       binary.Write (tailorings.Count);
+#endif
+                       foreach (Tailoring t in tailorings) {
+                               int target = t.Alias != 0 ? t.Alias : t.LCID;
+                               if (!indexes.ContainsKey (target)) {
+                                       throw new Exception (String.Format ("WARNING: no corresponding definition for tailoring alias. From {0} to {1}", t.LCID, t.Alias));
+                                       continue;
+                               }
+                               int idx = (int) indexes [target];
+                               int cnt = (int) counts [target];
+                               bool french = t.FrenchSort;
+                               if (t.Alias != 0)
+                                       foreach (Tailoring t2 in tailorings)
+                                               if (t2.LCID == t.LCID)
+                                                       french = t2.FrenchSort;
+                               CSResult.WriteLine ("new TailoringInfo ({0}, 0x{1:X}, {2}, {3}), ", t.LCID, idx, cnt, french ? "true" : "false");
+                               CResult.WriteLine ("{0},{1},{2},{3},", t.LCID, idx, cnt, french ? 1 : 0);
+#if Binary
+                               binary.Write (t.LCID);
+                               binary.Write (idx);
+                               binary.Write (cnt);
+                               binary.Write (french);
+#endif
+                       }
+                       CResult.WriteLine ("0};");
+                       CSResult.WriteLine ("};");
+#if Binary
+                       binary.Write ((byte) 0xFF);
+                       binary.Write ((byte) 0xFF);
+                       binary.Write (rawdata.Length / 2);
+                       binary.Write (rawdata, 0, rawdata.Length);
+
+
+                       using (FileStream fs = File.Create ("../resources/collation.tailoring.bin")) {
+                               byte [] array = ms.ToArray ();
+                               fs.Write (array, 0, array.Length);
+                       }
+#endif
                }
 
                #region Parse
@@ -389,11 +750,111 @@ namespace Mono.Globalization.Unicode
                        string koXML = dirname + "/common/collation/ko.xml";
 
                        ParseDerivedAge (derivedAge);
+
+                       FillIgnorables ();
+
                        ParseJISOrder (cp932); // in prior to ParseUnidata()
                        ParseUnidata (unidata);
+                       ModifyUnidata ();
                        ParseDerivedCoreProperties (derivedCoreProps);
                        ParseScripts (scripts);
                        ParseCJK (chXML, jaXML, koXML);
+
+                       ParseTailorings ("mono-tailoring-source.txt");
+               }
+
+               void ParseTailorings (string filename)
+               {
+                       Tailoring t = null;
+                       int line = 0;
+                       using (StreamReader sr = new StreamReader (filename)) {
+                               try {
+                                       while (sr.Peek () >= 0) {
+                                               line++;
+                                               ProcessTailoringLine (ref t,
+                                                       sr.ReadLine ().Trim ());
+                                       }
+                               } catch (Exception) {
+                                       Console.Error.WriteLine ("ERROR at line {0}", line);
+                                       throw;
+                               }
+                       }
+               }
+
+               // For now this is enough.
+               string ParseTailoringSourceValue (string s)
+               {
+                       StringBuilder sb = new StringBuilder ();
+                       for (int i = 0; i < s.Length; i++) {
+                               if (i + 5 < s.Length &&
+                                       s [i] == '\\' && s [i + 1] == 'u') {
+                                       sb.Append (
+                                               (char) int.Parse (
+                                                       s.Substring (i + 2, 4),
+                                                       NumberStyles.HexNumber),
+                                               1);
+                                       i += 5;
+                               }
+                               else
+                                       sb.Append (s [i]);
+                       }
+                       return sb.ToString ();
+               }
+
+               void ProcessTailoringLine (ref Tailoring t, string s)
+               {
+                       int idx = s.IndexOf ('#');
+                       if (idx > 0)
+                               s = s.Substring (0, idx).Trim ();
+                       if (s.Length == 0 || s [0] == '#')
+                               return;
+                       if (s [0] == '@') {
+                               idx = s.IndexOf ('=');
+                               if (idx > 0)
+                                       t = new Tailoring (
+                                               int.Parse (s.Substring (1, idx - 1)),
+                                               int.Parse (s.Substring (idx + 1)));
+                               else
+                                       t = new Tailoring (int.Parse (s.Substring (1)));
+                               tailorings.Add (t);
+                               return;
+                       }
+                       if (s.StartsWith ("*FrenchSort")) {
+                               t.FrenchSort = true;
+                               return;
+                       }
+                       string d = "*Diacritical";
+                       if (s.StartsWith (d)) {
+                               idx = s.IndexOf ("->");
+                               t.AddDiacriticalMap (
+                                       byte.Parse (s.Substring (d.Length, idx - d.Length).Trim (),
+                                               NumberStyles.HexNumber),
+                                       byte.Parse (s.Substring (idx + 2).Trim (),
+                                               NumberStyles.HexNumber));
+                               return;
+                       }
+                       idx = s.IndexOf (':');
+                       if (idx > 0) {
+                               string source = s.Substring (0, idx).Trim ();
+                               string [] l = s.Substring (idx + 1).Trim ().Split (' ');
+                               byte [] b = new byte [4];
+                               for (int i = 0; i < 4; i++) {
+                                       if (l [i] == "*")
+                                               b [i] = 0;
+                                       else
+                                               b [i] = byte.Parse (l [i],
+                                                       NumberStyles.HexNumber);
+                               }
+                               t.AddSortKeyMap (ParseTailoringSourceValue (source),
+                                       b);
+                       }
+                       idx = s.IndexOf ('=');
+                       if (idx > 0)
+                               t.AddReplacementMap (
+                                       ParseTailoringSourceValue (
+                                               s.Substring (0, idx).Trim ()),
+                                       ParseTailoringSourceValue (
+                                               s.Substring (idx + 1).Trim ()));
                }
 
                void ParseDerivedAge (string filename)
@@ -421,10 +882,12 @@ namespace Mono.Globalization.Unicode
                                        if (cp > char.MaxValue)
                                                continue;
 
+                                       double v = double.Parse (value);
                                        for (int i = cp; i <= cpEnd; i++)
-                                               unicodeAge [i] = double.Parse (value);
+                                               unicodeAge [i] = v;
                                }
                        }
+                       unicodeAge [0] = double.MaxValue; // never be supported
                }
 
                void ParseUnidata (string filename)
@@ -444,7 +907,10 @@ namespace Mono.Globalization.Unicode
                        this.decompValues = (int [])
                                decompValues.ToArray (typeof (int));
                }
-               
+
+               char previousLatinTarget = char.MinValue;
+               byte [] diacriticalOffset = new byte ['Z' - 'A' + 1];
+
                void ProcessUnidataLine (string s, ArrayList decompValues)
                {
                        int idx = s.IndexOf ('#');
@@ -462,37 +928,112 @@ namespace Mono.Globalization.Unicode
                        if (IsIgnorable (cp))
                                return;
 
+                       string name = values [0];
+
+                       // SPECIAL CASE: rename some characters for diacritical
+                       // remapping. FIXME: why are they different?
+                       // FIXME: it's still not working.
+                       if (cp == 0x018B || cp == 0x018C)
+                               name = name.Replace ("TOPBAR", "STROKE");
+
                        // isSmallCapital
                        if (s.IndexOf ("SMALL CAPITAL") > 0)
                                isSmallCapital [cp] = true;
 
                        // latin mapping by character name
-                       if (s.IndexOf ("LATIN") > 0) {
+                       if (s.IndexOf ("LATIN") >= 0) {
                                int lidx = s.IndexOf ("LETTER DOTLESS ");
                                int offset = lidx + 15;
                                if (lidx < 0) {
                                        lidx = s.IndexOf ("LETTER TURNED ");
                                        offset = lidx + 14;
                                }
+                               if (lidx < 0) {
+                                       lidx = s.IndexOf ("LETTER CAPITAL ");
+                                       offset = lidx + 15;
+                               }
+                               if (lidx < 0) {
+                                       lidx = s.IndexOf ("LETTER SCRIPT ");
+                                       offset = lidx + 14;
+                               }
                                if (lidx < 0) {
                                        lidx = s.IndexOf ("LETTER ");
                                        offset = lidx + 7;
                                }
                                char c = lidx > 0 ? s [offset] : char.MinValue;
+                               char n = s [offset + 1];
+                               char target = char.MinValue;
                                if ('A' <= c && c <= 'Z' &&
-                                       (s.Length == offset + 1 || s [offset + 1] == ' ')) {
-                                       ArrayList entry = (ArrayList) latinMap [c];
+                                       (n == ' ') || n == ';') {
+                                       target = c;
+                                       // FIXME: After 'Z', I cannot reset this state.
+                                       previousLatinTarget = c == 'Z' ? char.MinValue : c;
+                               }
+
+                               if (s.Substring (offset).StartsWith ("ALPHA"))
+                                       target = 'A';
+                               else if (s.Substring (offset).StartsWith ("TONE SIX"))
+                                       target = 'B';
+                               else if (s.Substring (offset).StartsWith ("OPEN O"))
+                                       target = 'C';
+                               else if (s.Substring (offset).StartsWith ("ETH"))
+                                       target = 'D';
+                               else if (s.Substring (offset).StartsWith ("SCHWA"))
+                                       target = 'E';
+                               else if (s.Substring (offset).StartsWith ("OI;")) // 01A2,01A3
+                                       target = 'O';
+                               else if (s.Substring (offset).StartsWith ("YR;")) // 01A2,01A3
+                                       target = 'R';
+                               else if (s.Substring (offset).StartsWith ("TONE TWO"))
+                                       target = 'S';
+                               else if (s.Substring (offset).StartsWith ("ESH"))
+                                       target = 'S';
+                               else if (s.Substring (offset).StartsWith ("OUNCE"))
+                                       target = 'Z';
+
+                               // For remaining IPA chars, direct mapping is
+                               // much faster.
+                               switch (cp) {
+                               case 0x0166: case 0x0167:
+                                       // Though they are 'T', they have different weight
+                                       target = char.MinValue; break;
+                               case 0x0299: target = 'B'; break;
+                               case 0x029A: target = 'E'; break;
+                               case 0x029B: target = 'G'; break;
+                               case 0x029C: target = 'H'; break;
+                               case 0x029D: target = 'J'; break;
+                               case 0x029E: target = 'K'; break;
+                               case 0x029F: target = 'L'; break;
+                               case 0x02A0: target = 'Q'; break;
+                               case 0x02A7: target = 'T'; break;
+                               case 0x02A8: target = 'T'; break;
+                               }
+
+                               if (target == char.MinValue)
+                                       target = previousLatinTarget;
+
+                               if (target != char.MinValue) {
+                                       ArrayList entry = (ArrayList) latinMap [target];
                                        if (entry == null) {
                                                entry = new ArrayList ();
-                                               latinMap [c] = entry;
+                                               latinMap [target] = entry;
                                        }
                                        entry.Add (cp);
+                                       // FIXME: This secondary weight is hack.
+                                       // They are here because they must not
+                                       // be identical to the corresponding
+                                       // ASCII latins.
+                                       if (c != target && diacritical [cp] == 0) {
+                                               diacriticalOffset [c - 'A']++;
+                                               diacritical [cp] = (byte) (diacriticalOffset [c - 'A'] + 0x7C);
+                                       }
                                }
                        }
 
                        // Arrow names
                        if (0x2000 <= cp && cp < 0x3000) {
                                int value = 0;
+                               // SPECIAL CASES. FIXME: why?
                                switch (cp) {
                                case 0x21C5: value = -1; break; // E2
                                case 0x261D: value = 1; break;
@@ -516,12 +1057,23 @@ namespace Mono.Globalization.Unicode
                                        "SOUTH WEST",
                                        "LEFTWARDS",
                                        "NORTH WEST",
+                                       "LEFT RIGHT",
+                                       "UP DOWN",
                                        };
+                               if (s.IndexOf ("RIGHTWARDS") >= 0 &&
+                                       s.IndexOf ("LEFTWARDS") >= 0)
+                                       value = 0xE1 - 0xD8;
+                               else if (s.IndexOf ("UPWARDS") >= 0 &&
+                                       s.IndexOf ("DOWNWARDS") >= 0)
+                                       value = 0xE2 - 0xD8;
+                               else if (s.IndexOf ("ARROW") >= 0 &&
+                                       s.IndexOf ("COMBINING") < 0 &&
+                                       s.IndexOf ("CLOCKWISE") >= 0)
+                                       value = s.IndexOf ("ANTICLOCKWISE") >= 0 ? 0xE4 - 0xD8 : 0xE3 - 0xD8;
                                if (value == 0)
                                        for (int i = 1; value == 0 && i < arrowTargets.Length; i++)
                                                if (s.IndexOf (arrowTargets [i]) > 0 &&
                                                        s.IndexOf ("BARB " + arrowTargets [i]) < 0 &&
-//                                                     s.IndexOf (" WITH TIP " + arrowTargets [i]) < 0 &&
                                                        s.IndexOf (" OVER") < 0
                                                )
                                                        value = i;
@@ -530,25 +1082,176 @@ namespace Mono.Globalization.Unicode
                                                cp, value));
                        }
 
+                       // Box names
+                       if (0x2500 <= cp && cp < 0x2600) {
+                               int value = int.MinValue;
+                               // flags:
+                               // up:1 down:2 right:4 left:8 vert:16 horiz:32
+                               // [h,rl] [r] [l]
+                               // [v,ud] [u] [d]
+                               // [dr] [dl] [ur] [ul]
+                               // [vr,udr] [vl,vdl]
+                               // [hd,rld] [hu,rlu]
+                               // [hv,udrl,rlv,udh]
+                               ArrayList flags = new ArrayList (new int [] {
+                                       32, 8 + 4, 8, 4,
+                                       16, 1 + 2, 1, 2,
+                                       4 + 2, 8 + 2, 4 + 1, 8 + 1,
+                                       16 + 4, 1 + 2 + 4, 16 + 8, 1 + 2 + 8,
+                                       32 + 2, 4 + 8 + 2, 32 + 1, 4 + 8 + 1,
+                                       16 + 32, 1 + 2 + 4 + 8, 4 + 8 + 16, 1 + 2 + 32
+                                       });
+                               byte [] offsets = new byte [] {
+                                       0, 0, 1, 2,
+                                       3, 3, 4, 5,
+                                       6, 7, 8, 9,
+                                       10, 10, 11, 11,
+                                       12, 12, 13, 13,
+                                       14, 14, 14, 14};
+                               if (s.IndexOf ("BOX DRAWINGS ") >= 0) {
+                                       int flag = 0;
+                                       if (s.IndexOf (" UP") >= 0)
+                                               flag |= 1;
+                                       if (s.IndexOf (" DOWN") >= 0)
+                                               flag |= 2;
+                                       if (s.IndexOf (" RIGHT") >= 0)
+                                               flag |= 4;
+                                       if (s.IndexOf (" LEFT") >= 0)
+                                               flag |= 8;
+                                       if (s.IndexOf (" VERTICAL") >= 0)
+                                               flag |= 16;
+                                       if (s.IndexOf (" HORIZONTAL") >= 0)
+                                               flag |= 32;
+
+                                       int fidx = flags.IndexOf (flag);
+                                       if (fidx >= 0)
+                                               value = offsets [fidx];
+                               } else if (s.IndexOf ("BLOCK") >= 0) {
+                                       if (s.IndexOf ("ONE EIGHTH") >= 0)
+                                               value = 0x12;
+                                       else if (s.IndexOf ("ONE QUARTER") >= 0)
+                                               value = 0x13;
+                                       else if (s.IndexOf ("THREE EIGHTHS") >= 0)
+                                               value = 0x14;
+                                       else if (s.IndexOf ("HALF") >= 0)
+                                               value = 0x15;
+                                       else if (s.IndexOf ("FIVE EIGHTHS") >= 0)
+                                               value = 0x16;
+                                       else if (s.IndexOf ("THREE QUARTERS") >= 0)
+                                               value = 0x17;
+                                       else if (s.IndexOf ("SEVEN EIGHTHS") >= 0)
+                                               value = 0x18;
+                                       else
+                                               value = 0x19;
+                               }
+                               else if (s.IndexOf ("SHADE") >= 0)
+                                       value = 0x19;
+                               else if (s.IndexOf ("SQUARE") >= 0)
+                                       value = 0xBC - 0xE5;
+                               else if (s.IndexOf ("VERTICAL RECTANGLE") >= 0)
+                                       value = 0xBE - 0xE5;
+                               else if (s.IndexOf ("RECTANGLE") >= 0)
+                                       value = 0xBD - 0xE5;
+                               else if (s.IndexOf ("PARALLELOGRAM") >= 0)
+                                       value = 0xBF - 0xE5;
+                               else if (s.IndexOf ("TRIANGLE") >= 0) {
+                                       if (s.IndexOf ("UP-POINTING") >= 0)
+                                               value = 0xC0 - 0xE5;
+                                       else if (s.IndexOf ("RIGHT-POINTING") >= 0)
+                                               value = 0xC1 - 0xE5;
+                                       else if (s.IndexOf ("DOWN-POINTING") >= 0)
+                                               value = 0xC2 - 0xE5;
+                                       else if (s.IndexOf ("LEFT-POINTING") >= 0)
+                                               value = 0xC3 - 0xE5;
+                               }
+                               else if (s.IndexOf ("POINTER") >= 0) {
+                                       if (s.IndexOf ("RIGHT-POINTING") >= 0)
+                                               value = 0xC4 - 0xE5;
+                                       else if (s.IndexOf ("LEFT-POINTING") >= 0)
+                                               value = 0xC5 - 0xE5;
+                               }
+                               else if (s.IndexOf ("DIAMOND") >= 0)
+                                       value = 0xC6 - 0xE5;
+                               else if (s.IndexOf ("FISHEYE") >= 0)
+                                       value = 0xC7 - 0xE5;
+                               else if (s.IndexOf ("LOZENGE") >= 0)
+                                       value = 0xC8 - 0xE5;
+                               else if (s.IndexOf ("BULLSEYE") >= 0)
+                                       value = 0xC9 - 0xE5;
+                               else if (s.IndexOf ("CIRCLE") >= 0) {
+                                       if (cp == 0x25D6) // it could be IndexOf ("LEFT HALF BLACK CIRCLE")
+                                               value = 0xCA - 0xE5;
+                                       else if (cp == 0x25D7) // it could be IndexOf ("RIGHT HALF BLACK CIRCLE")
+                                               value = 0xCB - 0xE5;
+                                       else
+                                               value = 0xC9 - 0xE5;
+                               }
+                               else if (s.IndexOf ("BULLET") >= 0)
+                                       value = 0xCC - 0xE5;
+                               if (0x25DA <= cp && cp <= 0x25E5)
+                                       value = 0xCD + cp - 0x25DA - 0xE5;
+
+                               // SPECIAL CASE: BOX DRAWING DIAGONAL patterns
+                               switch (cp) {
+                               case 0x2571: value = 0xF; break;
+                               case 0x2572: value = 0x10; break;
+                               case 0x2573: value = 0x11; break;
+                               }
+                               if (value != int.MinValue)
+                                       boxValues.Add (new DictionaryEntry (
+                                               cp, value));
+                       }
 
                        // For some characters store the name and sort later
                        // to determine sorting.
                        if (0x2100 <= cp && cp <= 0x213F &&
                                Char.IsSymbol ((char) cp))
                                sortableCharNames.Add (
-                                       new DictionaryEntry (cp, values [0]));
+                                       new DictionaryEntry (cp, name));
                        else if (0x3380 <= cp && cp <= 0x33DD)
                                sortableCharNames.Add (new DictionaryEntry (
-                                       cp, values [0].Substring (7)));
+                                       cp, name.Substring (7)));
+
+                       if (Char.GetUnicodeCategory ((char) cp) ==
+                               UnicodeCategory.MathSymbol) {
+                               if (name.StartsWith ("CIRCLED "))
+                                       diacritical [cp] = 0xEE;
+                               if (name.StartsWith ("SQUARED "))
+                                       diacritical [cp] = 0xEF;
+                       }
 
                        // diacritical weights by character name
-                       for (int d = 0; d < diacritics.Length; d++)
-                               if (s.IndexOf (diacritics [d]) > 0)
-                                       diacritical [cp] |= diacriticWeights [d];
+if (diacritics.Length != diacriticWeights.Length)
+throw new Exception (String.Format ("Should not happen. weights are {0} while labels are {1}", diacriticWeights.Length, diacritics.Length));
+                       for (int d = diacritics.Length - 1; d >= 0; d--) {
+                               if (s.IndexOf (diacritics [d]) > 0) {
+                                       diacritical [cp] += diacriticWeights [d];
+                                       if (s.IndexOf ("COMBINING") >= 0)
+                                               diacritical [cp] -= (byte) 2;
+                                       break;
+                               }
+                               // also process "COMBINING blah" here
+                               // For now it is limited to cp < 0x0370
+//                             if (cp < 0x0300 || cp >= 0x0370)
+//                                     continue;
+                               string tmp = diacritics [d].TrimEnd (';');
+                               if (tmp.IndexOf ("WITH ") == 0)
+                                       tmp = tmp.Substring (4);
+                               tmp = String.Concat ("COMBINING", (tmp [0] != ' ' ? " " : ""), tmp);
+                               if (name == tmp) {
+                                       diacritical [cp] = (byte) (diacriticWeights [d] - 2);
+                                       break;
+                               }
+//if (name == tmp)
+//Console.Error.WriteLine ("======= {2:X04} : '{0}' / '{1}'", name, tmp, cp);
+                       }
                        // Two-step grep required for it.
                        if (s.IndexOf ("FULL STOP") > 0 &&
                                (s.IndexOf ("DIGIT") > 0 || s.IndexOf ("NUMBER") > 0))
                                diacritical [cp] |= 0xF4;
+                       if (s.StartsWith ("SCRIPT") || s.IndexOf (" SCRIPT ") > 0)
+                               diacritical [cp] = (byte) (s.IndexOf ("SMALL") > 0 ? 3 :
+                                       s.IndexOf ("CAPITAL") > 0 ? 5 : 4);
 
                        // Arabic letter name
                        if (0x0621 <= cp && cp <= 0x064A &&
@@ -574,8 +1277,8 @@ namespace Mono.Globalization.Unicode
                                                (cp == 0x0640) ?
                                                // 0x0640 is special: it does
                                                // not start with ARABIC LETTER
-                                               values [0] :
-                                               values [0].Substring (14);
+                                               name :
+                                               name.Substring (14);
                                        int tmpIdx = letterName.IndexOf (' ');
                                        letterName = tmpIdx < 0 ? letterName : letterName.Substring (0, tmpIdx);
 //Console.Error.WriteLine ("Arabic name for {0:X04} is {1}", cp, letterName);
@@ -591,7 +1294,7 @@ namespace Mono.Globalization.Unicode
                        // Japanese square letter
                        if (0x3300 <= cp && cp <= 0x3357)
                                if (!ExistsJIS (cp))
-                                       nonJisJapanese.Add (new NonJISCharacter (cp, values [0]));
+                                       nonJisJapanese.Add (new NonJISCharacter (cp, name));
 
                        // normalizationType
                        string decomp = values [4];
@@ -765,7 +1468,6 @@ namespace Mono.Globalization.Unicode
 
                void ParseScripts (string filename)
                {
-                       ArrayList cyrillic = new ArrayList ();
                        ArrayList gurmukhi = new ArrayList ();
                        ArrayList gujarati = new ArrayList ();
                        ArrayList georgian = new ArrayList ();
@@ -795,11 +1497,6 @@ namespace Mono.Globalization.Unicode
                                                continue;
 
                                        switch (value) {
-                                       case "Cyrillic":
-                                               for (int x = cp; x <= cpEnd; x++)
-                                                       if (!IsIgnorable (x))
-                                                               cyrillic.Add ((char) x);
-                                               break;
                                        case "Gurmukhi":
                                                for (int x = cp; x <= cpEnd; x++)
                                                        if (!IsIgnorable (x))
@@ -823,12 +1520,10 @@ namespace Mono.Globalization.Unicode
                                        }
                                }
                        }
-                       cyrillic.Sort (UCAComparer.Instance);
                        gurmukhi.Sort (UCAComparer.Instance);
                        gujarati.Sort (UCAComparer.Instance);
                        georgian.Sort (UCAComparer.Instance);
                        thaana.Sort (UCAComparer.Instance);
-                       orderedCyrillic = (char []) cyrillic.ToArray (typeof (char));
                        orderedGurmukhi = (char []) gurmukhi.ToArray (typeof (char));
                        orderedGujarati = (char []) gujarati.ToArray (typeof (char));
                        orderedGeorgian = (char []) georgian.ToArray (typeof (char));
@@ -837,26 +1532,37 @@ namespace Mono.Globalization.Unicode
 
                void ParseJISOrder (string filename)
                {
-                       using (StreamReader file =
-                               new StreamReader (filename)) {
-                               while (file.Peek () >= 0) {
-                                       string s = file.ReadLine ();
-                                       int idx = s.IndexOf ('#');
-                                       if (idx >= 0)
-                                               s = s.Substring (0, idx).Trim ();
-                                       if (s.Length == 0)
-                                               continue;
-                                       idx = s.IndexOf (' ');
-                                       if (idx < 0)
-                                               continue;
-                                       // They start with "0x" so cut them out.
-                                       int jis = int.Parse (s.Substring (2, idx), NumberStyles.HexNumber);
-                                       int cp = int.Parse (s.Substring (idx + 3).Trim (), NumberStyles.HexNumber);
-                                       jisJapanese.Add (new JISCharacter (cp, jis));
+                       int line = 1;
+                       try {
+                               using (StreamReader file =
+                                       new StreamReader (filename)) {
+                                       for (;file.Peek () >= 0; line++)
+                                               ProcessJISOrderLine (file.ReadLine ());
                                }
+                       } catch (Exception) {
+                               Console.Error.WriteLine ("---- line {0}", line);
+                               throw;
                        }
                }
 
+               char [] ws = new char [] {'\t', ' '};
+
+               void ProcessJISOrderLine (string s)
+               {
+                       int idx = s.IndexOf ('#');
+                       if (idx >= 0)
+                               s = s.Substring (0, idx).Trim ();
+                       if (s.Length == 0)
+                               return;
+                       idx = s.IndexOfAny (ws);
+                       if (idx < 0)
+                               return;
+                       // They start with "0x" so cut them out.
+                       int jis = int.Parse (s.Substring (2, idx - 2), NumberStyles.HexNumber);
+                       int cp = int.Parse (s.Substring (idx).Trim ().Substring (2), NumberStyles.HexNumber);
+                       jisJapanese.Add (new JISCharacter (cp, jis));
+               }
+
                void ParseCJK (string zhXML, string jaXML, string koXML)
                {
                        XmlDocument doc = new XmlDocument ();
@@ -870,7 +1576,7 @@ namespace Mono.Globalization.Unicode
                        // Chinese Simplified
                        category = "chs";
                        arr = cjkCHS;
-                       offset = char.MaxValue - arr.Length;
+                       offset = 0;//char.MaxValue - arr.Length;
                        doc.Load (zhXML);
                        s = doc.SelectSingleNode ("/ldml/collations/collation[@type='pinyin']/rules/pc").InnerText;
                        v = 0x8008;
@@ -887,7 +1593,7 @@ namespace Mono.Globalization.Unicode
                        // Chinese Traditional
                        category = "cht";
                        arr = cjkCHT;
-                       offset = char.MaxValue - arr.Length;
+                       offset = 0;//char.MaxValue - arr.Length;
                        s = doc.SelectSingleNode ("/ldml/collations/collation[@type='stroke']/rules/pc").InnerText;
                        v = 0x8002;
                        foreach (char c in s) {
@@ -903,17 +1609,56 @@ namespace Mono.Globalization.Unicode
                        // Japanese
                        category = "ja";
                        arr = cjkJA;
-                       offset = char.MaxValue - arr.Length;
-                       doc.Load (jaXML);
-                       s = doc.SelectSingleNode ("/ldml/collations/collation/rules/pc").InnerText;
+                       offset = 0;//char.MaxValue - arr.Length;
+
+                       // SPECIAL CASES
+                       arr [0x4EDD] = 0x8002; // Chinese repetition mark?
+                       arr [0x337B] = 0x8004; // Those 4 characters are Gengou
+                       arr [0x337E] = 0x8005;
+                       arr [0x337D] = 0x8006;
+                       arr [0x337C] = 0x8007;
+
                        v = 0x8008;
-                       foreach (char c in s) {
+                       foreach (JISCharacter jc in jisJapanese) {
+                               if (jc.JIS < 0x8800)
+                                       continue;
+                               char c = (char) jc.CP;
+
                                if (c < '\u4E00')
-                                       Console.Error.WriteLine ("---- warning: for {0} {1:X04} is omitted which should be {2:X04}", category, (int) c, v);
+                                       // Console.Error.WriteLine ("---- warning: for {0} {1:X04} is omitted which should be {2:X04}", category, (int) c, v);
+                                       continue;
                                else {
                                        arr [(int) c - offset] = (ushort) v++;
                                        if (v % 256 == 0)
                                                v += 2;
+
+                                       // SPECIAL CASES:
+                                       if (c == '\u662D') // U+337C
+                                               continue;
+                                       if (c == '\u5927') // U+337D
+                                               continue;
+                                       if (c == '\u5E73') // U+337B
+                                               continue;
+                                       if (c == '\u660E') // U+337E
+                                               continue;
+                                       if (c == '\u9686') // U+F9DC
+                                               continue;
+
+                                       // FIXME: there are still remaining
+                                       // characters after U+FA0C.
+//                                     for (int k = 0; k < char.MaxValue; k++) {
+                                       for (int k = 0; k < '\uFA0D'; k++) {
+                                               if (decompIndex [k] == 0 || IsIgnorable (k))
+                                                       continue;
+                                               if (decompValues [decompIndex [k]] == c /*&&
+                                                       decompLength [k] == 1*/ ||
+                                                       decompLength [k] == 3 &&
+                                                       decompValues [decompIndex [k] + 1] == c) {
+                                                       arr [k - offset] = (ushort) v++;
+                                                       if (v % 256 == 0)
+                                                               v += 2;
+                                               }
+                                       }
                                }
                        }
 
@@ -929,7 +1674,7 @@ namespace Mono.Globalization.Unicode
                        //
                        category = "ko";
                        arr = cjkKO;
-                       offset = char.MaxValue - arr.Length;
+                       offset = 0;//char.MaxValue - arr.Length;
                        doc.Load (koXML);
                        foreach (XmlElement reset in doc.SelectNodes ("/ldml/collations/collation/rules/reset")) {
                                XmlElement sc = (XmlElement) reset.NextSibling;
@@ -969,8 +1714,124 @@ namespace Mono.Globalization.Unicode
                        }
                }
 
+               void ModifyUnidata ()
+               {
+                       ArrayList decompValues = new ArrayList (this.decompValues);
+
+                       // Hebrew uppercase letters.
+                       foreach (int i in new int []
+                               {0x05DB, 0x05DE, 0x05E0, 0x05E4, 0x05E6})
+                               isUppercase [i] = true;
+
+
+                       // Modify some decomposition equivalence
+                       for (int i = 0xFE31; i <= 0xFE34; i++) {
+                               decompType [i] = 0;
+                               decompIndex [i] = 0;
+                               decompLength [i] = 0;
+                       }
+                       decompType [0x037E] = 0;
+                       decompIndex [0x037E] = 0;
+                       decompLength [0x037E] = 0;
+
+                       // Hangzhou numbers
+                       for (int i = 0x3021; i <= 0x3029; i++)
+                               diacritical [i] = 0x4E;
+                       // Korean parens numbers
+                       for (int i = 0x3200; i <= 0x321C; i++)
+                               diacritical [i] = 0xA;
+                       for (int i = 0x3260; i <= 0x327B; i++)
+                               diacritical [i] = 0xC;
+
+                       // LAMESPEC: these remapping should not be done.
+                       // Windows have incorrect CJK compat mappings.
+                       decompValues [decompIndex [0x32A9]] = 0x91AB;
+                       decompLength [0x323B] = 1;
+                       decompValues [decompIndex [0x323B]] = 0x5B78;
+                       decompValues [decompIndex [0x32AB]] = 0x5B78;
+                       decompValues [decompIndex [0x32A2]] = 0x5BEB;
+                       decompLength [0x3238] = 1;
+                       decompValues [decompIndex [0x3238]] = 0x52DE;
+                       decompValues [decompIndex [0x3298]] = 0x52DE;
+
+                       // LAMESPEC: custom remapping (which is not bugs but not fine, non-standard compliant things)
+                       decompIndex [0xFA0C] = decompValues.Count;
+                       decompValues.Add ((int) 0x5140);
+                       decompLength [0xFA0C] = 1;
+                       decompIndex [0xF929] = decompLength [0xF929] = 0;
+
+                       decompValues [decompIndex [0xF92C]] = 0x90DE;
+
+                       decompIndex [0x2125] = decompValues.Count;
+                       decompValues.Add ((int) 0x005A);
+                       decompLength [0x2125] = 1;
+                       decompType [0x2125] = DecompositionFont;
+
+                       this.decompValues = decompValues.ToArray (typeof (int)) as int [];
+               }
+
                void ModifyParsedValues ()
                {
+                       // Sometimes STROKE don't work fine
+                       diacritical [0xD8] = diacritical [0xF8] = 0x21;
+                       diacritical [0x141] = diacritical [0x142] = 0x1F;
+                       // FIXME: why?
+                       diacritical [0xAA] = diacritical [0xBA] = 3;
+                       diacritical [0xD0] = diacritical [0xF0] = 0x68;
+                       diacritical [0x131] = 3;
+                       diacritical [0x138] = 3;
+                       // TOPBAR does not work as an identifier for the weight
+                       diacritical [0x182] = diacritical [0x183] = 0x68; // B
+                       diacritical [0x18B] = diacritical [0x18C] = 0x1E; // D
+                       // TONE TWO
+                       diacritical [0x1A7] = diacritical [0x1A8] = 0x87;
+                       // TONE SIX
+                       diacritical [0x184] = diacritical [0x185] = 0x87;
+                       // OPEN E
+                       diacritical [0x190] = diacritical [0x25B] = 0x7B;
+                       // There are many letters w/ diacritical weight 0x7B
+                       diacritical [0x0192] = diacritical [0x0194] =
+                       diacritical [0x0195] = diacritical [0x0196] =
+                       diacritical [0x019C] = diacritical [0x019E] =
+                       diacritical [0x01A6] = diacritical [0x01B1] =
+                       diacritical [0x01B2] = diacritical [0x01BF] = 0x7B;
+                       // ... as well as 0x7C
+                       diacritical [0x01A2] = diacritical [0x01A3] = 0x7C;
+
+                       // <font> NFKD characters seem to have diacritical
+                       // weight as 3,4,5... but the order does not look
+                       // by codepoint and I have no idea how they are sorted.
+                       diacritical [0x210E] = 3;
+                       diacritical [0x210F] = 0x68;
+                       diacritical [0x2110] = 4;
+                       diacritical [0x2111] = 5;
+                       diacritical [0x2112] = 4;
+                       diacritical [0x2113] = 4;
+                       diacritical [0x211B] = 4;
+                       diacritical [0x211C] = 5;
+
+                       // some cyrillic diacritical weight. They seem to be
+                       // based on old character names, so it's quicker to
+                       // set them directly here.
+                       // FIXME: they are by mostly unknown reason
+                       diacritical [0x0496] = diacritical [0x0497] = 7;
+                       diacritical [0x0498] = diacritical [0x0499] = 0x1A;
+                       diacritical [0x049A] = diacritical [0x049B] = 0x17;
+                       diacritical [0x049C] = diacritical [0x049D] = 9;
+                       diacritical [0x049E] = diacritical [0x049F] = 4;
+                       diacritical [0x04A0] = diacritical [0x04A1] = 0xA;
+                       diacritical [0x04A2] = diacritical [0x04A3] = 7;
+                       diacritical [0x04A4] = diacritical [0x04A5] = 8;
+                       diacritical [0x04AA] = diacritical [0x04AB] = 0x1A; // ES CEDILLA?
+                       diacritical [0x04AC] = diacritical [0x04AD] = 7; // RIGHT DESCENDER? but U+4B2
+                       diacritical [0x04AE] = diacritical [0x04AF] = 0xB; // STRAIGHT U?
+                       diacritical [0x04B2] = diacritical [0x04B3] = 0x17; // RIGHT DESCENDER? but U+4AC
+                       diacritical [0x04B4] = diacritical [0x04B5] = 3;
+                       diacritical [0x04B6] = 8;
+                       diacritical [0x04B7] = 7;
+                       diacritical [0x04B8] = diacritical [0x04B9] = 9;
+                       diacritical [0x04BA] = diacritical [0x04BB] = 9;
+
                        // number, secondary weights
                        byte weight = 0x38;
                        int [] numarr = numberSecondaryWeightBounds;
@@ -979,11 +1840,12 @@ namespace Mono.Globalization.Unicode
                                        if (Char.IsNumber ((char) cp))
                                                diacritical [cp] = weight;
 
-                       // Korean parens numbers
-                       for (int i = 0x3200; i <= 0x321C; i++)
-                               diacritical [i] = 0xA;
-                       for (int i = 0x3260; i <= 0x327B; i++)
-                               diacritical [i] = 0xC;
+                       // Gurmukhi special letters' diacritical weight
+                       for (int i = 0x0A50; i < 0x0A60; i++)
+                               diacritical [i] = 4;
+                       // Oriya special letters' diacritical weight
+                       for (int i = 0x0B5C; i < 0x0B60; i++)
+                               diacritical [i] = 6;
 
                        // Update name part of named characters
                        for (int i = 0; i < sortableCharNames.Count; i++) {
@@ -1025,14 +1887,25 @@ namespace Mono.Globalization.Unicode
 
                        #region Specially ignored // 01
                        // This will raise "Defined" flag up.
+                       // FIXME: Check If it is really fine. Actually for
+                       // Japanese voice marks this code does remapping.
                        foreach (char c in specialIgnore)
                                map [(int) c] = new CharMapEntry (0, 0, 0);
                        #endregion
 
+                       #region Extenders (FF FF)
+                       fillIndex [0xFF] = 0xFF;
+                       char [] specialBiggest = new char [] {
+                               '\u3005', '\u3031', '\u3032', '\u309D',
+                               '\u309E', '\u30FC', '\u30FD', '\u30FE',
+                               '\uFE7C', '\uFE7D', '\uFF70'};
+                       foreach (char c in specialBiggest)
+                               AddCharMap (c, 0xFF, 0);
+                       #endregion
 
                        #region Variable weights
                        // Controls : 06 03 - 06 3D
-                       fillIndex [6] = 3;
+                       fillIndex [0x6] = 3;
                        for (int i = 0; i < 65536; i++) {
                                if (IsIgnorable (i))
                                        continue;
@@ -1045,22 +1918,41 @@ namespace Mono.Globalization.Unicode
                        }
 
                        // Apostrophe 06 80
-                       fillIndex [6] = 0x80;
-                       AddCharMapGroup ('\'', 6, 1, 0);
+                       fillIndex [0x6] = 0x80;
+                       AddCharMap ('\'', 6, 0);
+                       AddCharMap ('\uFF07', 6, 1);
                        AddCharMap ('\uFE63', 6, 1);
 
+                       // SPECIAL CASE: fill FE32 here in prior to be added
+                       // at 2013. Windows does not always respect NFKD.
+                       map [0xFE32] = new CharMapEntry (6, 0x90, 0);
+
                        // Hyphen/Dash : 06 81 - 06 90
                        for (int i = 0; i < char.MaxValue; i++) {
-                               if (Char.GetUnicodeCategory ((char) i)
-                                       == UnicodeCategory.DashPunctuation)
-                                       AddCharMapGroupTail ((char) i, 6, 1);
+                               if (!IsIgnorable (i) &&
+                                       Char.GetUnicodeCategory ((char) i) ==
+                                       UnicodeCategory.DashPunctuation) {
+                                       AddCharMapGroup2 ((char) i, 6, 1, 0);
+                                       if (i == 0x2011) {
+                                               // SPECIAL: add 2027 and 2043
+                                               // Maybe they are regarded the 
+                                               // same hyphens in "central"
+                                               // position.
+                                               AddCharMap ('\u2027', 6, 1);
+                                               AddCharMap ('\u2043', 6, 1);
+                                       }
+                               }
                        }
+                       // They are regarded as primarily equivalent to '-'
+                       map [0x208B] = new CharMapEntry (6, 0x82, 0);
+                       map [0x207B] = new CharMapEntry (6, 0x82, 0);
+                       map [0xFF0D] = new CharMapEntry (6, 0x82, 0);
 
                        // Arabic variable weight chars 06 A0 -
                        fillIndex [6] = 0xA0;
                        // vowels
                        for (int i = 0x64B; i <= 0x650; i++)
-                               AddCharMapGroupTail ((char) i, 6, 1);
+                               AddArabicCharMap ((char) i, 6, 1, 0);
                        // sukun
                        AddCharMapGroup ('\u0652', 6, 1, 0);
                        // shadda
@@ -1080,10 +1972,11 @@ namespace Mono.Globalization.Unicode
                        for (int i = 0x0329; i <= 0x0334; i++)
                                if (!IsIgnorable (i))
                                        AddCharMap ((char) i, 0x1, 1);
+                       fillIndex [0x1]++;
                        for (int i = 0x0339; i <= 0x0341; i++)
                                if (!IsIgnorable (i))
                                        AddCharMap ((char) i, 0x1, 1);
-                       fillIndex [0x1] = 0x72;
+                       fillIndex [0x1] = 0x74;
                        for (int i = 0x0346; i <= 0x0348; i++)
                                if (!IsIgnorable (i))
                                        AddCharMap ((char) i, 0x1, 1);
@@ -1096,6 +1989,7 @@ namespace Mono.Globalization.Unicode
                        for (int i = 0x02CE; i <= 0x02CF; i++)
                                if (!IsIgnorable (i))
                                        AddCharMap ((char) i, 0x1, 1);
+                       fillIndex [0x1]++;
                        for (int i = 0x02D1; i <= 0x02D3; i++)
                                if (!IsIgnorable (i))
                                        AddCharMap ((char) i, 0x1, 1);
@@ -1104,12 +1998,87 @@ namespace Mono.Globalization.Unicode
                                if (!IsIgnorable (i))
                                        AddCharMap ((char) i, 0x1, 1);
 
+
+                       // FIXME: needs more love here (it should eliminate
+                       // all the hacky code above).
+                       for (int i = 0x0300; i < 0x0370; i++)
+                               if (!IsIgnorable (i) && diacritical [i] != 0
+                                       && !map [i].Defined)
+                                       map [i] = new CharMapEntry (
+                                               0x1, 0x1, diacritical [i]);
+
+                       // Cyrillic and Armenian nonspacing mark
+                       fillIndex [0x1] = 0x94;
+                       for (int i = 0x400; i < 0x580; i++)
+                               if (!IsIgnorable (i) &&
+                                       Char.GetUnicodeCategory ((char) i) ==
+                                       UnicodeCategory.NonSpacingMark)
+                                       AddCharMap ((char) i, 1, 1);
+
+                       fillIndex [0x1] = 0x8D;
+                       // syriac dotted nonspacing marks (1)
+                       AddCharMap ('\u0740', 0x1, 1);
+                       AddCharMap ('\u0741', 0x1, 1);
+                       AddCharMap ('\u0742', 0x1, 1);
+                       // syriac oblique nonspacing marks
+                       AddCharMap ('\u0747', 0x1, 1);
+                       AddCharMap ('\u0748', 0x1, 1);
+                       // syriac dotted nonspacing marks (2)
+                       fillIndex [0x1] = 0x94; // this reset is mandatory
+                       AddCharMap ('\u0732', 0x1, 1);
+                       AddCharMap ('\u0735', 0x1, 1);
+                       AddCharMap ('\u0738', 0x1, 1);
+                       AddCharMap ('\u0739', 0x1, 1);
+                       AddCharMap ('\u073C', 0x1, 1);
+                       // SPECIAL CASES: superscripts
+                       AddCharMap ('\u073F', 0x1, 1);
+                       AddCharMap ('\u0711', 0x1, 1);
+                       // syriac "DOTS"
+                       for (int i = 0x0743; i <= 0x0746; i++)
+                               AddCharMap ((char) i, 0x1, 1);
+                       for (int i = 0x0730; i <= 0x0780; i++)
+                               if (!map [i].Defined &&
+                                       Char.GetUnicodeCategory ((char) i) ==
+                                       UnicodeCategory.NonSpacingMark)
+                                       AddCharMap ((char) i, 0x1, 1);
+
                        // LAMESPEC: It should not stop at '\u20E1'. There are
                        // a few more characters (that however results in 
                        // overflow of level 2 unless we start before 0xDD).
-                       fillIndex [0x1] = 0xDC;
-                       for (int i = 0x20d0; i <= 0x20e1; i++)
+                       fillIndex [0x1] = 0xDD;
+                       for (int i = 0x20D0; i <= 0x20DC; i++)
+                               AddCharMap ((char) i, 0x1, 1);
+                       fillIndex [0x1] = 0xEC;
+                       for (int i = 0x20DD; i <= 0x20E1; i++)
+                               AddCharMap ((char) i, 0x1, 1);
+                       fillIndex [0x1] = 0x4;
+                       AddCharMap ('\u0CD5', 0x1, 1);
+                       AddCharMap ('\u0CD6', 0x1, 1);
+                       AddCharMap ('\u093C', 0x1, 1);
+                       for (int i = 0x302A; i <= 0x302D; i++)
                                AddCharMap ((char) i, 0x1, 1);
+                       AddCharMap ('\u0C55', 0x1, 1);
+                       AddCharMap ('\u0C56', 0x1, 1);
+
+                       fillIndex [0x1] = 0x50; // I wonder how they are sorted
+                       for (int i = 0x02D4; i <= 0x02D7; i++)
+                               AddCharMap ((char) i, 0x1, 1);
+
+                       // They are not part of Nonspacing marks, but have
+                       // only diacritical weight.
+                       for (int i = 0x3099; i <= 0x309C; i++)
+                               map [i] = new CharMapEntry (1, 1, 1);
+                       map [0xFF9E] = new CharMapEntry (1, 1, 1);
+                       map [0xFF9F] = new CharMapEntry (1, 1, 2);
+                       map [0x309D] = new CharMapEntry (0xFF, 0xFF, 1);
+                       map [0x309E] = new CharMapEntry (0xFF, 0xFF, 1);
+                       for (int i = 0x30FC; i <= 0x30FE; i++)
+                               map [i] = new CharMapEntry (0xFF, 0xFF, 1);
+
+                       fillIndex [0x1] = 0xA;
+                       for (int i = 0x0951; i <= 0x0954; i++)
+                               AddCharMap ((char) i, 0x1, 2);
+
                        #endregion
 
 
@@ -1132,35 +2101,51 @@ namespace Mono.Globalization.Unicode
                        // while they aren't.
                        AddCharMap ('\u2422', 0x7, 1, 0); // blank symbol
                        AddCharMap ('\u2423', 0x7, 1, 0); // open box
+
                        #endregion
 
-                       // FIXME: 09 should be more complete.
+                       // category 09 - continued symbols from 08
                        fillIndex [0x9] = 2;
                        // misc tech mark
                        for (int cp = 0x2300; cp <= 0x237A; cp++)
                                AddCharMap ((char) cp, 0x9, 1, 0);
 
                        // arrows
-                       byte [] arrowLv2 = new byte [] {0, 3, 3, 3, 3, 3, 3, 3, 3};
+                       byte [] arrowLv2 = new byte [] {0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3};
                        foreach (DictionaryEntry de in arrowValues) {
                                int idx = (int) de.Value;
                                int cp = (int) de.Key;
                                if (map [cp].Defined)
                                        continue;
                                fillIndex [0x9] = (byte) (0xD8 + idx);
-//Console.Error.WriteLine ("-------- {0} {1:X04} {2:X02} {3:X02}", idx, cp, fillIndex [0x9], arrowLv2 [idx]);
                                AddCharMapGroup ((char) cp, 0x9, 0, arrowLv2 [idx]);
                                arrowLv2 [idx]++;
-//                             map [cp] = new CharMapEntry (0x9, fillIndex [0x9]++, arrowLv2 [idx]++);
                        }
-
-                       // FIXME: 08 should be more complete.
-                       fillIndex [0x8] = 2;
-                       for (int cp = 0; cp < char.MaxValue; cp++)
-                               if (!map [cp].Defined &&
-                                       Char.GetUnicodeCategory ((char) cp) ==
-                                       UnicodeCategory.MathSymbol)
-                                       AddCharMapGroup ((char) cp, 0x8, 1, 0);
+                       // boxes
+                       byte [] boxLv2 = new byte [128];
+                       // 0-63 will be used for those offsets are positive,
+                       // and 64-127 are for negative ones.
+                       for (int i = 0; i < boxLv2.Length; i++)
+                               boxLv2 [i] = 3;
+                       foreach (DictionaryEntry de in boxValues) {
+                               int cp = (int) de.Key;
+                               int off = (int) de.Value;
+                               if (map [cp].Defined)
+                                       continue;
+                               if (off < 0) {
+                                       fillIndex [0x9] = (byte) (0xE5 + off);
+                                       AddCharMapGroup ((char) cp, 0x9, 0, boxLv2 [128 + off]++);
+                               }
+                               else {
+                                       fillIndex [0x9] = (byte) (0xE5 + off);
+                                       AddCharMapGroup ((char) cp, 0x9, 0, boxLv2 [off]++);
+                               }
+                       }
+                       // Some special characters (slanted)
+                       fillIndex [0x9] = 0xF4;
+                       AddCharMap ('\u2571', 0x9, 3);
+                       AddCharMap ('\u2572', 0x9, 3);
+                       AddCharMap ('\u2573', 0x9, 3);
 
                        // FIXME: implement 0A
                        #region Symbols
@@ -1175,26 +2160,58 @@ namespace Mono.Globalization.Unicode
                        }
                        // byte other symbols
                        for (int cp = 0; cp < 0x100; cp++) {
+                               if (cp == 0xA6)
+                                       continue; // SPECIAL: skip FIXME: why?
                                uc = Char.GetUnicodeCategory ((char) cp);
                                if (!IsIgnorable (cp) &&
-                                       uc == UnicodeCategory.OtherSymbol)
+                                       uc == UnicodeCategory.OtherSymbol ||
+                                       cp == '\u00AC' || cp == '\u00B5' || cp == '\u00B7')
                                        AddCharMapGroup ((char) cp, 0xA, 1, 0);
                        }
+                       // U+30FB here
+                       AddCharMapGroup ('\u30FB', 0xA, 1, 0);
+
+                       for (int cp = 0x2020; cp <= 0x2031; cp++)
+                               if (Char.IsPunctuation ((char) cp))
+                                       AddCharMap ((char) cp, 0xA, 1, 0);
+                       // SPECIAL CASES: why?
+                       AddCharMap ('\u203B', 0xA, 1, 0);
+                       AddCharMap ('\u2040', 0xA, 1, 0);
+                       AddCharMap ('\u2041', 0xA, 1, 0);
+                       AddCharMap ('\u2042', 0xA, 1, 0);
+
+                       for (int cp = 0x20A0; cp <= 0x20AB; cp++)
+                               AddCharMap ((char) cp, 0xA, 1, 0);
+
+                       // 3004 is skipped at first...
+                       for (int cp = 0x3010; cp <= 0x3040; cp++)
+                               if (Char.IsSymbol ((char) cp))
+                                       AddCharMap ((char) cp, 0xA, 1, 0);
+                       // SPECIAL CASES: added here
+                       AddCharMap ('\u3004', 0xA, 1, 0);
+                       AddCharMap ('\u327F', 0xA, 1, 0);
 
-                       fillIndex [0xA] = 0x2F; // FIXME: it won't be needed
                        for (int cp = 0x2600; cp <= 0x2613; cp++)
                                AddCharMap ((char) cp, 0xA, 1, 0);
+                       // Dingbats
                        for (int cp = 0x2620; cp <= 0x2770; cp++)
                                if (Char.IsSymbol ((char) cp))
                                        AddCharMap ((char) cp, 0xA, 1, 0);
-
+                       // OCR
+                       for (int i = 0x2440; i < 0x2460; i++)
+                               AddCharMap ((char) i, 0xA, 1, 0);
+
+                       // SPECIAL CASES: why?
+                       AddCharMap ('\u0E3F', 0xA, 1, 0);
+                       AddCharMap ('\u2117', 0xA, 1, 0);
+                       AddCharMap ('\u20AC', 0xA, 1, 0);
                        #endregion
 
                        #region Numbers // 0C 02 - 0C E1
                        fillIndex [0xC] = 2;
 
                        // 9F8 : Bengali "one less than the denominator"
-                       AddCharMap ('\u09F8', 0xC, 1);
+                       AddCharMap ('\u09F8', 0xC, 1, 0x3C);
 
                        ArrayList numbers = new ArrayList ();
                        for (int i = 0; i < 65536; i++)
@@ -1206,11 +2223,15 @@ namespace Mono.Globalization.Unicode
                        ArrayList numberValues = new ArrayList ();
                        foreach (int i in numbers)
                                numberValues.Add (new DictionaryEntry (i, decimalValue [(char) i]));
+                       // SPECIAL CASE: Cyrillic Thousand sign
+                       numberValues.Add (new DictionaryEntry (0x0482, 1000m));
                        numberValues.Sort (DecimalDictionaryValueComparer.Instance);
 
 //foreach (DictionaryEntry de in numberValues)
 //Console.Error.WriteLine ("****** number {0:X04} : {1} {2}", de.Key, de.Value, decompType [(int) de.Key]);
 
+                       // FIXME: fillIndex adjustment lines are too
+                       // complicated. It must be simpler.
                        decimal prevValue = -1;
                        foreach (DictionaryEntry de in numberValues) {
                                int cp = (int) de.Key;
@@ -1228,14 +2249,25 @@ namespace Mono.Globalization.Unicode
                                                fillIndex [0xC]++;
 
                                        int xcp;
-                                       xcp = (int) prevValue + 0x2170 - 1;
-                                       AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-                                       xcp = (int) prevValue + 0x2160 - 1;
-                                       AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-                                       fillIndex [0xC] += 2;
-                                       xcp = (int) prevValue + 0x3021 - 1;
-                                       AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-                                       fillIndex [0xC]++;
+                                       if (currValue <= 13) {
+                                               if (currValue == 4)
+                                                       fillIndex [0xC]++;
+                                               // SPECIAL CASE
+                                               if (currValue == 11)
+                                                       AddCharMap ('\u0BF0', 0xC, 1);
+                                               xcp = (int) prevValue + 0x2160 - 1;
+                                               AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
+                                               xcp = (int) prevValue + 0x2170 - 1;
+                                               AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
+                                               fillIndex [0xC]++;
+                                       }
+                                       if (currValue < 12)
+                                               fillIndex [0xC]++;
+                                       if (currValue <= 10) {
+                                               xcp = (int) prevValue + 0x3021 - 1;
+                                               AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
+                                               fillIndex [0xC]++;
+                                       }
                                }
                                if (prevValue < currValue)
                                        prevValue = currValue;
@@ -1243,39 +2275,56 @@ namespace Mono.Globalization.Unicode
                                        continue;
                                // HangZhou and Roman are add later 
                                // (code is above)
-                               else if (0x3021 <= cp && cp < 0x302A
-                                       || 0x2160 <= cp && cp < 0x216A
-                                       || 0x2170 <= cp && cp < 0x217A)
+                               if (0x3021 <= cp && cp < 0x302A
+                                       || 0x2160 <= cp && cp < 0x216C
+                                       || 0x2170 <= cp && cp < 0x217C)
                                        continue;
 
-                               if (cp ==  0x215B) // FIXME: why?
+                               if (cp == 0x215B) // FIXME: why?
                                        fillIndex [0xC] += 2;
                                else if (cp == 0x3021) // FIXME: why?
                                        fillIndex [0xC]++;
-                               AddCharMapGroup ((char) cp, 0xC, 0, diacritical [cp]);
-
                                if (addnew || cp <= '9') {
+                                       int mod = (int) currValue - 1;
                                        int xcp;
-                                       if (1 <= currValue && currValue <= 10) {
-                                               xcp = cp - 0x31 + 0x2776;
+                                       if (1 <= currValue && currValue <= 11) {
+                                               xcp = mod + 0x2776;
                                                AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-                                               xcp = cp - 0x31 + 0x2780;
+                                               xcp = mod + 0x2780;
                                                AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-                                               xcp = cp - 0x31 + 0x278A;
+                                               xcp = mod + 0x278A;
                                                AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
                                        }
                                        if (1 <= currValue && currValue <= 20) {
-                                               xcp = cp - 0x31 + 0x2460;
+                                               xcp = mod + 0x2460;
                                                AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-                                               xcp = cp - 0x31 + 0x2474;
+                                               xcp = mod + 0x2474;
                                                AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-                                               xcp = cp - 0x31 + 0x2488;
+                                               xcp = mod + 0x2488;
                                                AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
                                        }
                                }
+                               if (addnew && currValue >= 10 && currValue < 13 || cp == 0x09F9)
+                                       fillIndex [0xC]++;
+                               AddCharMapGroup ((char) cp, 0xC, 0, diacritical [cp], true);
 
-                               if (cp != 0x09E7 && cp != 0x09EA)
+                               switch (cp) {
+                               // Maybe Bengali digit numbers do not increase
+                               // indexes, but 0x09E6 does.
+                               case 0x09E7: case 0x09E8: case 0x09E9:
+                               case 0x09EA:
+                               // SPECIAL CASES
+                               case 0x0BF0: case 0x2180: case 0x2181:
+                                       break;
+                               // SPECIAL CASE
+                               case 0x0BF1:
                                        fillIndex [0xC]++;
+                                       break;
+                               default:
+                                       if (currValue < 11 || currValue == 1000)
+                                               fillIndex [0xC]++;
+                                       break;
+                               }
 
                                // Add special cases that are not regarded as 
                                // numbers in UnicodeCategory speak.
@@ -1284,7 +2333,7 @@ namespace Mono.Globalization.Unicode
                                        AddCharMapGroup ('\u01BD', 0xC, 0, 0);
                                        AddCharMapGroup ('\u01BC', 0xC, 1, 0);
                                }
-                               else if (cp == '6') // FIXME: why?
+                               else if (cp == '2' || cp == '6') // FIXME: why?
                                        fillIndex [0xC]++;
                        }
 
@@ -1299,7 +2348,6 @@ namespace Mono.Globalization.Unicode
                        for (int i = 0; i < alphabets.Length; i++)
                                AddAlphaMap (alphabets [i], 0xE, alphaWeights [i]);
 
-
                        // non-ASCII Latin alphabets
                        // FIXME: there is no such characters that are placed
                        // *after* "alphabets" array items. This is nothing
@@ -1319,6 +2367,9 @@ namespace Mono.Globalization.Unicode
                                //   but inside a-to-z range.
                                // 3.there are some expanded characters that
                                //   are not part of Unicode Standard NFKD.
+                               // 4. some characters are letter in IsLetter
+                               //   but not in sortkeys (maybe unicode version
+                               //   difference caused it).
                                switch (i) {
                                // 1. skipping them does not make sense
 //                             case 0xD0: case 0xF0: case 0x131: case 0x138:
@@ -1336,61 +2387,188 @@ namespace Mono.Globalization.Unicode
                                case 0xFE: // Icelandic Thorn
                                case 0xDF: // German ss
                                case 0xFF: // German ss
+                               // 4.
+                               case 0x1C0: case 0x1C1: case 0x1C2: case 0x1C3:
                                // not classified yet
 //                             case 0x1A6: case 0x1A7: case 0x1A8: case 0x1A9:
 //                             case 0x1AA: case 0x1B1: case 0x1B7: case 0x1B8:
 //                             case 0x1B9: case 0x1BA: case 0x1BB: case 0x1BF:
-//                             case 0x1C0: case 0x1C1: case 0x1C2: case 0x1C3:
 //                             case 0x1DD:
                                        continue;
                                }
                                AddCharMapGroup ((char) i, 0xE, 1, 0);
                        }
 
-                       // Greek and Coptic
-                       fillIndex [0xF] = 02;
-                       for (int i = 0x0380; i < 0x0390; i++)
+                       // IPA extensions
+                       // FIXME: this results in not equivalent values to
+                       // Windows, but is safer for comparison.
+                       char [] ipaArray = new char [0x300 - 0x250 + 0x20];
+                       for (int i = 0x40; i < 0x60; i++)
                                if (Char.IsLetter ((char) i))
-                                       AddLetterMap ((char) i, 0xF, 1);
-                       fillIndex [0xF] = 02;
-                       for (int i = 0x0391; i < 0x03CF; i++)
+                                       ipaArray [i - 0x40] = (char) (i);
+                       for (int i = 0x250; i < 0x300; i++)
                                if (Char.IsLetter ((char) i))
-                                       AddLetterMap ((char) i, 0xF, 1);
+                                       ipaArray [i - 0x250 + 0x20] = (char) i;
+                       Array.Sort (ipaArray, UCAComparer.Instance);
+                       int targetASCII = 0;
+                       byte latinDiacritical = 0x7B;
+                       foreach (char c in ipaArray) {
+                               if (c <= 'Z') {
+                                       targetASCII = c;
+                                       latinDiacritical = 0x7B;
+                               }
+                               else
+                                       map [(int) c] = new CharMapEntry (
+                                               0xE,
+                                               map [targetASCII].Level1,
+                                               latinDiacritical++);
+                       }
+
+                       // Greek and Coptic
+
+                       // FIXME: this is (mysterious and) incomplete.
+                       for (int i = 0x0380; i < 0x0400; i++)
+                               if (diacritical [i] == 0 &&
+                                       decompLength [i] == 1 &&
+                                       decompType [i] == DecompositionCompat)
+                                       diacritical [i] = 3;
+
+                       fillIndex [0xF] = 2;
+                       for (int i = 0x0391; i < 0x03AA; i++)
+                               if (i != 0x03A2)
+                                       AddCharMap ((char) i, 0xF, 1,
+                                               diacritical [i]);
+                       fillIndex [0xF] = 2;
+                       for (int i = 0x03B1; i < 0x03CA; i++)
+                               if (i != 0x03C2)
+                                       AddCharMap ((char) i, 0xF, 1,
+                                               diacritical [i]);
+                       // Final Sigma
+                       map [0x03C2] = new CharMapEntry (0xF,
+                               map [0x03C3].Level1, map [0x03C3].Level2);
+
                        fillIndex [0xF] = 0x40;
-                       for (int i = 0x03D0; i < 0x0400; i++)
-                               if (Char.IsLetter ((char) i))
-                                       AddLetterMap ((char) i, 0xF, 1);
+                       for (int i = 0x03DA; i < 0x03F0; i++)
+                               AddCharMap ((char) i, 0xF,
+                                       (byte) (i % 2 == 0 ? 0 : 2),
+                                       diacritical [i]);
+
+                       // NFKD
+                       for (int i = 0x0386; i <= 0x0400; i++)
+                               FillLetterNFKD (i, true, true);
+
+                       // Cyrillic.
+                       // Cyrillic letters are sorted like Latin letters i.e. 
+                       // containing culture-specific letters between the
+                       // standard Cyrillic sequence.
+                       //
+                       // We can't use UCA here; it has different sorting.
+                       char [] orderedCyrillic = new char [] {
+                               '\u0430', '\u0431', '\u0432', '\u0433', '\u0434',
+                               '\u0452', // DJE for Serbocroatian
+                               '\u0435',
+                               '\u0454', // IE for Ukrainian
+                               '\u0436', '\u0437',
+                               '\u0455', // DZE
+                               '\u0438',
+                               '\u0456', // Byelorussian-Ukrainian I
+                               '\u0457', // YI
+                               '\u0439',
+                               '\u0458', // JE
+                               '\u043A', '\u043B',
+                               '\u0459', // LJE
+                               '\u043C', '\u043D',
+                               '\u045A', // NJE
+                               '\u043E',
+                               // 4E9 goes here.
+                               '\u043F', '\u0440', '\u0441', '\u0442',
+                               '\u045B', // TSHE for Serbocroatian
+                               '\u0443',
+                               '\u045E', // Short U for Byelorussian
+                               '\u04B1', // Straight U w/ stroke (diacritical!)
+                               '\u0444', '\u0445', '\u0446', '\u0447',
+                               '\u045F', // DZHE
+                               '\u0448', '\u0449', '\u044A', '\u044B', '\u044C',
+                               '\u044D', '\u044E', '\u044F'};
+
+                       // For some characters here is a map to basic cyrillic
+                       // letters. See UnicodeData.txt character names for
+                       // the sources. Here I simply declare an equiv. array.
+                       // The content characters are map from U+490(,491),
+                       // skipping small letters.
+                       char [] cymap_src = new char [] {
+                               '\u0433', '\u0433', '\u0433', '\u0436',
+                               '\u0437', '\u043A', '\u043A', '\u043A',
+                               '\u043A', '\u043D', '\u043D', '\u043F',
+                               '\u0445', '\u0441', '\u0442', '\u0443',
+                               '\u0443', '\u0445', '\u0446', '\u0447',
+                               '\u0447', '\u0432', '\u0435', '\u0435',
+                               '\u0406', '\u0436', '\u043A', '\u043D',
+                               '\u0447', '\u0435'};
+
+                       fillIndex [0x10] = 0x8D;
+                       for (int i = 0x0460; i < 0x0481; i++) {
+                               if (Char.IsLetter ((char) i)) {
+                                       if (i == 0x0476)
+                                               // U+476/477 have the same
+                                               // primary weight as U+474/475.
+                                               fillIndex [0x10] -= 3;
+                                       AddLetterMap ((char) i, 0x10, 3);
+                               }
+                       }
 
-                       // Cyrillic - UCA order w/ some modification
-                       fillIndex [0x10] = 0x3;
-                       // table which is moslty from UCA DUCET.
+                       fillIndex [0x10] = 0x6;
                        for (int i = 0; i < orderedCyrillic.Length; i++) {
-                               char c = orderedCyrillic [i];
-                               if (Char.IsLetter (c))
-                                       AddLetterMap (c, 0x10, 3);
+                               char c = Char.ToUpper (orderedCyrillic [i], CultureInfo.InvariantCulture);
+                               if (!IsIgnorable ((int) c) &&
+                                       Char.IsLetter (c) &&
+                                       !map [c].Defined) {
+                                       AddLetterMap (c, 0x10, 0);
+                                       fillIndex [0x10] += 3;
+                               }
                        }
-                       for (int i = 0x0460; i < 0x0481; i++) {
-                               if (Char.IsLetter ((char) i))
-                                       AddLetterMap ((char) i, 0x10, 3);
+
+                       // NFKD
+                       for (int i = 0x0401; i <= 0x045F; i++)
+                               FillLetterNFKD (i, false, false);
+
+                       for (int i = 0; i < cymap_src.Length; i++) {
+                               char c = cymap_src [i];
+                               fillIndex [0x10] = map [c].Level1;
+                               int c2 = 0x0490 + i * 2;
+                               AddLetterMapCore ((char) c2, 0x10, 0, diacritical [c2], false);
                        }
 
                        // Armenian
                        fillIndex [0x11] = 0x3;
-                       for (int i = 0x0531; i < 0x0586; i++)
+                       fillIndex [0x1] = 0x98;
+                       for (int i = 0x0531; i < 0x0586; i++) {
+                               if (i == 0x0559 || i == 0x55A)
+                                       AddCharMap ((char) i, 1, 1);
                                if (Char.IsLetter ((char) i))
                                        AddLetterMap ((char) i, 0x11, 1);
+                       }
 
                        // Hebrew
                        // -Letters
-                       fillIndex [0x12] = 0x3;
+                       fillIndex [0x12] = 0x2;
                        for (int i = 0x05D0; i < 0x05FF; i++)
-                               if (Char.IsLetter ((char) i))
-                                       AddLetterMap ((char) i, 0x12, 1);
+                               if (Char.IsLetter ((char) i)) {
+                                       if (isUppercase [i]) {
+                                               fillIndex [0x12]--;
+                                               AddLetterMap ((char) i, 0x12, 2);
+                                       }
+                                       else
+                                               AddLetterMap ((char) i, 0x12, 1);
+                               }
                        // -Accents
                        fillIndex [0x1] = 0x3;
-                       for (int i = 0x0591; i <= 0x05C2; i++)
+                       for (int i = 0x0591; i <= 0x05C2; i++) {
+                               if (i == 0x05A3 || i == 0x05BB)
+                                       fillIndex [0x1]++;
                                if (i != 0x05BE)
                                        AddCharMap ((char) i, 0x1, 1);
+                       }
 
                        // Arabic
                        fillIndex [0x1] = 0x8E;
@@ -1408,24 +2586,59 @@ namespace Mono.Globalization.Unicode
 //                                     (byte) arabicLetterPrimaryValues [i], 1);
                                fillIndex [0x13] = 
                                        (byte) arabicLetterPrimaryValues [i];
-                               AddLetterMap ((char) i, 0x13, 0);
+                               byte formDiacritical = 8; // default
+                               // SPECIAL CASES:
+                               switch (i) {
+                               case 0x0622: formDiacritical = 9; break;
+                               case 0x0623: formDiacritical = 0xA; break;
+                               case 0x0624: formDiacritical = 5; break;
+                               case 0x0625: formDiacritical = 0xB; break;
+                               case 0x0626: formDiacritical = 7; break;
+                               case 0x0649: formDiacritical = 5; break;
+                               case 0x064A: formDiacritical = 7; break;
+                               }
+//                             AddLetterMapCore ((char) i, 0x13, 1, formDiacritical, false);
+                               AddArabicCharMap ((char) i, 0x13, 1, formDiacritical);
                        }
+                       for (int i = 0x0670; i < 0x0673; i++)
+                               map [i] = new CharMapEntry (0x13, 0xB, (byte) (0xC + i - 0x670));
                        fillIndex [0x13] = 0x84;
                        for (int i = 0x0674; i < 0x06D6; i++)
                                if (Char.IsLetter ((char) i))
-                                       AddLetterMap ((char) i, 0x13, 1);
+                                       AddLetterMapCore ((char) i, 0x13, 1, 0, false);
 
                        // Devanagari
+
+                       // FIXME: this could be fixed in more decent way
+                       for (int i = 0x0958; i <= 0x095F; i++)
+                               diacritical [i] = 8;
+
                        // FIXME: it does seem straight codepoint mapping.
                        fillIndex [0x14] = 04;
                        for (int i = 0x0901; i < 0x0905; i++)
                                if (!IsIgnorable (i))
                                        AddLetterMap ((char) i, 0x14, 2);
                        fillIndex [0x14] = 0xB;
-                       for (int i = 0x0905; i < 0x093A; i++)
+                       for (int i = 0x0905; i < 0x093A; i++) {
+                               if (i == 0x0928)
+                                       AddCharMap ('\u0929', 0x14, 0, 8);
+                               if (i == 0x0930)
+                                       AddCharMap ('\u0931', 0x14, 0, 8);
+                               if (i == 0x0933)
+                                       AddCharMap ('\u0934', 0x14, 0, 8);
                                if (Char.IsLetter ((char) i))
                                        AddLetterMap ((char) i, 0x14, 4);
-                       for (int i = 0x093E; i < 0x094F; i++)
+                               if (i == 0x090B)
+                                       AddCharMap ('\u0960', 0x14, 4);
+                               if (i == 0x090C)
+                                       AddCharMap ('\u0961', 0x14, 4);
+                       }
+                       fillIndex [0x14] = 0xDA;
+                       for (int i = 0x093E; i < 0x0945; i++)
+                               if (!IsIgnorable (i))
+                                       AddLetterMap ((char) i, 0x14, 2);
+                       fillIndex [0x14] = 0xEC;
+                       for (int i = 0x0945; i < 0x094F; i++)
                                if (!IsIgnorable (i))
                                        AddLetterMap ((char) i, 0x14, 2);
 
@@ -1454,36 +2667,90 @@ namespace Mono.Globalization.Unicode
 
                        // Gurmukhi. orderedGurmukhi is from UCA
                        // FIXME: it does not look equivalent to UCA.
-                       fillIndex [0x1] = 03;
-                       fillIndex [0x16] = 02;
+                       fillIndex [0x16] = 04;
+                       fillIndex [0x1] = 3;
                        for (int i = 0; i < orderedGurmukhi.Length; i++) {
                                char c = orderedGurmukhi [i];
                                if (IsIgnorable ((int) c))
                                        continue;
-                               if (!Char.IsLetter (c)) {
+                               if (IsIgnorableNonSpacing (c)) {
                                        AddLetterMap (c, 0x1, 1);
                                        continue;
                                }
                                if (c == '\u0A3C' || c == '\u0A4D' ||
                                        '\u0A66' <= c && c <= '\u0A71')
                                        continue;
-                               AddLetterMap (c, 0x16, 4);
+                               // SPECIAL CASES
+                               byte shift = 4;
+                               switch (c) {
+                               case '\u0A33': case '\u0A36': case '\u0A16':
+                               case '\u0A17': case '\u0A5B': case '\u0A5E':
+                                       shift = 0;
+                                       break;
+                               }
+                               if (c == '\u0A3E') // Skip
+                                       fillIndex [0x16] = 0xC0;
+                               AddLetterMap (c, 0x16, shift);
                        }
 
                        // Gujarati. orderedGujarati is from UCA
-                       fillIndex [0x17] = 02;
-                       for (int i = 0; i < orderedGujarati.Length; i++)
-                               AddLetterMap (orderedGujarati [i], 0x17, 4);
+                       fillIndex [0x17] = 0x4;
+                       // nonspacing marks
+                       map [0x0A4D] = new CharMapEntry (1, 0, 0x3);
+                       map [0x0ABD] = new CharMapEntry (1, 0, 0x3);
+                       map [0x0A3C] = new CharMapEntry (1, 0, 0x4);
+                       map [0x0A71] = new CharMapEntry (1, 0, 0x6);
+                       map [0x0ABC] = new CharMapEntry (1, 0, 0xB);
+                       map [0x0A70] = new CharMapEntry (1, 0, 0xE);
+                       // letters go first.
+                       for (int i = 0; i < orderedGujarati.Length; i++) {
+                               // SPECIAL CASE
+                               char c = orderedGujarati [i];
+                               if (Char.IsLetter (c)) {
+                                       // SPECIAL CASES
+                                       if (c == '\u0AB3' || c == '\u0A32')
+                                               continue;
+                                       if (c == '\u0A33') {
+                                               AddCharMap ('\u0A32', 0x17, 0);
+                                               AddCharMap ('\u0A33', 0x17, 4, 4);
+                                               continue;
+                                       }
+                                       if (c == '\u0A8B')
+                                               AddCharMap ('\u0AE0', 0x17, 0, 5);
+                                       AddCharMap (c, 0x17, 4);
+
+                                       if (c == '\u0AB9')
+                                               AddCharMap ('\u0AB3', 0x17, 6);
+                               }
+                       }
+                       // non-letters
+                       byte gujaratiShift = 4;
+                       fillIndex [0x17] = 0xC0;
+                       for (int i = 0; i < orderedGujarati.Length; i++) {
+                               char c = orderedGujarati [i];
+                               if (fillIndex [0x17] == 0xCC)
+                                       gujaratiShift = 3;
+                               if (!Char.IsLetter (c)) {
+                                       // SPECIAL CASES
+                                       if (c == '\u0A82')
+                                               AddCharMap ('\u0A81', 0x17, 2);
+                                       if (c == '\u0AC2')
+                                               fillIndex [0x17]++;
+                                       AddLetterMap (c, 0x17, gujaratiShift);
+                               }
+                       }
 
                        // Oriya
+                       fillIndex [0x1] = 03;
                        fillIndex [0x18] = 02;
                        for (int i = 0x0B00; i < 0x0B7F; i++) {
                                switch (Char.GetUnicodeCategory ((char) i)) {
                                case UnicodeCategory.NonSpacingMark:
                                case UnicodeCategory.DecimalDigitNumber:
+                                       AddLetterMap ((char) i, 0x1, 1);
                                        continue;
                                }
-                               AddLetterMap ((char) i, 0x18, 1);
+                               AddLetterMapCore ((char) i, 0x18, 1, 0, true);
                        }
 
                        // Tamil
@@ -1491,13 +2758,11 @@ namespace Mono.Globalization.Unicode
                        AddCharMap ('\u0BD7', 0x19, 0);
                        fillIndex [0x19] = 0xA;
                        // vowels
-                       for (int i = 0x0BD7; i < 0x0B94; i++)
-                               if (Char.IsLetter ((char) i))
+                       for (int i = 0x0B82; i <= 0x0B94; i++)
+                               if (!IsIgnorable ((char) i))
                                        AddCharMap ((char) i, 0x19, 2);
                        // special vowel
-                       fillIndex [0x19] = 0x24;
-                       AddCharMap ('\u0B94', 0x19, 0);
-                       fillIndex [0x19] = 0x26;
+                       fillIndex [0x19] = 0x28;
                        // The array for Tamil consonants is a constant.
                        // Windows have almost similar sequence to TAM from
                        // tamilnet but a bit different in Grantha.
@@ -1529,47 +2794,82 @@ namespace Mono.Globalization.Unicode
                        for (int i = 0x0C80; i < 0x0CE5; i++) {
                                if (i == 0x0CD5 || i == 0x0CD6)
                                        continue; // ignore
+                               if (i == 0x0CB1 || i == 0x0CB3 || i == 0x0CDE)
+                                       continue; // shift after 0xCB9
                                AddCharMap ((char) i, 0x1B, 3);
+                               if (i == 0x0CB9) {
+                                       // SPECIAL CASES: but why?
+                                       AddCharMap ('\u0CB1', 0x1B, 3); // RRA
+                                       AddCharMap ('\u0CB3', 0x1B, 3); // LLA
+                                       AddCharMap ('\u0CDE', 0x1B, 3); // FA
+                               }
+                               if (i == 0x0CB2)
+                                       AddCharMap ('\u0CE1', 0x1B, 3); // vocalic LL
                        }
                        
                        // Malayalam
                        fillIndex [0x1C] = 2;
-                       for (int i = 0x0D02; i < 0x0D61; i++)
+                       fillIndex [0x1] = 3;
+                       for (int i = 0x0D02; i < 0x0D61; i++) {
                                // FIXME: I avoided MSCompatUnicodeTable usage
                                // here (it results in recursion). So check if
                                // using NonSpacingMark makes sense or not.
                                if (Char.GetUnicodeCategory ((char) i) != UnicodeCategory.NonSpacingMark)
 //                             if (!MSCompatUnicodeTable.IsIgnorable ((char) i))
                                        AddCharMap ((char) i, 0x1C, 1);
+                               else if (!IsIgnorable ((char) i))
+                                       AddCharMap ((char) i, 1, 1);
+                       }
 
                        // Thai ... note that it breaks 0x1E wall after E2B!
                        // Also, all Thai characters have level 2 value 3.
                        fillIndex [0x1E] = 2;
-                       for (int i = 0xE44; i < 0xE48; i++)
+                       fillIndex [0x1] = 3;
+                       for (int i = 0xE40; i <= 0xE44; i++)
                                AddCharMap ((char) i, 0x1E, 1, 3);
                        for (int i = 0xE01; i < 0xE2B; i++)
-                               AddCharMap ((char) i, 0x1E, 6, 0);
+                               AddCharMap ((char) i, 0x1E, 6, 3);
                        fillIndex [0x1F] = 5;
                        for (int i = 0xE2B; i < 0xE30; i++)
-                               AddCharMap ((char) i, 0x1F, 6, 0);
+                               AddCharMap ((char) i, 0x1F, 6, 3);
+                       fillIndex [0x1F] = 0x1E;
                        for (int i = 0xE30; i < 0xE3B; i++)
                                AddCharMap ((char) i, 0x1F, 1, 3);
                        // some Thai characters remains.
                        char [] specialThai = new char [] {'\u0E45', '\u0E46',
                                '\u0E4E', '\u0E4F', '\u0E5A', '\u0E5B'};
                        foreach (char c in specialThai)
-                               AddCharMap (c, 0x1F, 1);
+                               AddCharMap (c, 0x1F, 1, 3);
+
+                       for (int i = 0xE00; i < 0xE80; i++)
+                               if (Char.GetUnicodeCategory ((char) i) ==
+                                       UnicodeCategory.NonSpacingMark)
+                                       AddCharMap ((char) i, 1, 1);
 
                        // Lao
                        fillIndex [0x1F] = 2;
-                       for (int i = 0xE80; i < 0xEDF; i++)
-                               if (Char.IsLetter ((char) i))
+                       fillIndex [0x1] = 3;
+                       for (int i = 0xE80; i < 0xEDF; i++) {
+                               if (IsIgnorable ((char) i))
+                                       continue;
+                               else if (Char.IsLetter ((char) i))
                                        AddCharMap ((char) i, 0x1F, 1);
+                               else if (Char.GetUnicodeCategory ((char) i) ==
+                                       UnicodeCategory.NonSpacingMark)
+                                       AddCharMap ((char) i, 1, 1);
+                       }
 
                        // Georgian. orderedGeorgian is from UCA DUCET.
                        fillIndex [0x21] = 5;
-                       for (int i = 0; i < orderedGeorgian.Length; i++)
-                               AddLetterMap (orderedGeorgian [i], 0x21, 5);
+                       for (int i = 0; i < orderedGeorgian.Length; i++) {
+                               char c = orderedGeorgian [i];
+                               if (map [(int) c].Defined)
+                                       continue;
+                               AddCharMap (c, 0x21, 0);
+                               if (c < '\u10F6')
+                                       AddCharMap ((char) (c - 0x30), 0x21, 0);
+                               fillIndex [0x21] += 5;
+                       }
 
                        // Japanese Kana.
                        fillIndex [0x22] = 2;
@@ -1594,6 +2894,16 @@ namespace Mono.Globalization.Unicode
                                                AddKanaMap (cp, kanaLines [gyo]);
                                        fillIndex [0x22]++;
 
+                                       if (cp == 0x30AB) {
+                                               // add small 'ka' (before normal one)
+                                               AddKanaMap (0x30F5, 1);
+                                               kanaOffset++;
+                                       }
+                                       if (cp == 0x30B1) {
+                                               // add small 'ke' (before normal one)
+                                               AddKanaMap (0x30F6, 1);
+                                               kanaOffset++;
+                                       }
                                        if (cp == 0x3061) {
                                                // add small 'Tsu' (before normal one)
                                                AddKanaMap (0x3063, 1);
@@ -1624,11 +2934,27 @@ namespace Mono.Globalization.Unicode
                        AddLetterMap ((char) 0x3093, 0x22, 0);
                        AddLetterMap ((char) (0x3093 + 0x60), 0x22, 0);
 
+                       map [0x3094] = new CharMapEntry (map [0x30A6].Category,
+                               map [0x30A6].Level1, 3);// voiced hiragana U
+                       map [0x30F4] = new CharMapEntry (map [0x30A6].Category,
+                               map [0x30A6].Level1, 3);// voiced katakana U
+
+                       map [0x30F5] = new CharMapEntry (map [0x30AB].Category,
+                               map [0x30AB].Level1, 0);// small katakana Ka
+                       map [0x30F6] = new CharMapEntry (map [0x30B1].Category,
+                               map [0x30B1].Level1, 0);// small katakana Ke
+                       // voiced Wa lines
+                       for (int i = 0x30F7; i < 0x30FB; i++)
+                               map [i] = new CharMapEntry (map [i - 8].Category,
+                                       map [i - 8].Level1,
+                                       3);
+
                        // JIS Japanese square chars.
                        fillIndex [0x22] = 0x97;
                        jisJapanese.Sort (JISComparer.Instance);
                        foreach (JISCharacter j in jisJapanese)
-                               AddCharMap ((char) j.CP, 0x22, 1);
+                               if (0x3300 <= j.CP && j.CP <= 0x3357)
+                                       AddCharMap ((char) j.CP, 0x22, 1);
                        // non-JIS Japanese square chars.
                        nonJisJapanese.Sort (NonJISComparer.Instance);
                        foreach (NonJISCharacter j in nonJisJapanese)
@@ -1658,14 +2984,19 @@ namespace Mono.Globalization.Unicode
                                map [cp] = new CharMapEntry (0x24,
                                        (byte) (map [cp - 1].Level1 + 2),
                                        0);
+                       // FIXME: Syriac NonSpacingMark should go here.
 
                        // Thaana
                        // FIXME: it turned out that it does not look like UCA
                        fillIndex [0x24] = 0x6E;
+                       fillIndex [0x1] = 0xAC;
                        for (int i = 0; i < orderedThaana.Length; i++) {
-                               if (IsIgnorableNonSpacing (i))
-                                       continue;
-                               AddCharMap (orderedThaana [i], 0x24, 2);
+                               char c = orderedThaana [i];
+                               if (IsIgnorableNonSpacing ((int) c))
+                                       AddCharMap (c, 1, 1);
+                               AddCharMap (c, 0x24, 2);
+                               if (c == '\u0782') // SPECIAL CASE: why?
+                                       fillIndex [0x24] += 2;
                        }
                        #endregion
 
@@ -1704,28 +3035,31 @@ namespace Mono.Globalization.Unicode
                        + "<{\u1113 \u1116}, \u3165,"
                                + "\u11C5, \u11C6=\u3166,, \u11C7, \u11C8,"
                                + "\u11AC, \u11C9, \u11AD, \u1103=\u11AE  >"
-                       + "\u11CA, \u1104, \u11CB > \u1105 >"
-                       + "\u11B0, [\u11CC \u11D0], \u11B1, [\u11D1 \u11D2],"
-                               + "\u11B2, [\u11D3 \u11D5], \u11B3,"
+                       + "<\u1117, \u11CA, \u1104, \u11CB > \u1105=\u11AF >"
+                       + "<{\u1118 \u111B}, \u11B0, [\u11CC \u11D0], \u11B1,"
+                               + "[\u11D1 \u11D2], \u11B2,"
+                               + "[\u11D3 \u11D5], \u11B3,"
                                + "[\u11D6 \u11D7], \u11B4, \u11B5,"
                                + "\u11B6=\u11D8, \u3140,, \u11D9, \u1106=\u11B7 >"
-                       + "[\u11DA \u11E2], \u1107=\u11B8 >"
-                       + "<{\u111E \u1120}, \u3172,, \u3173, "
-                               + "\u11E3, \u1108 >"
-                       + "\u11B9,,,,,,,,, [\u11E4 \u11E6],, \u1109=\u11BA,,,"
-                               + "\u3214=\u3274 <>"
-                       + "<{\u112D \u1133}, \u11E7,, [\u11E8 \u11E9],,"
-                               + "\u11EA,, \u110A=\u11BB,,, >"
-                       + "{\u1134 \u1140}, \u317E,,,,,, \u11EB,"
-                       + "\u110B=\u11BC, [\u1161 \u11A2], \u1160 >"
-                       + "\u11EE, \u11EC, \u11ED,,,,, \u11F1,, \u11F2,,,"
-                               + "\u11EF,,, \u11F0, \u110C=\u11BD,, >"
-                       + "\u110D,,  >"
+                       + "<{\u111C \u111D}, [\u11DA \u11E2], \u1107=\u11B8 >"
+                       + "<{\u111E \u1120}, \u3172,, \u3173, \u11E3, \u1108 >"
+                       + "<{\u1121 \u112C}, \u3144 \u11B9, \u3174, \u3175,,,, "
+                               + "\u3176,, \u3177, [\u11E4 \u11E6] \u3178,"
+                               + "\u3179, \u1109=\u11BA,,, \u3214=\u3274 <>"
+                       + "<{\u112D \u1133}, \u11E7 \u317A, \u317B, \u317C "
+                               + "[\u11E8 \u11E9],, \u11EA \u317D,, \u110A=\u11BB,,, >"
+                       + "<{\u1134 \u1140}, \u317E,,,,,,, \u11EB,"
+                               + "\u110B=\u11BC, [\u1161 \u11A2], \u1160 >"
+                       + "<{\u1141 \u114C}, \u3180=\u11EE, \u11EC, \u11ED,,,,, "
+                               + "\u11F1,, \u11F2,,,"
+                               + "\u11EF,,, \u3181=\u11F0, \u110C=\u11BD,, >"
+                       + "<\u114D, \u110D,,  >"
                        + "<{\u114E \u1151},, \u110E=\u11BE,,  >"
                        + "<{\u1152 \u1155},,, \u110F=\u11BF >"
                        + "\u1110=\u11C0 > \u1111=\u11C1 >"
-                       + "\u11F3, \u11F4, \u1112=\u11C2 >"
-                       + "\u11F9, [\u11F5 \u11F8]"
+                       + "<\u1156=\u1157, \u11F3, \u11F4, \u1112=\u11C2 >"
+                       + "<\u1158=\u1159=\u115F, \u3185, \u11F9,"
+                               + "[\u11F5 \u11F8]"
                        ;
 
                        byte hangulCat = 0x52;
@@ -1781,6 +3115,40 @@ namespace Mono.Globalization.Unicode
                                }
                        }
 
+                       // Some Jamo NFKD.
+                       for (int i = 0x3200; i < 0x3300; i++) {
+                               if (IsIgnorable (i) || map [i].Defined)
+                                       continue;
+                               int ch = 0;
+                               // w/ bracket
+                               if (decompLength [i] == 4 &&
+                                       decompValues [decompIndex [i]] == '(')
+                                       ch = decompIndex [i] + 1;
+                               // circled
+                               else if (decompLength [i] == 2 &&
+                                       decompValues [decompIndex [i] + 1] == '\u1161')
+                                       ch = decompIndex [i];
+                               else if (decompLength [i] == 1)
+                                       ch = decompIndex [i];
+                               else
+                                       continue;
+                               ch = decompValues [ch];
+                               if (ch < 0x1100 || 0x1200 < ch &&
+                                       ch < 0xAC00 || 0xD800 < ch)
+                                       continue;
+
+                               // SPECIAL CASE ?
+                               int offset = i < 0x3260 ? 1 : 0;
+                               if (0x326E <= i && i <= 0x3273)
+                                       offset = 1;
+
+                               map [i] = new CharMapEntry (map [ch].Category,
+                                       (byte) (map [ch].Level1 + offset),
+                                       map [ch].Level2);
+//                                     Console.Error.WriteLine ("Jamo {0:X04} -> {1:X04}", i, decompValues [decompIndex [i] + 1]);
+                       }
+
+
                        #endregion
 
                        // Letterlike characters and CJK compatibility square
@@ -1817,30 +3185,31 @@ namespace Mono.Globalization.Unicode
                        // PrivateUse ... computed.
                        // remaining Surrogate ... computed.
 
-                       #region Special "biggest" area (FF FF)
-                       fillIndex [0xFF] = 0xFF;
-                       char [] specialBiggest = new char [] {
-                               '\u3005', '\u3031', '\u3032', '\u309D',
-                               '\u309E', '\u30FC', '\u30FD', '\u30FE',
-                               '\uFE7C', '\uFE7D', '\uFF70'};
-                       foreach (char c in specialBiggest)
-                               AddCharMap (c, 0xFF, 0);
-                       #endregion
-
                        #region 07 - ASCII non-alphanumeric + 3001, 3002 // 07
                        // non-alphanumeric ASCII except for: + - < = > '
                        for (int i = 0x21; i < 0x7F; i++) {
+                               // SPECIAL CASE: 02C6 looks regarded as 
+                               // equivalent to '^', which does not conform 
+                               // to Unicode standard character database.
+                               if (i == 0x005B)
+                                       AddCharMap ('\u2045', 0x7, 0, 0x1C);
+                               if (i == 0x005D)
+                                       AddCharMap ('\u2046', 0x7, 0, 0x1C);
+                               if (i == 0x005E)
+                                       AddCharMap ('\u02C6', 0x7, 0, 3);
+                               if (i == 0x0060)
+                                       AddCharMap ('\u02CB', 0x7, 0, 3);
+
                                if (Char.IsLetterOrDigit ((char) i)
                                        || "+-<=>'".IndexOf ((char) i) >= 0)
                                        continue; // they are not added here.
-                                       AddCharMapGroup2 ((char) i, 0x7, 1, 0);
+
+                               AddCharMapGroup2 ((char) i, 0x7, 1, 0);
                                // Insert 3001 after ',' and 3002 after '.'
                                if (i == 0x2C)
                                        AddCharMapGroup2 ('\u3001', 0x7, 1, 0);
-                               else if (i == 0x2E) {
-                                       fillIndex [0x7]--;
+                               else if (i == 0x2E)
                                        AddCharMapGroup2 ('\u3002', 0x7, 1, 0);
-                               }
                                else if (i == 0x3A)
                                        AddCharMap ('\uFE30', 0x7, 1, 0);
                        }
@@ -1851,9 +3220,37 @@ namespace Mono.Globalization.Unicode
                                if (IsIgnorable (i))
                                        continue;
 
+                               // FIXME: actually those reset should not be 
+                               // done but here I put for easy goal.
+                               if (i == 0x05C3)
+                                       fillIndex [0x7]++;
+                               if (i == 0x0700)
+                                       fillIndex [0x7] = 0xE2;
+                               if (i == 0x2016)
+                                       fillIndex [0x7] = 0x77;
+                               if (i == 0x3008)
+                                       fillIndex [0x7] = 0x93;
+
+                               if (0x02C8 <= i && i <= 0x02CD)
+                                       continue; // nonspacing marks
+
+                               // SPECIAL CASE: maybe they could be allocated
+                               // dummy NFKD mapping and no special processing
+                               // would be required here.
+                               if (i == 0x00AF)
+                                       AddCharMap ('\u02C9', 0x7, 0, 3);
+                               if (i == 0x00B4)
+                                       AddCharMap ('\u02CA', 0x7, 0, 3);
+                               if (i == 0x02C7)
+                                       AddCharMap ('\u02D8', 0x7, 0, 3);
+
                                // SPECIAL CASES:
                                switch (i) {
                                case 0xAB: // 08
+                               case 0xB7: // 0A
+                               case 0xBB: // 08
+                               case 0x02B9: // 01
+                               case 0x02BA: // 01
                                case 0x2329: // 09
                                case 0x232A: // 09
                                        continue;
@@ -1862,23 +3259,107 @@ namespace Mono.Globalization.Unicode
                                switch (Char.GetUnicodeCategory ((char) i)) {
                                case UnicodeCategory.OtherPunctuation:
                                case UnicodeCategory.ClosePunctuation:
+                               case UnicodeCategory.OpenPunctuation:
+                               case UnicodeCategory.ConnectorPunctuation:
                                case UnicodeCategory.InitialQuotePunctuation:
                                case UnicodeCategory.FinalQuotePunctuation:
                                case UnicodeCategory.ModifierSymbol:
                                        // SPECIAL CASES: // 0xA
-                                       if (0x2020 <= i && i <= 0x2042)
+                                       if (0x2020 <= i && i <= 0x2031)
+                                               continue;
+                                       if (i == 0x3003) // added later
                                                continue;
-                                       AddCharMapGroup ((char) i, 0x7, 1, 0);
+                                       AddCharMapGroup2 ((char) i, 0x7, 1, 0);
                                        break;
                                default:
-                                       if (i == 0xA6) // SPECIAL CASE. FIXME: why?
+                                       if (i == 0xA6 || i == 0x1C3 || i == 0x037A) // SPECIAL CASE. FIXME: why?
                                                goto case UnicodeCategory.OtherPunctuation;
                                        break;
                                }
                        }
+
+                       // Control pictures
+                       // FIXME: it should not need to reset level 1, but
+                       // it's for easy goal.
+                       fillIndex [0x7] = 0xB6;
+                       for (int i = 0x2400; i <= 0x2424; i++)
+                               AddCharMap ((char) i, 0x7, 1, 0);
+
+                       // FIXME: what are they?
+                       AddCharMap ('\u3003', 0x7, 1);
+                       AddCharMap ('\u3006', 0x7, 1);
+                       AddCharMap ('\u02D0', 0x7, 1);
+                       AddCharMap ('\u10FB', 0x7, 1);
+                       AddCharMap ('\u0950', 0x7, 1);
+                       AddCharMap ('\u093D', 0x7, 1);
+                       AddCharMap ('\u0964', 0x7, 1);
+                       AddCharMap ('\u0965', 0x7, 1);
+                       AddCharMap ('\u0970', 0x7, 1);
+
+                       #endregion
+
+                       #region category 08 - symbols
+                       fillIndex [0x8] = 2;
+                       // Here Windows mapping is not straightforward. It is
+                       // not based on computation but seems manual sorting.
+                       AddCharMapGroup ('+', 0x8, 1, 0); // plus
+                       AddCharMapGroup ('\u2212', 0x8, 1); // minus
+                       AddCharMapGroup ('\u229D', 0x8, 1); // minus
+                       AddCharMapGroup ('\u2297', 0x8, 1); // mul
+                       AddCharMapGroup ('\u2044', 0x8, 1); // div
+                       AddCharMapGroup ('\u2215', 0x8, 0); // div
+                       AddCharMapGroup ('\u2298', 0x8, 1); // div slash
+                       AddCharMapGroup ('\u2217', 0x8, 0); // mul
+                       AddCharMapGroup ('\u229B', 0x8, 1); // asterisk oper
+                       AddCharMapGroup ('\u2218', 0x8, 0); // ring
+                       AddCharMapGroup ('\u229A', 0x8, 1); // ring
+                       AddCharMapGroup ('\u2219', 0x8, 0); // bullet
+                       AddCharMapGroup ('\u2299', 0x8, 1); // dot oper
+                       AddCharMapGroup ('\u2213', 0x8, 1); // minus-or-plus
+                       AddCharMapGroup ('\u003C', 0x8, 1); // <
+                       AddCharMapGroup ('\u227A', 0x8, 1); // precedes relation
+                       AddCharMapGroup ('\u22B0', 0x8, 1); // precedes under relation
+
+                       for (int cp = 0; cp < 0x2300; cp++) {
+                               if (cp == 0xAC) // SPECIAL CASE: skip
+                                       continue;
+                               if (cp == 0x200) {
+                                       cp = 0x2200; // skip to 2200
+                                       fillIndex [0x8] = 0x21;
+                               }
+                               if (cp == 0x2295)
+                                       fillIndex [0x8] = 0x3;
+                               if (cp == 0x22A2)
+                                       fillIndex [0x8] = 0xAB;
+                               if (cp == 0x22B2)
+                                       fillIndex [0x8] = 0xB9;
+                               if (!map [cp].Defined &&
+//                                     Char.GetUnicodeCategory ((char) cp) ==
+//                                     UnicodeCategory.MathSymbol)
+                                       Char.IsSymbol ((char) cp))
+                                       AddCharMapGroup ((char) cp, 0x8, 1);
+                               // SPECIAL CASES: no idea why Windows sorts as such
+                               switch (cp) {
+                               case 0x3E:
+                                       AddCharMap ('\u227B', 0x8, 1, 0);
+                                       AddCharMap ('\u22B1', 0x8, 1, 0);
+                                       break;
+                               case 0xB1:
+                                       AddCharMapGroup ('\u00AB', 0x8, 1);
+                                       AddCharMapGroup ('\u226A', 0x8, 1);
+                                       AddCharMapGroup ('\u00BB', 0x8, 1);
+                                       AddCharMapGroup ('\u226B', 0x8, 1);
+                                       break;
+                               case 0xF7:
+                                       AddCharMap ('\u01C0', 0x8, 1, 0);
+                                       AddCharMap ('\u01C1', 0x8, 1, 0);
+                                       AddCharMap ('\u01C2', 0x8, 1, 0);
+                                       break;
+                               }
+                       }
                        #endregion
 
-                       // FIXME: for 07 xx we need more love.
+                       #region Hack!
 
                        // Characters w/ diacritical marks (NFKD)
                        for (int i = 0; i <= char.MaxValue; i++) {
@@ -1889,7 +3370,7 @@ namespace Mono.Globalization.Unicode
 
                                int start = decompIndex [i];
                                int primaryChar = decompValues [start];
-                               int secondary = 0;
+                               int secondary = diacritical [i];
                                bool skip = false;
                                int length = decompLength [i];
                                // special processing for parenthesized ones.
@@ -1918,7 +3399,8 @@ namespace Mono.Globalization.Unicode
                                
                        }
 
-                       #region Level2 adjustment
+                       // Diacritical weight adjustment
+
                        // Arabic Hamzah
                        diacritical [0x624] = 0x5;
                        diacritical [0x626] = 0x7;
@@ -1928,7 +3410,6 @@ namespace Mono.Globalization.Unicode
                        diacritical [0x649] = 0x5; // 'alif maqs.uurah
                        diacritical [0x64A] = 0x7; // Yaa'
 
-
                        for (int i = 0; i < char.MaxValue; i++) {
                                byte mod = 0;
                                byte cat = map [i].Category;
@@ -1938,7 +3419,11 @@ namespace Mono.Globalization.Unicode
                                        mod = diacritical [i];
                                        break;
                                case 0x13: // Arabic
-                                       if (diacritical [i] == 0)
+                                       if (i == 0x0621)
+                                               break; // 0
+                                       if (diacritical [i] == 0 && decompLength [i] != 0)
+                                               diacritical [i] = map [decompValues [decompIndex [i]]].Level2;
+                                       if (diacritical [i] == 0 && i >= 0xFE8D)
                                                mod = 0x8; // default for arabic
                                        break;
                                }
@@ -1948,17 +3433,79 @@ namespace Mono.Globalization.Unicode
                                        map [i] = new CharMapEntry (
                                                cat, map [i].Level1, mod);
                        }
-                       #endregion
 
-                       // FIXME: this is hack but those which are 
-                       // NonSpacingMark characters and still undefined
-                       // are likely to be nonspacing.
-                       for (int i = 0; i < char.MaxValue; i++)
-                               if (!map [i].Defined &&
-                                       !IsIgnorable (i) &&
-                                       Char.GetUnicodeCategory ((char) i) ==
+                       // FIXME: this is halfly hack but those NonSpacingMark 
+                       // characters and still undefined are likely to
+                       // be nonspacing.
+                       for (int i = 0; i < char.MaxValue; i++) {
+                               if (map [i].Defined ||
+                                       IsIgnorable (i))
+                                       continue;
+                               switch (i) {
+                               // SPECIAL CASES.
+                               case 0x02B9:
+                               case 0x02BA:
+                                       break;
+                               default:
+                                       if (Char.GetUnicodeCategory ((char) i) !=
                                        UnicodeCategory.NonSpacingMark)
+                                               continue;
+                                       break;
+                               }
+                               if (diacritical [i] != 0)
+                                       map [i] = new CharMapEntry (1, 1, diacritical [i]);
+                               else
                                        AddCharMap ((char) i, 1, 1);
+                       }
+
+                       #endregion
+               }
+
+               TextInfo ti = CultureInfo.InvariantCulture.TextInfo;
+
+               private void FillLetterNFKD (int i, bool checkUpper, bool greekRemap)
+               {
+                       if (map [i].Defined)
+                               return;
+                       int up = (int) ti.ToUpper ((char) i);
+                       if (checkUpper && map [up].Category == 0xF) {
+                               if (i == up)
+                                       return;
+                               FillLetterNFKD (up, checkUpper, greekRemap);
+                               map [i] = new CharMapEntry (0xF,
+                                       map [up].Level1,
+                                       map [up].Level2);
+                       } else {
+                               int idx = decompIndex [i];
+                               if (idx == 0)
+                                       return;
+                               int primary = decompValues [decompIndex [i]];
+                               FillLetterNFKD (primary, checkUpper, greekRemap);
+
+                               int lv2 = map [primary].Level2;
+                               byte off = 0;
+                               for (int l = 1; l < decompLength [i]; l++) {
+                                       int tmp = decompValues [idx + l];
+                                       if (map [tmp].Category != 1)
+                                               return;
+                                       if (greekRemap && map [tmp].Level2 == 0xC)
+                                               off += 3;
+                                       else
+                                               off += map [tmp].Level2;
+                               }
+                               if (off > 0) {
+                                       if (lv2 == 0)
+                                               lv2 += 2;
+                                       lv2 += off;
+                               }
+                               // ... but override if the value already exists.
+                               if (diacritical [i] != 0)
+                                       lv2 = diacritical [i];
+                               map [i] = new CharMapEntry (
+                                       map [primary].Category,
+                                       map [primary].Level1,
+                                       (byte) lv2);
+                       }
                }
 
                private void IncrementSequentialIndex (ref byte hangulCat)
@@ -1990,32 +3537,32 @@ namespace Mono.Globalization.Unicode
                                char c = (char) (i + b);
                                byte arg = (byte) (b > 0 ? b + 2 : 0);
                                // Hiragana
-                               AddLetterMapCore (c, 0x22, 0, arg);
+                               AddLetterMapCore (c, 0x22, 0, arg, false);
                                // Katakana
-                               AddLetterMapCore ((char) (c + 0x60), 0x22, 0, arg);
+                               AddLetterMapCore ((char) (c + 0x60), 0x22, 0, arg, false);
                        }
                }
 
                private void AddLetterMap (char c, byte category, byte updateCount)
                {
-                       AddLetterMapCore (c, category, updateCount, 0);
+                       AddLetterMapCore (c, category, updateCount, 0, true);
                }
 
-               private void AddLetterMapCore (char c, byte category, byte updateCount, byte level2)
+               private void AddLetterMapCore (char c, byte category, byte updateCount, byte level2, bool deferLevel2)
                {
                        char c2;
                        // <small> updates index
                        c2 = ToSmallForm (c);
                        if (c2 != c)
-                               AddCharMapGroup (c2, category, updateCount, level2);
+                               AddCharMapGroup (c2, category, updateCount, level2, deferLevel2);
                        c2 = Char.ToLower (c, CultureInfo.InvariantCulture);
                        if (c2 != c && !map [(int) c2].Defined)
-                               AddLetterMapCore (c2, category, 0, level2);
+                               AddLetterMapCore (c2, category, 0, level2, deferLevel2);
                        bool doUpdate = true;
                        if (IsIgnorable ((int) c) || map [(int) c].Defined)
                                doUpdate = false;
                        else
-                               AddCharMapGroup (c, category, 0, level2);
+                               AddCharMapGroup (c, category, 0, level2, deferLevel2);
                        if (doUpdate)
                                fillIndex [category] += updateCount;
                }
@@ -2036,19 +3583,6 @@ namespace Mono.Globalization.Unicode
                        return true;
                }
 
-               private void AddCharMapGroupTail (char c, byte category, byte updateCount)
-               {
-                       char c2 = ToSmallFormTail (c);
-                       if (c2 != c)
-                               AddCharMap (c2, category, updateCount, 0);
-                       // itself
-                       AddCharMap (c, category, updateCount, 0);
-                       // <full>
-                       c2 = ToFullWidthTail (c);
-                       if (c2 != c)
-                               AddCharMapGroupTail (c2, category, updateCount);
-               }
-
                //
                // Adds characters to table in the order below 
                // (+ increases weight):
@@ -2070,11 +3604,24 @@ namespace Mono.Globalization.Unicode
                        DecompositionWide,
                        DecompositionNarrow,
                        };
+               private void AddCharMapGroup (char c, byte category, byte updateCount)
+               {
+                       AddCharMapGroup (c, category, updateCount, 0, true);
+               }
+
                private void AddCharMapGroup (char c, byte category, byte updateCount, byte level2)
+               {
+                       AddCharMapGroup (c, category, updateCount, level2, false);
+               }
+
+               private void AddCharMapGroup (char c, byte category, byte updateCount, byte level2, bool deferLevel2)
                {
                        if (map [(int) c].Defined)
                                return;
 
+                       if (deferLevel2)
+                               level2 = diacritical [(int) c];
+
                        char small = char.MinValue;
                        char vertical = char.MinValue;
                        Hashtable nfkd = (Hashtable) nfkdMap [(int) c];
@@ -2088,8 +3635,11 @@ namespace Mono.Globalization.Unicode
                        }
 
                        // <small> updates index
-                       if (small != char.MinValue)
-                               AddCharMap (small, category, updateCount);
+                       if (small != char.MinValue) {
+                               if (level2 == 0 && deferLevel2)
+                                       level2 = diacritical [small];
+                               AddCharMap (small, category, updateCount, level2);
+                       }
 
                        // itself
                        AddCharMap (c, category, 0, level2);
@@ -2097,16 +3647,22 @@ namespace Mono.Globalization.Unicode
                        if (nfkd != null) {
                                foreach (int weight in sameWeightItems) {
                                        object wv = nfkd [(byte) weight];
-                                       if (wv != null)
+                                       if (wv != null) {
+                                               if (deferLevel2)
+                                                       level2 = diacritical [(int) wv];
                                                AddCharMap ((char) ((int) wv), category, 0, level2);
+                                       }
                                }
                        }
 
                        // update index here.
                        fillIndex [category] += updateCount;
 
-                       if (vertical != char.MinValue)
+                       if (vertical != char.MinValue) {
+                               if (level2 == 0 && deferLevel2)
+                                       level2 = diacritical [vertical];
                                AddCharMap (vertical, category, updateCount, level2);
+                       }
                }
 
                private void AddCharMapCJK (char c, ref byte category)
@@ -2124,6 +3680,10 @@ namespace Mono.Globalization.Unicode
                        AddCharMapCJK (c, ref category);
 
                        // LAMESPEC: see below.
+                       if (c == '\u5B78') {
+                               AddCharMapCJK ('\u32AB', ref category);
+                               AddCharMapCJK ('\u323B', ref category);
+                       }
                        if (c == '\u52DE') {
                                AddCharMapCJK ('\u3298', ref category);
                                AddCharMapCJK ('\u3238', ref category);
@@ -2153,7 +3713,8 @@ namespace Mono.Globalization.Unicode
                                // mix Chinise and Japanese Kanji when
                                // ordering those characters.
                                switch (w) {
-                               case 0x32A2: case 0x3298: case 0x3238: case 0x32A9:
+                               case 0x32A2: case 0x3298: case 0x3238:
+                               case 0x32A9: case 0x323B: case 0x32AB:
                                        continue;
                                }
 
@@ -2164,23 +3725,44 @@ namespace Mono.Globalization.Unicode
                // For now it is only for 0x7 category.
                private void AddCharMapGroup2 (char c, byte category, byte updateCount, byte level2)
                {
-                       char small = char.MinValue;
-                       char vertical = char.MinValue;
-                       Hashtable nfkd = (Hashtable) nfkdMap [(int) c];
-                       if (nfkd != null) {
-                               object smv = nfkd [(byte) DecompositionSmall];
-                               if (smv != null)
-                                       small = (char) ((int) smv);
-                               object vv = nfkd [(byte) DecompositionVertical];
-                               if (vv != null)
-                                       vertical = (char) ((int) vv);
+                       if (map [(int) c].Defined)
+                               return;
+
+                       bool updateWeight = false;
+                       // Process in advance (lower primary weight)
+                       for (int c2 = 0; c2 < char.MaxValue; c2++) {
+                               if (!map [c2].Defined &&
+                                       decompLength [c2] == 1 &&
+                                       (int) (decompValues [decompIndex [c2]]) == (int) c) {
+                                       switch (decompType [c2]) {
+                                       case DecompositionSmall:
+                                               updateWeight = true;
+                                               AddCharMap ((char) c2, category,
+                                                       0, level2);
+                                               break;
+                                       }
+                               }
                        }
+                       if (updateWeight)
+                               fillIndex [category] = (byte)
+                                       (fillIndex [category] + updateCount);
 
-                       // <small> updates index
-                       if (small != char.MinValue)
-                               // SPECIAL CASE excluded (FIXME: why?)
-                               if (small != '\u2024')
-                                       AddCharMap (small, category, updateCount);
+                       // Identical weight
+                       for (int c2 = 0; c2 < char.MaxValue; c2++) {
+                               if (!map [c2].Defined &&
+                                       decompLength [c2] == 1 &&
+                                       (int) (decompValues [decompIndex [c2]]) == (int) c) {
+                                       switch (decompType [c2]) {
+                                       case DecompositionSub:
+                                       case DecompositionSuper:
+                                       case DecompositionWide:
+                                       case DecompositionNarrow:
+                                               AddCharMap ((char) c2, category,
+                                                       0, level2);
+                                               break;
+                                       }
+                               }
+                       }
 
                        // itself
                        AddCharMap (c, category, updateCount, level2);
@@ -2188,30 +3770,40 @@ namespace Mono.Globalization.Unicode
                        // Since nfkdMap is problematic to have two or more
                        // NFKD to an identical character, here I iterate all.
                        for (int c2 = 0; c2 < char.MaxValue; c2++) {
-                               if (decompLength [c2] == 1 &&
+                               if (!map [c2].Defined &&
+                                       decompLength [c2] == 1 &&
                                        (int) (decompValues [decompIndex [c2]]) == (int) c) {
                                        switch (decompType [c2]) {
-                                       case DecompositionCompat:
+                                       case DecompositionWide:
+                                       case DecompositionNarrow:
+                                       case DecompositionSmall:
+                                       case DecompositionSub:
+                                       case DecompositionSuper:
+                                               continue;
+                                       default:
                                                AddCharMap ((char) c2, category, updateCount, level2);
                                                break;
                                        }
                                }
                        }
-
-                       if (vertical != char.MinValue)
-                               // SPECIAL CASE excluded (FIXME: why?)
-                               if (vertical != '\uFE33' && vertical != '\uFE34')
-                                       AddCharMap (vertical, category, updateCount, level2);
                }
 
-               char ToFullWidth (char c)
+               private void AddArabicCharMap (char c, byte category, byte updateCount, byte level2)
                {
-                       return ToDecomposed (c, DecompositionFull, false);
-               }
+                       // itself
+                       AddCharMap (c, category, 0, level2);
 
-               char ToFullWidthTail (char c)
-               {
-                       return ToDecomposed (c, DecompositionFull, true);
+                       // Since nfkdMap is problematic to have two or more
+                       // NFKD to an identical character, here I iterate all.
+                       for (int c2 = 0; c2 < char.MaxValue; c2++) {
+                               if (decompLength [c2] == 0)
+                                       continue;
+                               int idx = decompIndex [c2] + decompLength [c2] - 1;
+                               if ((int) (decompValues [idx]) == (int) c)
+                                       AddCharMap ((char) c2, category,
+                                               0, level2);
+                       }
+                       fillIndex [category] += updateCount;
                }
 
                char ToSmallForm (char c)
@@ -2219,11 +3811,6 @@ namespace Mono.Globalization.Unicode
                        return ToDecomposed (c, DecompositionSmall, false);
                }
 
-               char ToSmallFormTail (char c)
-               {
-                       return ToDecomposed (c, DecompositionSmall, true);
-               }
-
                char ToDecomposed (char c, byte d, bool tail)
                {
                        if (decompType [(int) c] != d)
@@ -2254,6 +3841,30 @@ namespace Mono.Globalization.Unicode
 
                private byte ComputeLevel3WeightRaw (char c) // add 2 for sortkey value
                {
+                       // CJK compat
+                       if ('\u3192' <= c && c <= '\u319F')
+                               return 0;
+
+                       // They have <narrow> NFKD mapping, and on Windows
+                       // those narrow characters are regarded as "normal",
+                       // thus those characters themselves are regarded as
+                       // "wide". grep "<narrow>" and you can pick them up
+                       // (ignoring Kana, Hangul etc.)
+                       switch (c) {
+                       case '\u3002':
+                       case '\u300C':
+                       case '\u300D':
+                       case '\u3001':
+                       case '\u30FB':
+                       case '\u2502':
+                       case '\u2190':
+                       case '\u2191':
+                       case '\u2192':
+                       case '\u2193':
+                       case '\u25A0':
+                       case '\u25CB':
+                               return 1;
+                       }
                        // Korean
                        if ('\u11A8' <= c && c <= '\u11F9')
                                return 2;
@@ -2261,6 +3872,11 @@ namespace Mono.Globalization.Unicode
                                return 4;
                        if ('\u3130' <= c && c <= '\u3164')
                                return 5;
+                       if ('\u3165' <= c && c <= '\u318E')
+                               return 4;
+                       // Georgian Capital letters
+                       if ('\u10A0' <= c && c <= '\u10C5')
+                               return 0x10;
                        // numbers
                        if ('\u2776' <= c && c <= '\u277F')
                                return 4;
@@ -2269,24 +3885,34 @@ namespace Mono.Globalization.Unicode
                        if ('\u2776' <= c && c <= '\u2793')
                                return 0xC;
                        if ('\u2160' <= c && c <= '\u216F')
-                               return 0x18;
+                               return 0x10;
                        if ('\u2181' <= c && c <= '\u2182')
-                               return 0x18;
+                               return 0x10;
                        // Arabic
                        if ('\u2135' <= c && c <= '\u2138')
                                return 4;
-                       if ('\uFE80' <= c && c < '\uFE8E') {
+                       // I believe that Windows has a bug on setting level 3
+                       // weight here. NFKD results in different values.
+                       if ('\uFE80' < c && c < '\uFF00') {
                                // 2(Isolated)/8(Final)/0x18(Medial)
                                switch (decompType [(int) c]) {
                                case DecompositionIsolated:
-                                       return 2;
+                                       return 0; // 2;
                                case DecompositionFinal:
                                        return 8;
                                case DecompositionMedial:
                                        return 0x18;
+                               case DecompositionInitial:
+                                       return 0x10;
                                }
                        }
 
+                       // I have no idea why those symbols have level 3 weight
+                       if (c == '\u2104' || c == '\u212B')
+                               return 0x18;
+                       if ('\u211E' <= c && c <= '\u212B')
+                               return 0x10;
+
                        // actually I dunno the reason why they have weights.
                        switch (c) {
                        case '\u01BC':
@@ -2295,17 +3921,23 @@ namespace Mono.Globalization.Unicode
                                return 0x20;
                        case '\u06AA':
                                return 0x28;
+                       // Gurmukhi
+                       case '\u0A39':
+                       case '\u0A59':
+                       case '\u0A5A':
+                       case '\u0A5B':
+                       case '\u0A5E':
+                               return 0x10;
                        }
 
                        byte ret = 0;
                        switch (c) {
                        case '\u03C2':
-                       case '\u2104':
                        case '\u212B':
-                               ret |= 8;
+                               ret = 8;
                                break;
                        case '\uFE42':
-                               ret |= 0xC;
+                               ret = 0xA;
                                break;
                        }
 
@@ -2328,6 +3960,20 @@ namespace Mono.Globalization.Unicode
                #endregion
 
                #region IsIgnorable
+/*
+               static bool IsIgnorable (int i)
+               {
+                       if (unicodeAge [i] >= 3.1)
+                               return true;
+                       switch (char.GetUnicodeCategory ((char) i)) {
+                       case UnicodeCategory.OtherNotAssigned:
+                       case UnicodeCategory.Format:
+                               return true;
+                       }
+                       return false;
+               }
+*/
+
                // FIXME: In the future use DerivedAge.txt to examine character
                // versions and set those ones that have higher version than
                // 1.0 as ignorable.
@@ -2362,6 +4008,7 @@ namespace Mono.Globalization.Unicode
                        // those ranges.
                        case 0x4d8: case 0x4d9:
                        case 0x4e8: case 0x4e9:
+                       case 0x70F:
                        case 0x3036: case 0x303f:
                        case 0x337b: case 0xfb1e:
                                return false;
@@ -2720,7 +4367,7 @@ namespace Mono.Globalization.Unicode
                {
                        JISCharacter j1 = (JISCharacter) o1;
                        JISCharacter j2 = (JISCharacter) o2;
-                       return j2.JIS - j1.JIS;
+                       return j1.JIS - j2.JIS;
                }
        }
 
@@ -2831,4 +4478,136 @@ namespace Mono.Globalization.Unicode
                        return l1 - l2;
                }
        }
+
+       class Tailoring
+       {
+               int lcid;
+               int alias;
+               bool frenchSort;
+               ArrayList items = new ArrayList ();
+
+               public Tailoring (int lcid)
+                       : this (lcid, 0)
+               {
+               }
+
+               public Tailoring (int lcid, int alias)
+               {
+                       this.lcid = lcid;
+                       this.alias = alias;
+               }
+
+               public int LCID {
+                       get { return lcid; }
+               }
+
+               public int Alias {
+                       get { return alias; }
+               }
+
+               public bool FrenchSort {
+                       get { return frenchSort; }
+                       set { frenchSort = value; }
+               }
+
+               public void AddDiacriticalMap (byte target, byte replace)
+               {
+                       items.Add (new DiacriticalMap (target, replace));
+               }
+
+               public void AddSortKeyMap (string source, byte [] sortkey)
+               {
+                       items.Add (new SortKeyMap (source, sortkey));
+               }
+
+               public void AddReplacementMap (string source, string replace)
+               {
+                       items.Add (new ReplacementMap (source, replace));
+               }
+
+               public char [] ItemToCharArray ()
+               {
+                       ArrayList al = new ArrayList ();
+                       foreach (ITailoringMap m in items)
+                               al.AddRange (m.ToCharArray ());
+                       return al.ToArray (typeof (char)) as char [];
+               }
+
+               interface ITailoringMap
+               {
+                       char [] ToCharArray ();
+               }
+
+               class DiacriticalMap : ITailoringMap
+               {
+                       public readonly byte Target;
+                       public readonly byte Replace;
+
+                       public DiacriticalMap (byte target, byte replace)
+                       {
+                               Target = target;
+                               Replace = replace;
+                       }
+
+                       public char [] ToCharArray ()
+                       {
+                               char [] ret = new char [3];
+                               ret [0] = (char) 02; // kind:DiacriticalMap
+                               ret [1] = (char) Target;
+                               ret [2] = (char) Replace;
+                               return ret;
+                       }
+               }
+
+               class SortKeyMap : ITailoringMap
+               {
+                       public readonly string Source;
+                       public readonly byte [] SortKey;
+
+                       public SortKeyMap (string source, byte [] sortkey)
+                       {
+                               Source = source;
+                               SortKey = sortkey;
+                       }
+
+                       public char [] ToCharArray ()
+                       {
+                               char [] ret = new char [Source.Length + 7];
+                               ret [0] = (char) 01; // kind:SortKeyMap
+                               for (int i = 0; i < Source.Length; i++)
+                                       ret [i + 1] = Source [i];
+                               // null terminate
+                               for (int i = 0; i < 4; i++)
+                                       ret [i + Source.Length + 2] = (char) SortKey [i];
+                               return ret;
+                       }
+               }
+
+               class ReplacementMap : ITailoringMap
+               {
+                       public readonly string Source;
+                       public readonly string Replace;
+
+                       public ReplacementMap (string source, string replace)
+                       {
+                               Source = source;
+                               Replace = replace;
+                       }
+
+                       public char [] ToCharArray ()
+                       {
+                               char [] ret = new char [Source.Length + Replace.Length + 3];
+                               ret [0] = (char) 03; // kind:ReplaceMap
+                               int pos = 1;
+                               for (int i = 0; i < Source.Length; i++)
+                                       ret [pos++] = Source [i];
+                               // null terminate
+                               pos++;
+                               for (int i = 0; i < Replace.Length; i++)
+                                       ret [pos++] = Replace [i];
+                               // null terminate
+                               return ret;
+                       }
+               }
+       }
 }