updating to the latest module.
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / create-mscompat-collation-table.cs
index 66de6bea712614c470f4922b0a22233747ea7bf2..c54b9ab8eb936c46967e4952531edbb3207b16a5 100644 (file)
@@ -24,7 +24,6 @@
 //     If there are characters whose primary weight is 0, they are consumed
 //     and considered as a part of the character element.
 //
-#define Binary
 
 using System;
 using System.IO;
@@ -97,23 +96,18 @@ namespace Mono.Globalization.Unicode
 
                string [] diacritics = new string [] {
                        // LATIN
-                       "WITH VERTICAL LINE ABOVE;",
-                       "WITH GRAVE ACCENT;", "WITH ACUTE ACCENT;", "WITH CIRCUMFLEX ACCENT;",
-                       "WITH ACUTE;", "WITH GRAVE;", "WITH DOT ABOVE;", " MIDDLE DOT;",
+                       "WITH ACUTE;", "WITH GRAVE;", " DOT ABOVE;", " MIDDLE DOT;",
                        "WITH CIRCUMFLEX;", "WITH DIAERESIS;", "WITH CARON;", "WITH BREVE;",
-                       " DIALYTIKA AND TONOS;", "WITH MACRON;", "WITH TILDE;", "WITH RING ABOVE;",
-                       "WITH OGONEK;", "WITH CEDILLA;",
-                       //
+                       " DIALYTIKA AND TONOS;", "WITH MACRON;", "WITH TILDE;", " RING ABOVE;",
+                       " OGONEK;", " CEDILLA;",
                        " DOUBLE ACUTE;", " ACUTE AND DOT ABOVE;",
-                       "WITH STROKE;", " CIRCUMFLEX AND ACUTE;",
-                       "STROKE OVERLAY",
+                       " STROKE;", " CIRCUMFLEX AND ACUTE;",
                        " DIAERESIS AND ACUTE;", "WITH CIRCUMFLEX AND GRAVE;", " L SLASH;",
                        " DIAERESIS AND GRAVE;",
                        " BREVE AND ACUTE;",
                        " CARON AND DOT ABOVE;", " BREVE AND GRAVE;",
                        " MACRON AND ACUTE;",
                        " MACRON AND GRAVE;",
-                       //
                        " DIAERESIS AND CARON", " DOT ABOVE AND MACRON", " TILDE AND ACUTE",
                        " RING ABOVE AND ACUTE",
                        " DIAERESIS AND MACRON", " CEDILLA AND ACUTE", " MACRON AND DIAERESIS",
@@ -123,24 +117,19 @@ namespace Mono.Globalization.Unicode
                        " BREVE AND TILDE",
                        " CEDILLA AND BREVE",
                        " OGONEK AND MACRON",
-                       //
-                       "WITH OVERLINE",
-                       "WITH HOOK;", "LEFT HOOK;", " WITH HOOK ABOVE;",
+                       " HOOK;", "LEFT HOOK;", " WITH HOOK ABOVE;",
                        " DOUBLE GRAVE;",
                        " INVERTED BREVE",
-                       "ROMAN NUMERAL",
                        " PRECEDED BY APOSTROPHE",
-                       "WITH HORN;",
+                       " HORN;",
                        " LINE BELOW;", " CIRCUMFLEX AND HOOK ABOVE",
                        " PALATAL HOOK",
                        " DOT BELOW;",
                        " RETROFLEX;", "DIAERESIS BELOW",
                        " RING BELOW",
-                       //
                        " CIRCUMFLEX BELOW", "HORN AND ACUTE",
                        " BREVE BELOW;", " HORN AND GRAVE",
                        " TILDE BELOW",
-                       " TOPBAR",
                        " DOT BELOW AND DOT ABOVE",
                        " RIGHT HALF RING", " HORN AND TILDE",
                        " CIRCUMFLEX AND DOT BELOW",
@@ -149,32 +138,24 @@ namespace Mono.Globalization.Unicode
                        " HORN AND HOOK ABOVE",
                        " HORN AND DOT",
                        // CIRCLED, PARENTHESIZED and so on
-                       "CIRCLED DIGIT", "CIRCLED NUMBER", "CIRCLED LATIN",
-                       "CIRCLED KATAKANA", "CIRCLED SANS-SERIF",
+                       "CIRCLED DIGIT", "CIRCLED NUMBER", "CIRCLED LATIN", "CIRCLED KATAKANA",
                        "PARENTHESIZED DIGIT", "PARENTHESIZED NUMBER", "PARENTHESIZED LATIN",
                        };
                byte [] diacriticWeights = new byte [] {
                        // LATIN.
-                       5,
-                       0xF, 0xE, 0x12,
                        0xE, 0xF, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
                        0x17, 0x19, 0x1A, 0x1B, 0x1C,
-                       //
-                       0x1D, 0x1D, 0x1E, 0x1E, 0x1E, 0x1F, 0x1F, 0x1F,
+                       0x1D, 0x1D, 0x1E, 0x1E, 0x1F, 0x1F, 0x1F,
                        0x20, 0x21, 0x22, 0x22, 0x23, 0x24,
-                       //
                        0x25, 0x25, 0x25, 0x26, 0x28, 0x28, 0x28,
                        0x29, 0x2A, 0x2B, 0x2C, 0x2F, 0x30,
-                       //
-                       0x40, 0x43, 0x43, 0x43, 0x44, 0x46, 0x47, 0x48,
+                       0x43, 0x43, 0x43, 0x44, 0x46, 0x48,
                        0x52, 0x55, 0x55, 0x57, 0x58, 0x59, 0x59, 0x5A,
-                       //
-                       0x60, 0x60, 0x61, 0x61, 0x63, 0x68, 0x68,
+                       0x60, 0x60, 0x61, 0x61, 0x63, 0x68, 
                        0x69, 0x69, 0x6A, 0x6D, 0x6E,
                        0x95, 0xAA,
                        // CIRCLED, PARENTHESIZED and so on.
-                       0xEE, 0xEE, 0xEE, 0xEE, 0xEE,
-                       0xF3, 0xF3, 0xF3
+                       0xEE, 0xEE, 0xEE, 0xEE, 0xF3, 0xF3, 0xF3, 0xF3
                        };
 
                int [] numberSecondaryWeightBounds = new int [] {
@@ -195,11 +176,11 @@ namespace Mono.Globalization.Unicode
                        // based on traditional Tamil consonants, except for
                        // Grantha (where Microsoft breaks traditionalism).
                        // http://www.angelfire.com/empire/thamizh/padanGaL
-                       '\u0B95', '\u0B99', '\u0B9A', '\u0B9E', '\u0B9F',
-                       '\u0BA3', '\u0BA4', '\u0BA8', '\u0BAA', '\u0BAE',
-                       '\u0BAF', '\u0BB0', '\u0BB2', '\u0BB5', '\u0BB4',
-                       '\u0BB3', '\u0BB1', '\u0BA9', '\u0B9C', '\u0BB8',
-                       '\u0BB7', '\u0BB9'};
+                       '\u0B99', '\u0B9A', '\u0B9E', '\u0B9F', '\u0BA3',
+                       '\u0BA4', '\u0BA8', '\u0BAA', '\u0BAE', '\u0BAF',
+                       '\u0BB0', '\u0BB2', '\u0BB5', '\u0BB4', '\u0BB3',
+                       '\u0BB1', '\u0BA9', '\u0B9C', '\u0BB8', '\u0BB7',
+                       '\u0BB9'};
 
                // cp -> character name (only for some characters)
                ArrayList sortableCharNames = new ArrayList ();
@@ -212,11 +193,9 @@ namespace Mono.Globalization.Unicode
 
                // cp -> level1 value
                Hashtable arabicLetterPrimaryValues = new Hashtable ();
-               Hashtable cyrillicLetterPrimaryValues = new Hashtable ();
 
                // letterName -> cp
                Hashtable arabicNameMap = new Hashtable ();
-               Hashtable cyrillicNameMap = new Hashtable ();
 
                // cp -> Hashtable [decompType] -> cp
                Hashtable nfkdMap = new Hashtable ();
@@ -227,11 +206,11 @@ namespace Mono.Globalization.Unicode
                ArrayList jisJapanese = new ArrayList ();
                ArrayList nonJisJapanese = new ArrayList ();
 
-               ushort [] cjkJA = new ushort [char.MaxValue +1];// - 0x4E00];
-               ushort [] cjkCHS = new ushort [char.MaxValue +1];// - 0x3100];
-               ushort [] cjkCHT = new ushort [char.MaxValue +1];// - 0x4E00];
-               ushort [] cjkKO = new ushort [char.MaxValue +1];// - 0x4E00];
-               byte [] cjkKOlv2 = new byte [char.MaxValue +1];// - 0x4E00];
+               ushort [] cjkJA = new ushort [char.MaxValue - 0x4E00];
+               ushort [] cjkCHS = new ushort [char.MaxValue - 0x3100];
+               ushort [] cjkCHT = new ushort [char.MaxValue - 0x4E00];
+               ushort [] cjkKO = new ushort [char.MaxValue - 0x4E00];
+               byte [] cjkKOlv2 = new byte [char.MaxValue - 0x4E00];
 
                byte [] ignorableFlags = new byte [char.MaxValue + 1];
 
@@ -273,12 +252,6 @@ sw.Close ();
                                source, typeof (byte), i);
                }
 
-               ushort [] CompressArray (ushort [] source, CodePointIndexer i)
-               {
-                       return (ushort []) CodePointIndexer.CompressArray  (
-                               source, typeof (ushort), i);
-               }
-
                void Serialize ()
                {
                        // Tailorings
@@ -288,7 +261,7 @@ sw.Close ();
                        byte [] level1 = new byte [map.Length];
                        byte [] level2 = new byte [map.Length];
                        byte [] level3 = new byte [map.Length];
-                       ushort [] widthCompat = new ushort [map.Length];
+                       int [] widthCompat = new int [map.Length];
                        for (int i = 0; i < map.Length; i++) {
                                categories [i] = map [i].Category;
                                level1 [i] = map [i].Level1;
@@ -300,7 +273,7 @@ sw.Close ();
                                case DecompositionSuper:
                                case DecompositionSub:
                                        // they are always 1 char
-                                       widthCompat [i] = (ushort) decompValues [decompIndex [i]];
+                                       widthCompat [i] = decompValues [decompIndex [i]];
                                        break;
                                }
                        }
@@ -316,36 +289,18 @@ sw.Close ();
                                MSCompatUnicodeTableUtil.Level2);
                        level3 = CompressArray (level3, 
                                MSCompatUnicodeTableUtil.Level3);
-                       widthCompat = (ushort []) CodePointIndexer.CompressArray (
-                               widthCompat, typeof (ushort),
+                       widthCompat = (int []) CodePointIndexer.CompressArray (
+                               widthCompat, typeof (int),
                                MSCompatUnicodeTableUtil.WidthCompat);
-                       cjkCHS = CompressArray (cjkCHS,
-                               MSCompatUnicodeTableUtil.CjkCHS);
-                       cjkCHT = CompressArray (cjkCHT,
-                               MSCompatUnicodeTableUtil.Cjk);
-                       cjkJA = CompressArray (cjkJA,
-                               MSCompatUnicodeTableUtil.Cjk);
-                       cjkKO = CompressArray (cjkKO,
-                               MSCompatUnicodeTableUtil.Cjk);
-                       cjkKOlv2 = CompressArray (cjkKOlv2,
-                               MSCompatUnicodeTableUtil.Cjk);
 
                        // Ignorables
-                       Result.WriteLine ("internal static readonly byte [] ignorableFlags = new byte [] {");
-#if Binary
-                       MemoryStream ms = new MemoryStream ();
-                       BinaryWriter binary = new BinaryWriter (ms);
-                       binary.Write (ignorableFlags.Length);
-#endif
+                       Result.WriteLine ("static byte [] ignorableFlags = new byte [] {");
                        for (int i = 0; i < ignorableFlags.Length; i++) {
                                byte value = ignorableFlags [i];
                                if (value < 10)
                                        Result.Write ("{0},", value);
                                else
                                        Result.Write ("0x{0:X02},", value);
-#if Binary
-                               binary.Write (value);
-#endif
                                if ((i & 0xF) == 0xF)
                                        Result.WriteLine ("// {0:X04}", i - 0xF);
                        }
@@ -353,19 +308,13 @@ sw.Close ();
                        Result.WriteLine ();
 
                        // Primary category
-                       Result.WriteLine ("internal static readonly byte [] categories = new byte [] {");
-#if Binary
-                       binary.Write (categories.Length);
-#endif
+                       Result.WriteLine ("static byte [] categories = new byte [] {");
                        for (int i = 0; i < categories.Length; i++) {
                                byte value = categories [i];
                                if (value < 10)
                                        Result.Write ("{0},", value);
                                else
                                        Result.Write ("0x{0:X02},", value);
-#if Binary
-                               binary.Write (value);
-#endif
                                if ((i & 0xF) == 0xF)
                                        Result.WriteLine ("// {0:X04}", i - 0xF);
                        }
@@ -373,19 +322,13 @@ sw.Close ();
                        Result.WriteLine ();
 
                        // Primary weight value
-                       Result.WriteLine ("internal static readonly byte [] level1 = new byte [] {");
-#if Binary
-                       binary.Write (level1.Length);
-#endif
+                       Result.WriteLine ("static byte [] level1 = new byte [] {");
                        for (int i = 0; i < level1.Length; i++) {
                                byte value = level1 [i];
                                if (value < 10)
                                        Result.Write ("{0},", value);
                                else
                                        Result.Write ("0x{0:X02},", value);
-#if Binary
-                               binary.Write (value);
-#endif
                                if ((i & 0xF) == 0xF)
                                        Result.WriteLine ("// {0:X04}", i - 0xF);
                        }
@@ -393,19 +336,13 @@ sw.Close ();
                        Result.WriteLine ();
 
                        // Secondary weight
-                       Result.WriteLine ("internal static readonly byte [] level2 = new byte [] {");
-#if Binary
-                       binary.Write (level2.Length);
-#endif
+                       Result.WriteLine ("static byte [] level2 = new byte [] {");
                        for (int i = 0; i < level2.Length; i++) {
-                               byte value = level2 [i];
+                               int value = level2 [i];
                                if (value < 10)
                                        Result.Write ("{0},", value);
                                else
                                        Result.Write ("0x{0:X02},", value);
-#if Binary
-                               binary.Write (value);
-#endif
                                if ((i & 0xF) == 0xF)
                                        Result.WriteLine ("// {0:X04}", i - 0xF);
                        }
@@ -413,19 +350,13 @@ sw.Close ();
                        Result.WriteLine ();
 
                        // Thirtiary weight
-                       Result.WriteLine ("internal static readonly byte [] level3 = new byte [] {");
-#if Binary
-                       binary.Write (level3.Length);
-#endif
+                       Result.WriteLine ("static byte [] level3 = new byte [] {");
                        for (int i = 0; i < level3.Length; i++) {
                                byte value = level3 [i];
                                if (value < 10)
                                        Result.Write ("{0},", value);
                                else
                                        Result.Write ("0x{0:X02},", value);
-#if Binary
-                               binary.Write (value);
-#endif
                                if ((i & 0xF) == 0xF)
                                        Result.WriteLine ("// {0:X04}", i - 0xF);
                        }
@@ -435,30 +366,18 @@ sw.Close ();
                        // Width insensitivity mappings
                        // (for now it is more lightweight than dumping the
                        // entire NFKD table).
-                       Result.WriteLine ("internal static readonly ushort [] widthCompat = new ushort [] {");
-#if Binary
-                       binary.Write (widthCompat.Length);
-#endif
+                       Result.WriteLine ("static int [] widthCompat = new int [] {");
                        for (int i = 0; i < widthCompat.Length; i++) {
-                               ushort value = widthCompat [i];
+                               int value = widthCompat [i];
                                if (value < 10)
                                        Result.Write ("{0},", value);
                                else
                                        Result.Write ("0x{0:X02},", value);
-#if Binary
-                               binary.Write (value);
-#endif
                                if ((i & 0xF) == 0xF)
                                        Result.WriteLine ("// {0:X04}", i - 0xF);
                        }
                        Result.WriteLine ("};");
                        Result.WriteLine ();
-#if Binary
-                       using (FileStream fs = File.Create ("../collation.core.bin")) {
-                               byte [] array = ms.ToArray ();
-                               fs.Write (array, 0, array.Length);
-                       }
-#endif
 
                        // CJK
                        SerializeCJK ("cjkCHS", cjkCHS, char.MaxValue);
@@ -470,12 +389,8 @@ sw.Close ();
 
                void SerializeCJK (string name, ushort [] cjk, int max)
                {
-                       int offset = 0;//char.MaxValue - cjk.Length;
+                       int offset = char.MaxValue - cjk.Length;
                        Result.WriteLine ("static ushort [] {0} = new ushort [] {{", name);
-#if Binary
-                       MemoryStream ms = new MemoryStream ();
-                       BinaryWriter binary = new BinaryWriter (ms);
-#endif
                        for (int i = 0; i < cjk.Length; i++) {
                                if (i + offset == max)
                                        break;
@@ -484,30 +399,17 @@ sw.Close ();
                                        Result.Write ("{0},", value);
                                else
                                        Result.Write ("0x{0:X04},", value);
-#if Binary
-                               binary.Write (value);
-#endif
                                if ((i & 0xF) == 0xF)
                                        Result.WriteLine ("// {0:X04}", i - 0xF + offset);
                        }
                        Result.WriteLine ("};");
                        Result.WriteLine ();
-#if Binary
-                       using (FileStream fs = File.Create (String.Format ("../collation.{0}.bin", name))) {
-                               byte [] array = ms.ToArray ();
-                               fs.Write (array, 0, array.Length);
-                       }
-#endif
                }
 
                void SerializeCJK (string name, byte [] cjk, int max)
                {
-                       int offset = 0;//char.MaxValue - cjk.Length;
+                       int offset = char.MaxValue - cjk.Length;
                        Result.WriteLine ("static byte [] {0} = new byte [] {{", name);
-#if Binary
-                       MemoryStream ms = new MemoryStream ();
-                       BinaryWriter binary = new BinaryWriter (ms);
-#endif
                        for (int i = 0; i < cjk.Length; i++) {
                                if (i + offset == max)
                                        break;
@@ -516,20 +418,11 @@ sw.Close ();
                                        Result.Write ("{0},", value);
                                else
                                        Result.Write ("0x{0:X02},", value);
-#if Binary
-                               binary.Write (value);
-#endif
                                if ((i & 0xF) == 0xF)
                                        Result.WriteLine ("// {0:X04}", i - 0xF + offset);
                        }
                        Result.WriteLine ("};");
                        Result.WriteLine ();
-#if Binary
-                       using (FileStream fs = File.Create (String.Format ("../collation.{0}.bin", name))) {
-                               byte [] array = ms.ToArray ();
-                               fs.Write (array, 0, array.Length);
-                       }
-#endif
                }
 
                void SerializeTailorings ()
@@ -538,10 +431,6 @@ sw.Close ();
                        Hashtable counts = new Hashtable ();
                        Result.WriteLine ("static char [] tailorings = new char [] {");
                        int count = 0;
-#if Binary
-                       MemoryStream ms = new MemoryStream ();
-                       BinaryWriter binary = new BinaryWriter (ms);
-#endif
                        foreach (Tailoring t in tailorings) {
                                if (t.Alias != 0)
                                        continue;
@@ -553,24 +442,15 @@ sw.Close ();
                                        Result.Write ("'\\x{0:X}', ", (int) c);
                                        if (++count % 16 == 0)
                                                Result.WriteLine (" // {0:X04}", count - 16);
-#if Binary
-                                       binary.Write ((ushort) c);
-#endif
                                }
                        }
                        Result.WriteLine ("};");
 
                        Result.WriteLine ("static TailoringInfo [] tailoringInfos = new TailoringInfo [] {");
-#if Binary
-                       byte [] rawdata = ms.ToArray ();
-                       ms = new MemoryStream ();
-                       binary = new BinaryWriter (ms);
-                       binary.Write (tailorings.Count);
-#endif
                        foreach (Tailoring t in tailorings) {
                                int target = t.Alias != 0 ? t.Alias : t.LCID;
                                if (!indexes.ContainsKey (target)) {
-                                       throw new Exception (String.Format ("WARNING: no corresponding definition for tailoring alias. From {0} to {1}", t.LCID, t.Alias));
+                                       Console.Error.WriteLine ("WARNING: no corresponding definition for tailoring alias. From {0} to {1}", t.LCID, t.Alias);
                                        continue;
                                }
                                int idx = (int) indexes [target];
@@ -581,26 +461,8 @@ sw.Close ();
                                                if (t2.LCID == t.LCID)
                                                        french = t2.FrenchSort;
                                Result.WriteLine ("new TailoringInfo ({0}, 0x{1:X}, {2}, {3}), ", t.LCID, idx, cnt, french ? "true" : "false");
-#if Binary
-                               binary.Write (t.LCID);
-                               binary.Write (idx);
-                               binary.Write (cnt);
-                               binary.Write (french);
-#endif
                        }
                        Result.WriteLine ("};");
-#if Binary
-                       binary.Write ((byte) 0xFF);
-                       binary.Write ((byte) 0xFF);
-                       binary.Write (rawdata.Length / 2);
-                       binary.Write (rawdata, 0, rawdata.Length);
-
-
-                       using (FileStream fs = File.Create ("../collation.tailoring.bin")) {
-                               byte [] array = ms.ToArray ();
-                               fs.Write (array, 0, array.Length);
-                       }
-#endif
                }
 
                #region Parse
@@ -705,8 +567,8 @@ sw.Close ();
                        if (idx > 0) {
                                string source = s.Substring (0, idx).Trim ();
                                string [] l = s.Substring (idx + 1).Trim ().Split (' ');
-                               byte [] b = new byte [4];
-                               for (int i = 0; i < 4; i++) {
+                               byte [] b = new byte [5];
+                               for (int i = 0; i < 5; i++) {
                                        if (l [i] == "*")
                                                b [i] = 0;
                                        else
@@ -750,9 +612,8 @@ sw.Close ();
                                        if (cp > char.MaxValue)
                                                continue;
 
-                                       double v = double.Parse (value);
                                        for (int i = cp; i <= cpEnd; i++)
-                                               unicodeAge [i] = v;
+                                               unicodeAge [i] = double.Parse (value);
                                }
                        }
                        unicodeAge [0] = double.MaxValue; // never be supported
@@ -775,10 +636,7 @@ sw.Close ();
                        this.decompValues = (int [])
                                decompValues.ToArray (typeof (int));
                }
-
-               char previousLatinTarget = char.MinValue;
-               byte [] diacriticalOffset = new byte ['Z' - 'A' + 1];
-
+               
                void ProcessUnidataLine (string s, ArrayList decompValues)
                {
                        int idx = s.IndexOf ('#');
@@ -798,83 +656,31 @@ sw.Close ();
 
                        string name = values [0];
 
-                       // SPECIAL CASE: rename some characters for diacritical
-                       // remapping. FIXME: why are they different?
-                       // FIXME: it's still not working.
-                       if (cp == 0x018B || cp == 0x018C)
-                               name = name.Replace ("TOPBAR", "STROKE");
-
                        // isSmallCapital
                        if (s.IndexOf ("SMALL CAPITAL") > 0)
                                isSmallCapital [cp] = true;
 
                        // latin mapping by character name
-                       if (s.IndexOf ("LATIN") >= 0) {
+                       if (s.IndexOf ("LATIN") > 0) {
                                int lidx = s.IndexOf ("LETTER DOTLESS ");
                                int offset = lidx + 15;
                                if (lidx < 0) {
                                        lidx = s.IndexOf ("LETTER TURNED ");
                                        offset = lidx + 14;
                                }
-                               if (lidx < 0) {
-                                       lidx = s.IndexOf ("LETTER CAPITAL ");
-                                       offset = lidx + 15;
-                               }
-                               if (lidx < 0) {
-                                       lidx = s.IndexOf ("LETTER SCRIPT ");
-                                       offset = lidx + 14;
-                               }
                                if (lidx < 0) {
                                        lidx = s.IndexOf ("LETTER ");
                                        offset = lidx + 7;
                                }
                                char c = lidx > 0 ? s [offset] : char.MinValue;
-                               char n = s [offset + 1];
-                               char target = char.MinValue;
                                if ('A' <= c && c <= 'Z' &&
-                                       (n == ' ') || n == ';') {
-                                       target = c;
-                                       // FIXME: After 'Z', I cannot reset this state.
-                                       previousLatinTarget = c == 'Z' ? char.MinValue : c;
-                               }
-
-                               if (s.Substring (offset).StartsWith ("ALPHA"))
-                                       target = 'A';
-                               else if (s.Substring (offset).StartsWith ("TONE SIX"))
-                                       target = 'B';
-                               else if (s.Substring (offset).StartsWith ("OPEN O"))
-                                       target = 'C';
-                               else if (s.Substring (offset).StartsWith ("SCHWA"))
-                                       target = 'E';
-                               else if (s.Substring (offset).StartsWith ("ENG"))
-                                       target = 'N';
-                               else if (s.Substring (offset).StartsWith ("OI;")) // 01A2,01A3
-                                       target = 'O';
-                               else if (s.Substring (offset).StartsWith ("YR;")) // 01A2,01A3
-                                       target = 'R';
-                               else if (s.Substring (offset).StartsWith ("TONE TWO"))
-                                       target = 'S';
-                               else if (s.Substring (offset).StartsWith ("ESH"))
-                                       target = 'S';
-
-                               if (target == char.MinValue)
-                                       target = previousLatinTarget;
-
-                               if (target != char.MinValue) {
-                                       ArrayList entry = (ArrayList) latinMap [target];
+                                       (s.Length == offset + 1 || s [offset + 1] == ' ')) {
+                                       ArrayList entry = (ArrayList) latinMap [c];
                                        if (entry == null) {
                                                entry = new ArrayList ();
-                                               latinMap [target] = entry;
+                                               latinMap [c] = entry;
                                        }
                                        entry.Add (cp);
-                                       // FIXME: This secondary weight is hack.
-                                       // They are here because they must not
-                                       // be identical to the corresponding
-                                       // ASCII latins.
-                                       if (c != target && diacritical [cp] == 0) {
-                                               diacriticalOffset [c - 'A']++;
-                                               diacritical [cp] = (byte) (diacriticalOffset [c - 'A'] + 0x7C);
-                                       }
                                }
                        }
 
@@ -978,13 +784,6 @@ sw.Close ();
                                                value = 0x18;
                                        else
                                                value = 0x19;
-                               } else if (s.IndexOf ("SHADE") > 0)
-                                       value = 0x19;
-                               // SPECIAL CASE: BOX DRAWING DIAGONAL patterns
-                               switch (cp) {
-                               case 0x2571: value = 0xF; break;
-                               case 0x2572: value = 0x10; break;
-                               case 0x2573: value = 0x11; break;
                                }
                                if (value >= 0)
                                        boxValues.Add (new DictionaryEntry (
@@ -996,59 +795,20 @@ sw.Close ();
                        if (0x2100 <= cp && cp <= 0x213F &&
                                Char.IsSymbol ((char) cp))
                                sortableCharNames.Add (
-                                       new DictionaryEntry (cp, name));
+                                       new DictionaryEntry (cp, values [0]));
                        else if (0x3380 <= cp && cp <= 0x33DD)
                                sortableCharNames.Add (new DictionaryEntry (
-                                       cp, name.Substring (7)));
+                                       cp, values [0].Substring (7)));
 
                        // diacritical weights by character name
-if (diacritics.Length != diacriticWeights.Length)
-throw new Exception (String.Format ("Should not happen. weights are {0} while labels are {1}", diacriticWeights.Length, diacritics.Length));
-                       for (int d = 0; d < diacritics.Length; d++) {
-                               if (s.IndexOf (diacritics [d]) > 0) {
-                                       diacritical [cp] += diacriticWeights [d];
-                                       if (s.IndexOf ("COMBINING") >= 0)
-                                               diacritical [cp] -= (byte) 2;
-                                       continue;
-                               }
-                               // also process "COMBINING blah" here
-                               // For now it is limited to cp < 0x0370
-//                             if (cp < 0x0300 || cp >= 0x0370)
-//                                     continue;
-                               string tmp = diacritics [d].TrimEnd (';');
-                               if (tmp.IndexOf ("WITH ") == 0)
-                                       tmp = tmp.Substring (4);
-                               tmp = String.Concat ("COMBINING", (tmp [0] != ' ' ? " " : ""), tmp);
-                               if (name == tmp)
-                                       diacritical [cp] = (byte) (diacriticWeights [d] - 2);
-//if (name == tmp)
-//Console.Error.WriteLine ("======= {2:X04} : '{0}' / '{1}'", name, tmp, cp);
-                       }
+                       for (int d = 0; d < diacritics.Length; d++)
+                               if (s.IndexOf (diacritics [d]) > 0)
+                                       diacritical [cp] |= diacriticWeights [d];
                        // Two-step grep required for it.
                        if (s.IndexOf ("FULL STOP") > 0 &&
                                (s.IndexOf ("DIGIT") > 0 || s.IndexOf ("NUMBER") > 0))
                                diacritical [cp] |= 0xF4;
 
-                       // Cyrillic letter name
-                       if (0x0430 <= cp && cp <= 0x0486 &&
-                               Char.IsLetter ((char) cp)) {
-                               byte value = (byte) (cyrillicNameMap.Count * 3 + 0x06);
-                               // Get primary letter name i.e.
-                               // XXX part of CYRILLIC LETTER XXX yyy
-                               // e.g. "IZHITSA" for "IZHITSA DOUBLE GRAVE".
-                               string letterName =
-                                       name.Substring (name.IndexOf ("LETTER ") + 7);
-                               int tmpIdx = letterName.IndexOf (' ');
-                               letterName = tmpIdx < 0 ? letterName : letterName.Substring (0, tmpIdx);
-//Console.Error.WriteLine ("Arabic name for {0:X04} is {1}", cp, letterName);
-                               if (cyrillicNameMap.ContainsKey (letterName))
-                                       value = (byte) cyrillicLetterPrimaryValues [cyrillicNameMap [letterName]];
-                               else
-                                       cyrillicNameMap [letterName] = cp;
-
-                               cyrillicLetterPrimaryValues [cp] = value;
-                       }
-
                        // Arabic letter name
                        if (0x0621 <= cp && cp <= 0x064A &&
                                Char.GetUnicodeCategory ((char) cp)
@@ -1073,8 +833,8 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                                (cp == 0x0640) ?
                                                // 0x0640 is special: it does
                                                // not start with ARABIC LETTER
-                                               name :
-                                               name.Substring (14);
+                                               values [0] :
+                                               values [0].Substring (14);
                                        int tmpIdx = letterName.IndexOf (' ');
                                        letterName = tmpIdx < 0 ? letterName : letterName.Substring (0, tmpIdx);
 //Console.Error.WriteLine ("Arabic name for {0:X04} is {1}", cp, letterName);
@@ -1090,7 +850,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        // Japanese square letter
                        if (0x3300 <= cp && cp <= 0x3357)
                                if (!ExistsJIS (cp))
-                                       nonJisJapanese.Add (new NonJISCharacter (cp, name));
+                                       nonJisJapanese.Add (new NonJISCharacter (cp, values [0]));
 
                        // normalizationType
                        string decomp = values [4];
@@ -1336,37 +1096,26 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 
                void ParseJISOrder (string filename)
                {
-                       int line = 1;
-                       try {
-                               using (StreamReader file =
-                                       new StreamReader (filename)) {
-                                       for (;file.Peek () >= 0; line++)
-                                               ProcessJISOrderLine (file.ReadLine ());
+                       using (StreamReader file =
+                               new StreamReader (filename)) {
+                               while (file.Peek () >= 0) {
+                                       string s = file.ReadLine ();
+                                       int idx = s.IndexOf ('#');
+                                       if (idx >= 0)
+                                               s = s.Substring (0, idx).Trim ();
+                                       if (s.Length == 0)
+                                               continue;
+                                       idx = s.IndexOf (' ');
+                                       if (idx < 0)
+                                               continue;
+                                       // They start with "0x" so cut them out.
+                                       int jis = int.Parse (s.Substring (2, idx), NumberStyles.HexNumber);
+                                       int cp = int.Parse (s.Substring (idx + 3).Trim (), NumberStyles.HexNumber);
+                                       jisJapanese.Add (new JISCharacter (cp, jis));
                                }
-                       } catch (Exception) {
-                               Console.Error.WriteLine ("---- line {0}", line);
-                               throw;
                        }
                }
 
-               char [] ws = new char [] {'\t', ' '};
-
-               void ProcessJISOrderLine (string s)
-               {
-                       int idx = s.IndexOf ('#');
-                       if (idx >= 0)
-                               s = s.Substring (0, idx).Trim ();
-                       if (s.Length == 0)
-                               return;
-                       idx = s.IndexOfAny (ws);
-                       if (idx < 0)
-                               return;
-                       // They start with "0x" so cut them out.
-                       int jis = int.Parse (s.Substring (2, idx - 2), NumberStyles.HexNumber);
-                       int cp = int.Parse (s.Substring (idx).Trim ().Substring (2), NumberStyles.HexNumber);
-                       jisJapanese.Add (new JISCharacter (cp, jis));
-               }
-
                void ParseCJK (string zhXML, string jaXML, string koXML)
                {
                        XmlDocument doc = new XmlDocument ();
@@ -1380,7 +1129,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        // Chinese Simplified
                        category = "chs";
                        arr = cjkCHS;
-                       offset = 0;//char.MaxValue - arr.Length;
+                       offset = char.MaxValue - arr.Length;
                        doc.Load (zhXML);
                        s = doc.SelectSingleNode ("/ldml/collations/collation[@type='pinyin']/rules/pc").InnerText;
                        v = 0x8008;
@@ -1397,7 +1146,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        // Chinese Traditional
                        category = "cht";
                        arr = cjkCHT;
-                       offset = 0;//char.MaxValue - arr.Length;
+                       offset = char.MaxValue - arr.Length;
                        s = doc.SelectSingleNode ("/ldml/collations/collation[@type='stroke']/rules/pc").InnerText;
                        v = 0x8002;
                        foreach (char c in s) {
@@ -1413,7 +1162,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        // Japanese
                        category = "ja";
                        arr = cjkJA;
-                       offset = 0;//char.MaxValue - arr.Length;
+                       offset = char.MaxValue - arr.Length;
                        doc.Load (jaXML);
                        s = doc.SelectSingleNode ("/ldml/collations/collation/rules/pc").InnerText;
                        v = 0x8008;
@@ -1439,7 +1188,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        //
                        category = "ko";
                        arr = cjkKO;
-                       offset = 0;//char.MaxValue - arr.Length;
+                       offset = char.MaxValue - arr.Length;
                        doc.Load (koXML);
                        foreach (XmlElement reset in doc.SelectNodes ("/ldml/collations/collation/rules/reset")) {
                                XmlElement sc = (XmlElement) reset.NextSibling;
@@ -1489,14 +1238,6 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                        if (Char.IsNumber ((char) cp))
                                                diacritical [cp] = weight;
 
-                       // Modify some decomposition equivalence
-                       decompType [0xFE31] = 0;
-                       decompIndex [0xFE31] = 0;
-                       decompLength [0xFE31] = 0;
-                       decompType [0xFE32] = 0;
-                       decompIndex [0xFE32] = 0;
-                       decompLength [0xFE32] = 0;
-
                        // Korean parens numbers
                        for (int i = 0x3200; i <= 0x321C; i++)
                                diacritical [i] = 0xA;
@@ -1569,26 +1310,16 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
 
                        // Hyphen/Dash : 06 81 - 06 90
                        for (int i = 0; i < char.MaxValue; i++) {
-                               if (!IsIgnorable (i) &&
-                                       Char.GetUnicodeCategory ((char) i) ==
-                                       UnicodeCategory.DashPunctuation) {
-                                       AddCharMapGroup2 ((char) i, 6, 1, 0);
-                                       if (i == 0x2011) {
-                                               // SPECIAL: add 2027 and 2043
-                                               // Maybe they are regarded the 
-                                               // same hyphens in "central"
-                                               // position.
-                                               AddCharMap ('\u2027', 6, 1);
-                                               AddCharMap ('\u2043', 6, 1);
-                                       }
-                               }
+                               if (Char.GetUnicodeCategory ((char) i)
+                                       == UnicodeCategory.DashPunctuation)
+                                       AddCharMapGroupTail ((char) i, 6, 1);
                        }
 
                        // Arabic variable weight chars 06 A0 -
                        fillIndex [6] = 0xA0;
                        // vowels
                        for (int i = 0x64B; i <= 0x650; i++)
-                               AddArabicCharMap ((char) i);
+                               AddCharMapGroupTail ((char) i, 6, 1);
                        // sukun
                        AddCharMapGroup ('\u0652', 6, 1, 0);
                        // shadda
@@ -1632,19 +1363,6 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                if (!IsIgnorable (i))
                                        AddCharMap ((char) i, 0x1, 1);
 
-                       // FIXME: needs more love here (it should eliminate
-                       // all the hacky code above).
-                       for (int i = 0x0300; i < 0x0370; i++)
-                               if (!IsIgnorable (i) && diacritical [i] != 0
-                                       /* especiall here*/ && !map [i].Defined)
-                                       map [i] = new CharMapEntry (
-                                               0x1, 0x1, diacritical [i]);
-
-                       fillIndex [0x1] = 0xAC;
-                       for (int i = 0x07A6; i <= 0x07B0; i++)
-                               if (!IsIgnorable (i))
-                                       AddCharMap ((char) i, 0x1, 1);
-
                        // LAMESPEC: It should not stop at '\u20E1'. There are
                        // a few more characters (which, however, would
                        // overflow level 2 unless we start before 0xDD).
@@ -1675,7 +1393,7 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                        AddCharMap ('\u2423', 0x7, 1, 0); // open box
                        #endregion
 
-                       // category 09 - continued symbols from 08
+                       // FIXME: 09 should be more complete.
                        fillIndex [0x9] = 2;
                        // misc tech mark
                        for (int cp = 0x2300; cp <= 0x237A; cp++)
@@ -1783,18 +1501,14 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                                fillIndex [0xC]++;
 
                                        int xcp;
-                                       if (currValue <= 10) {
-                                               xcp = (int) prevValue + 0x2170 - 1;
-                                               AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-                                               xcp = (int) prevValue + 0x2160 - 1;
-                                               AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-                                               fillIndex [0xC] += 2;
-                                               xcp = (int) prevValue + 0x3021 - 1;
-                                               AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-                                               fillIndex [0xC]++;
-                                       }
-                                       else if (currValue == 11)
-                                               fillIndex [0xC]++;
+                                       xcp = (int) prevValue + 0x2170 - 1;
+                                       AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
+                                       xcp = (int) prevValue + 0x2160 - 1;
+                                       AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
+                                       fillIndex [0xC] += 2;
+                                       xcp = (int) prevValue + 0x3021 - 1;
+                                       AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
+                                       fillIndex [0xC]++;
                                }
                                if (prevValue < currValue)
                                        prevValue = currValue;
@@ -1812,23 +1526,23 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                else if (cp == 0x3021) // FIXME: why?
                                        fillIndex [0xC]++;
                                AddCharMapGroup ((char) cp, 0xC, 0, diacritical [cp]);
+
                                if (addnew || cp <= '9') {
-                                       int mod = (int) currValue - 1;
                                        int xcp;
                                        if (1 <= currValue && currValue <= 10) {
-                                               xcp = mod + 0x2776;
+                                               xcp = cp - 0x31 + 0x2776;
                                                AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-                                               xcp = mod + 0x2780;
+                                               xcp = cp - 0x31 + 0x2780;
                                                AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-                                               xcp = mod + 0x278A;
+                                               xcp = cp - 0x31 + 0x278A;
                                                AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
                                        }
                                        if (1 <= currValue && currValue <= 20) {
-                                               xcp = mod + 0x2460;
+                                               xcp = cp - 0x31 + 0x2460;
                                                AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-                                               xcp = mod + 0x2474;
+                                               xcp = cp - 0x31 + 0x2474;
                                                AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
-                                               xcp = mod + 0x2488;
+                                               xcp = cp - 0x31 + 0x2488;
                                                AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
                                        }
                                }
@@ -1878,9 +1592,6 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                //   but inside a-to-z range.
                                // 3.there are some expanded characters that
                                //   are not part of Unicode Standard NFKD.
-                               // 4. some characters are letter in IsLetter
-                               //   but not in sortkeys (maybe unicode version
-                               //   difference caused it).
                                switch (i) {
                                // 1. skipping them does not make sense
 //                             case 0xD0: case 0xF0: case 0x131: case 0x138:
@@ -1898,12 +1609,11 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                case 0xFE: // Icelandic Thorn
                                case 0xDF: // German ss
                                case 0xFF: // German ss
-                               // 4.
-                               case 0x1C0: case 0x1C1: case 0x1C2: case 0x1C3:
                                // not classified yet
 //                             case 0x1A6: case 0x1A7: case 0x1A8: case 0x1A9:
 //                             case 0x1AA: case 0x1B1: case 0x1B7: case 0x1B8:
 //                             case 0x1B9: case 0x1BA: case 0x1BB: case 0x1BF:
+//                             case 0x1C0: case 0x1C1: case 0x1C2: case 0x1C3:
 //                             case 0x1DD:
                                        continue;
                                }
@@ -1924,45 +1634,18 @@ throw new Exception (String.Format ("Should not happen. weights are {0} while la
                                if (Char.IsLetter ((char) i))
                                        AddLetterMap ((char) i, 0xF, 1);
 
-                       // Cyrillic - character name order
-                       fillIndex [0x10] = 0x6;
-//*
-for (int i = 0; i < orderedCyrillic.Length; i++)
-Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
-
+                       // Cyrillic - UCA order w/ some modification
+                       fillIndex [0x10] = 0x3;
                        // table which is mostly from UCA DUCET.
                        for (int i = 0; i < orderedCyrillic.Length; i++) {
-                               char c = Char.ToUpper (orderedCyrillic [i], CultureInfo.InvariantCulture);
-                               if (!IsIgnorable ((int) c) &&
-                                       c <= '\u045C' &&
-                                       Char.IsLetter (c)) {
-                                       AddLetterMap (c, 0x10, 0);
-                                       fillIndex [0x10] += 3;
-                               }
+                               char c = orderedCyrillic [i];
+                               if (Char.IsLetter (c))
+                                       AddLetterMap (c, 0x10, 3);
                        }
-                       /*
                        for (int i = 0x0460; i < 0x0481; i++) {
-                               if (Char.IsLetter ((char) i)) {
-                                       AddLetterMap ((char) i, 0x10, 0);
-                                       fillIndex [0x10] += 3;
-                               }
-                       }
-                       */
-/*
-                       for (int i = 0x0400; i <= 0x0486; i++) {
-                               if (!Char.IsLetter ((char) i)) {
-//                                     AddCharMap ((char) i, 0x1, 1);
-                                       continue;
-                               }
-                               if (!cyrillicLetterPrimaryValues.ContainsKey (i)) {
-                                       Console.Error.WriteLine ("no value for {0:x04}", i);
-                                       continue;
-                               }
-                               fillIndex [0x10] = 
-                                       (byte) cyrillicLetterPrimaryValues [i];
-                               AddLetterMap ((char) i, 0x10, 0);
+                               if (Char.IsLetter ((char) i))
+                                       AddLetterMap ((char) i, 0x10, 3);
                        }
-*/
 
                        // Armenian
                        fillIndex [0x11] = 0x3;
@@ -2012,26 +1695,10 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                                if (!IsIgnorable (i))
                                        AddLetterMap ((char) i, 0x14, 2);
                        fillIndex [0x14] = 0xB;
-                       for (int i = 0x0905; i < 0x093A; i++) {
-                               if (i == 0x0928)
-                                       AddCharMap ('\u0929', 0x14, 0, 8);
-                               if (i == 0x0930)
-                                       AddCharMap ('\u0931', 0x14, 0, 8);
-                               if (i == 0x0933)
-                                       AddCharMap ('\u0934', 0x14, 0, 8);
+                       for (int i = 0x0905; i < 0x093A; i++)
                                if (Char.IsLetter ((char) i))
                                        AddLetterMap ((char) i, 0x14, 4);
-                               if (i == 0x090B)
-                                       AddCharMap ('\u0960', 0x14, 4);
-                               if (i == 0x090C)
-                                       AddCharMap ('\u0961', 0x14, 4);
-                       }
-                       fillIndex [0x14] = 0xDA;
-                       for (int i = 0x093E; i < 0x0945; i++)
-                               if (!IsIgnorable (i))
-                                       AddLetterMap ((char) i, 0x14, 2);
-                       fillIndex [0x14] = 0xEC;
-                       for (int i = 0x0945; i < 0x094F; i++)
+                       for (int i = 0x093E; i < 0x094F; i++)
                                if (!IsIgnorable (i))
                                        AddLetterMap ((char) i, 0x14, 2);
 
@@ -2060,81 +1727,33 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 
                        // Gurmukhi. orderedGurmukhi is from UCA
                        // FIXME: it does not look equivalent to UCA.
-                       fillIndex [0x16] = 04;
-                       fillIndex [0x1] = 3;
+                       fillIndex [0x1] = 03;
+                       fillIndex [0x16] = 02;
                        for (int i = 0; i < orderedGurmukhi.Length; i++) {
                                char c = orderedGurmukhi [i];
                                if (IsIgnorable ((int) c))
                                        continue;
-                               if (IsIgnorableNonSpacing (c)) {
+                               if (!Char.IsLetter (c)) {
                                        AddLetterMap (c, 0x1, 1);
                                        continue;
                                }
                                if (c == '\u0A3C' || c == '\u0A4D' ||
                                        '\u0A66' <= c && c <= '\u0A71')
                                        continue;
-                               // SPECIAL CASE: U+A38 = U+A36 at primary level (why?)
-                               byte shift = 4;
-                               if (c == '\u0A36' || c == '\u0A16' || c == '\u0A17' || c == '\u0A5B' || c == '\u0A5E')
-                                       shift = 0;
-                               AddLetterMap (c, 0x16, shift);
+                               AddLetterMap (c, 0x16, 4);
                        }
 
                        // Gujarati. orderedGujarati is from UCA
-                       fillIndex [0x17] = 0x4;
-                       // nonspacing marks
-                       map [0x0A4D] = new CharMapEntry (1, 0, 0x3);
-                       map [0x0ABD] = new CharMapEntry (1, 0, 0x3);
-                       map [0x0A3C] = new CharMapEntry (1, 0, 0x4);
-                       map [0x0A71] = new CharMapEntry (1, 0, 0x6);
-                       map [0x0ABC] = new CharMapEntry (1, 0, 0xB);
-                       map [0x0A70] = new CharMapEntry (1, 0, 0xE);
-                       // letters go first.
-                       for (int i = 0; i < orderedGujarati.Length; i++) {
-                               // SPECIAL CASE
-                               char c = orderedGujarati [i];
-                               if (Char.IsLetter (c)) {
-                                       // SPECIAL CASES
-                                       if (c == '\u0AB3' || c == '\u0A32')
-                                               continue;
-                                       if (c == '\u0A33') {
-                                               AddCharMap ('\u0A32', 0x17, 0);
-                                               AddCharMap ('\u0A33', 0x17, 4, 4);
-                                               continue;
-                                       }
-                                       if (c == '\u0A8B')
-                                               AddCharMap ('\u0AE0', 0x17, 0, 5);
-                                       AddCharMap (c, 0x17, 4);
-
-                                       if (c == '\u0AB9')
-                                               AddCharMap ('\u0AB3', 0x17, 6);
-                               }
-                       }
-                       // non-letters
-                       byte gujaratiShift = 4;
-                       fillIndex [0x17] = 0xC0;
-                       for (int i = 0; i < orderedGujarati.Length; i++) {
-                               char c = orderedGujarati [i];
-                               if (fillIndex [0x17] == 0xCC)
-                                       gujaratiShift = 3;
-                               if (!Char.IsLetter (c)) {
-                                       // SPECIAL CASES
-                                       if (c == '\u0A82')
-                                               AddCharMap ('\u0A81', 0x17, 2);
-                                       if (c == '\u0AC2')
-                                               fillIndex [0x17]++;
-                                       AddLetterMap (c, 0x17, gujaratiShift);
-                               }
-                       }
+                       fillIndex [0x17] = 02;
+                       for (int i = 0; i < orderedGujarati.Length; i++)
+                               AddLetterMap (orderedGujarati [i], 0x17, 4);
 
                        // Oriya
-                       fillIndex [0x1] = 03;
                        fillIndex [0x18] = 02;
                        for (int i = 0x0B00; i < 0x0B7F; i++) {
                                switch (Char.GetUnicodeCategory ((char) i)) {
                                case UnicodeCategory.NonSpacingMark:
                                case UnicodeCategory.DecimalDigitNumber:
-                                       AddLetterMap ((char) i, 0x1, 1);
                                        continue;
                                }
                                AddLetterMap ((char) i, 0x18, 1);
@@ -2145,11 +1764,13 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                        AddCharMap ('\u0BD7', 0x19, 0);
                        fillIndex [0x19] = 0xA;
                        // vowels
-                       for (int i = 0x0B82; i <= 0x0B94; i++)
-                               if (!IsIgnorable ((char) i))
+                       for (int i = 0x0BD7; i < 0x0B94; i++)
+                               if (Char.IsLetter ((char) i))
                                        AddCharMap ((char) i, 0x19, 2);
                        // special vowel
-                       fillIndex [0x19] = 0x28;
+                       fillIndex [0x19] = 0x24;
+                       AddCharMap ('\u0B94', 0x19, 0);
+                       fillIndex [0x19] = 0x26;
                        // The array for Tamil consonants is a constant.
                        // Windows uses a sequence almost identical to TAM from
                        // tamilnet, differing slightly in the Grantha letters.
@@ -2181,17 +1802,7 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                        for (int i = 0x0C80; i < 0x0CE5; i++) {
                                if (i == 0x0CD5 || i == 0x0CD6)
                                        continue; // ignore
-                               if (i == 0x0CB1 || i == 0x0CB3 || i == 0x0CDE)
-                                       continue; // shift after 0xCB9
                                AddCharMap ((char) i, 0x1B, 3);
-                               if (i == 0x0CB9) {
-                                       // SPECIAL CASES: but why?
-                                       AddCharMap ('\u0CB1', 0x1B, 3); // RRA
-                                       AddCharMap ('\u0CB3', 0x1B, 3); // LLA
-                                       AddCharMap ('\u0CDE', 0x1B, 3); // FA
-                               }
-                               if (i == 0x0CB2)
-                                       AddCharMap ('\u0CE1', 0x1B, 3); // vocalic LL
                        }
                        
                        // Malayalam
@@ -2207,14 +1818,13 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                        // Thai ... note that it breaks 0x1E wall after E2B!
                        // Also, all Thai characters have level 2 value 3.
                        fillIndex [0x1E] = 2;
-                       for (int i = 0xE40; i <= 0xE44; i++)
+                       for (int i = 0xE44; i < 0xE48; i++)
                                AddCharMap ((char) i, 0x1E, 1, 3);
                        for (int i = 0xE01; i < 0xE2B; i++)
-                               AddCharMap ((char) i, 0x1E, 6, 3);
+                               AddCharMap ((char) i, 0x1E, 6, 0);
                        fillIndex [0x1F] = 5;
                        for (int i = 0xE2B; i < 0xE30; i++)
-                               AddCharMap ((char) i, 0x1F, 6, 3);
-                       fillIndex [0x1F] = 0x1E;
+                               AddCharMap ((char) i, 0x1F, 6, 0);
                        for (int i = 0xE30; i < 0xE3B; i++)
                                AddCharMap ((char) i, 0x1F, 1, 3);
                        // some Thai characters remain.
@@ -2231,15 +1841,8 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 
                        // Georgian. orderedGeorgian is from UCA DUCET.
                        fillIndex [0x21] = 5;
-                       for (int i = 0; i < orderedGeorgian.Length; i++) {
-                               char c = orderedGeorgian [i];
-                               if (map [(int) c].Defined)
-                                       continue;
-                               AddCharMap (c, 0x21, 0);
-                               if (c < '\u10F6')
-                                       AddCharMap ((char) (c - 0x30), 0x21, 0);
-                               fillIndex [0x21] += 5;
-                       }
+                       for (int i = 0; i < orderedGeorgian.Length; i++)
+                               AddLetterMap (orderedGeorgian [i], 0x21, 5);
 
                        // Japanese Kana.
                        fillIndex [0x22] = 2;
@@ -2264,16 +1867,6 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                                                AddKanaMap (cp, kanaLines [gyo]);
                                        fillIndex [0x22]++;
 
-                                       if (cp == 0x30AB) {
-                                               // add small 'ka' (before normal one)
-                                               AddKanaMap (0x30F5, 1);
-                                               kanaOffset++;
-                                       }
-                                       if (cp == 0x30B1) {
-                                               // add small 'ke' (before normal one)
-                                               AddKanaMap (0x30F6, 1);
-                                               kanaOffset++;
-                                       }
                                        if (cp == 0x3061) {
                                                // add small 'Tsu' (before normal one)
                                                AddKanaMap (0x3063, 1);
@@ -2308,8 +1901,7 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                        fillIndex [0x22] = 0x97;
                        jisJapanese.Sort (JISComparer.Instance);
                        foreach (JISCharacter j in jisJapanese)
-                               if (0x3300 <= j.CP && j.CP <= 0x3357)
-                                       AddCharMap ((char) j.CP, 0x22, 1);
+                               AddCharMap ((char) j.CP, 0x22, 1);
                        // non-JIS Japanese square chars.
                        nonJisJapanese.Sort (NonJISComparer.Instance);
                        foreach (NonJISCharacter j in nonJisJapanese)
@@ -2339,18 +1931,14 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                                map [cp] = new CharMapEntry (0x24,
                                        (byte) (map [cp - 1].Level1 + 2),
                                        0);
-                       // FIXME: Syriac NonSpacingMark should go here.
 
                        // Thaana
                        // FIXME: it turned out that it does not look like UCA
                        fillIndex [0x24] = 0x6E;
                        for (int i = 0; i < orderedThaana.Length; i++) {
-                               char c = orderedThaana [i];
-                               if (IsIgnorableNonSpacing ((int) c))
+                               if (IsIgnorableNonSpacing (i))
                                        continue;
-                               AddCharMap (c, 0x24, 2);
-                               if (c == '\u0782') // SPECIAL CASE: why?
-                                       fillIndex [0x24] += 2;
+                               AddCharMap (orderedThaana [i], 0x24, 2);
                        }
                        #endregion
 
@@ -2389,7 +1977,7 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                        + "<{\u1113 \u1116}, \u3165,"
                                + "\u11C5, \u11C6=\u3166,, \u11C7, \u11C8,"
                                + "\u11AC, \u11C9, \u11AD, \u1103=\u11AE  >"
-                       + "<\u1117, \u11CA, \u1104, \u11CB > \u1105=\u11AF >"
+                       + "<\u1117, \u11CA, \u1104, \u11CB > \u1105 >"
                        + "<{\u1118 \u111B}, \u11B0, [\u11CC \u11D0], \u11B1,"
                                + "[\u11D1 \u11D2], \u11B2,"
                                + "[\u11D3 \u11D5], \u11B3,"
@@ -2397,11 +1985,10 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                                + "\u11B6=\u11D8, \u3140,, \u11D9, \u1106=\u11B7 >"
                        + "<{\u111C \u111D}, [\u11DA \u11E2], \u1107=\u11B8 >"
                        + "<{\u111E \u1120}, \u3172,, \u3173, \u11E3, \u1108 >"
-                       + "<{\u1121 \u112C}, \u3144 \u11B9, \u3174, \u3175,,,, "
-                               + "\u3176,, \u3177, [\u11E4 \u11E6] \u3178,"
-                               + "\u3179, \u1109=\u11BA,,, \u3214=\u3274 <>"
-                       + "<{\u112D \u1133}, \u11E7 \u317A, \u317B, \u317C "
-                               + "[\u11E8 \u11E9],, \u11EA \u317D,, \u110A=\u11BB,,, >"
+                       + "<{\u1121 \u112C}, \u11B9,,,,,,,,, [\u11E4 \u11E6],,"
+                               + "\u1109=\u11BA,,, \u3214=\u3274 <>"
+                       + "<{\u112D \u1133}, \u11E7,, [\u11E8 \u11E9],,"
+                               + "\u11EA,, \u110A=\u11BB,,, >"
                        + "<{\u1134 \u1140}, \u317E,,,,,,, \u11EB,"
                                + "\u110B=\u11BC, [\u1161 \u11A2], \u1160 >"
                        + "<{\u1141 \u114C}, \u11EE, \u11EC, \u11ED,,,,, "
@@ -2469,40 +2056,6 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                                }
                        }
 
-                       // Some Jamo NFKD.
-                       for (int i = 0x3200; i < 0x3300; i++) {
-                               if (IsIgnorable (i) || map [i].Defined)
-                                       continue;
-                               int ch = 0;
-                               // w/ bracket
-                               if (decompLength [i] == 4 &&
-                                       decompValues [decompIndex [i]] == '(')
-                                       ch = decompIndex [i] + 1;
-                               // circled
-                               else if (decompLength [i] == 2 &&
-                                       decompValues [decompIndex [i] + 1] == '\u1161')
-                                       ch = decompIndex [i];
-                               else if (decompLength [i] == 1)
-                                       ch = decompIndex [i];
-                               else
-                                       continue;
-                               ch = decompValues [ch];
-                               if (ch < 0x1100 || 0x1200 < ch &&
-                                       ch < 0xAC00 || 0xD800 < ch)
-                                       continue;
-
-                               // SPECIAL CASE ?
-                               int offset = i < 0x3260 ? 1 : 0;
-                               if (0x326E <= i && i <= 0x3273)
-                                       offset = 1;
-
-                               map [i] = new CharMapEntry (map [ch].Category,
-                                       (byte) (map [ch].Level1 + offset),
-                                       map [ch].Level2);
-//                                     Console.Error.WriteLine ("Jamo {0:X04} -> {1:X04}", i, decompValues [decompIndex [i] + 1]);
-                       }
-
-
                        #endregion
 
                        // Letterlike characters and CJK compatibility square
@@ -2573,18 +2126,10 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                                if (IsIgnorable (i))
                                        continue;
 
-                               // FIXME: actually those reset should not be 
-                               // done but here I put for easy goal.
-                               if (i == 0x0700)
-                                       fillIndex [0x7] = 0xE2;
-                               if (i == 0x2016)
-                                       fillIndex [0x7] = 0x77;
-
                                // SPECIAL CASES:
                                switch (i) {
                                case 0xAB: // 08
                                case 0xB7: // 0A
-                               case 0xBB: // 08
                                case 0x2329: // 09
                                case 0x232A: // 09
                                        continue;
@@ -2609,15 +2154,20 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                                }
                        }
                        // Control pictures
-                       // FIXME: it should not need to reset level 1, but
-                       // it's for easy goal.
-                       fillIndex [0x7] = 0xB6;
                        for (int i = 0x2400; i <= 0x2421; i++)
                                AddCharMap ((char) i, 0x7, 1, 0);
                        #endregion
 
                        // FIXME: for 07 xx we need more love.
 
+                       // FIXME: 08 should be more complete.
+                       fillIndex [0x8] = 2;
+                       for (int cp = 0; cp < char.MaxValue; cp++)
+                               if (!map [cp].Defined &&
+                                       Char.GetUnicodeCategory ((char) cp) ==
+                                       UnicodeCategory.MathSymbol)
+                                       AddCharMapGroup ((char) cp, 0x8, 1, 0);
+
                        // Characters w/ diacritical marks (NFKD)
                        for (int i = 0; i <= char.MaxValue; i++) {
                                if (map [i].Defined || IsIgnorable (i))
@@ -2656,54 +2206,6 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                                
                        }
 
-                       // category 08 - symbols
-                       fillIndex [0x8] = 2;
-                       // Here Windows mapping is not straightforward. It is
-                       // not based on computation but seems manual sorting.
-                       AddCharMapGroup ('+', 0x8, 1, 0); // plus
-                       AddCharMapGroup ('\u2212', 0x8, 1, 0); // minus
-                       AddCharMapGroup ('\u229D', 0x8, 1, 0); // minus
-                       AddCharMapGroup ('\u2297', 0x8, 1, 0); // mul
-                       AddCharMapGroup ('\u2044', 0x8, 1, 0); // div
-                       AddCharMapGroup ('\u2215', 0x8, 1, 0); // div
-                       AddCharMapGroup ('\u2217', 0x8, 1, 0); // mul
-                       AddCharMapGroup ('\u2218', 0x8, 1, 0); // ring
-                       AddCharMapGroup ('\u2219', 0x8, 1, 0); // bullet
-                       AddCharMapGroup ('\u2213', 0x8, 1, 0); // minus-or-plus
-                       AddCharMapGroup ('\u003C', 0x8, 1, 0); // <
-                       AddCharMapGroup ('\u227A', 0x8, 1, 0); // precedes relation
-                       AddCharMapGroup ('\u22B0', 0x8, 1, 0); // precedes under relation
-
-                       for (int cp = 0; cp < 0x2300; cp++) {
-                               if (cp == 0x200)
-                                       cp = 0x2200; // skip to 2200
-                               if (cp == 0xAC) // SPECIAL CASE: skip
-                                       continue;
-                               if (!map [cp].Defined &&
-//                                     Char.GetUnicodeCategory ((char) cp) ==
-//                                     UnicodeCategory.MathSymbol)
-                                       Char.IsSymbol ((char) cp))
-                                       AddCharMapGroup ((char) cp, 0x8, 1, 0);
-                               // SPECIAL CASES: no idea why Windows sorts as such
-                               switch (cp) {
-                               case 0x3E:
-                                       AddCharMap ('\u227B', 0x8, 1, 0);
-                                       AddCharMap ('\u22B1', 0x8, 1, 0);
-                                       break;
-                               case 0xB1:
-                                       AddCharMapGroup ('\u00AB', 0x8, 1, 0);
-                                       AddCharMapGroup ('\u226A', 0x8, 1, 0);
-                                       AddCharMapGroup ('\u00BB', 0x8, 1, 0);
-                                       AddCharMapGroup ('\u226B', 0x8, 1, 0);
-                                       break;
-                               case 0xF7:
-                                       AddCharMap ('\u01C0', 0x8, 1, 0);
-                                       AddCharMap ('\u01C1', 0x8, 1, 0);
-                                       AddCharMap ('\u01C2', 0x8, 1, 0);
-                                       break;
-                               }
-                       }
-
                        #region Level2 adjustment
                        // Arabic Hamzah
                        diacritical [0x624] = 0x5;
@@ -2714,6 +2216,7 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                        diacritical [0x649] = 0x5; // 'alif maqs.uurah
                        diacritical [0x64A] = 0x7; // Yaa'
 
+
                        for (int i = 0; i < char.MaxValue; i++) {
                                byte mod = 0;
                                byte cat = map [i].Category;
@@ -2723,7 +2226,7 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                                        mod = diacritical [i];
                                        break;
                                case 0x13: // Arabic
-                                       if (diacritical [i] == 0 && i >= 0xFE8D)
+                                       if (diacritical [i] == 0)
                                                mod = 0x8; // default for arabic
                                        break;
                                }
@@ -2735,23 +2238,15 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                        }
                        #endregion
 
-                       // FIXME: this is hack but those NonSpacingMark 
-                       // characters and still undefined are likely to
-                       // be nonspacing.
+                       // FIXME: this is hack but those which are 
+                       // NonSpacingMark characters and still undefined
+                       // are likely to be nonspacing.
                        for (int i = 0; i < char.MaxValue; i++)
                                if (!map [i].Defined &&
                                        !IsIgnorable (i) &&
                                        Char.GetUnicodeCategory ((char) i) ==
                                        UnicodeCategory.NonSpacingMark)
                                        AddCharMap ((char) i, 1, 1);
-
-                       // FIXME: this is hack but those Symbol characters
-                       // are likely to fall into 0xA category.
-                       for (int i = 0; i < char.MaxValue; i++)
-                               if (!map [i].Defined &&
-                                       !IsIgnorable (i) &&
-                                       Char.IsSymbol ((char) i))
-                                       AddCharMap ((char) i, 0xA, 1);
                }
 
                private void IncrementSequentialIndex (ref byte hangulCat)
@@ -2917,10 +2412,6 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                        AddCharMapCJK (c, ref category);
 
                        // LAMESPEC: see below.
-                       if (c == '\u5B78') {
-                               AddCharMapCJK ('\u32AB', ref category);
-                               AddCharMapCJK ('\u323B', ref category);
-                       }
                        if (c == '\u52DE') {
                                AddCharMapCJK ('\u3298', ref category);
                                AddCharMapCJK ('\u3238', ref category);
@@ -2950,8 +2441,7 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                                // mix Chinese and Japanese Kanji when
                                // ordering those characters.
                                switch (w) {
-                               case 0x32A2: case 0x3298: case 0x3238:
-                               case 0x32A9: case 0x323B: case 0x32AB:
+                               case 0x32A2: case 0x3298: case 0x3238: case 0x32A9:
                                        continue;
                                }
 
@@ -3002,28 +2492,6 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                                        AddCharMap (vertical, category, updateCount, level2);
                }
 
-               private void AddArabicCharMap (char c)
-               {
-                       byte category = 6;
-                       byte updateCount = 1;
-                       byte level2 = 0;
-
-                       // itself
-                       AddCharMap (c, category, 0, level2);
-
-                       // Since nfkdMap is problematic to have two or more
-                       // NFKD to an identical character, here I iterate all.
-                       for (int c2 = 0; c2 < char.MaxValue; c2++) {
-                               if (decompLength [c2] == 0)
-                                       continue;
-                               int idx = decompIndex [c2] + decompLength [c2] - 1;
-                               if ((int) (decompValues [idx]) == (int) c)
-                                       AddCharMap ((char) c2, category,
-                                               0, level2);
-                       }
-                       fillIndex [category] += updateCount;
-               }
-
                char ToFullWidth (char c)
                {
                        return ToDecomposed (c, DecompositionFull, false);
@@ -3074,12 +2542,6 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
 
                private byte ComputeLevel3WeightRaw (char c) // add 2 for sortkey value
                {
-                       // CJK compat
-                       if ('\u3192' <= c && c <= '\u319F')
-                               return 0;
-                       // Japanese reading marks
-                       if (c == '\u3001' || c == '\u3002')
-                               return 2;
                        // Korean
                        if ('\u11A8' <= c && c <= '\u11F9')
                                return 2;
@@ -3087,11 +2549,6 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                                return 4;
                        if ('\u3130' <= c && c <= '\u3164')
                                return 5;
-                       if ('\u3165' <= c && c <= '\u318E')
-                               return 4;
-                       // Georgian Capital letters
-                       if ('\u10A0' <= c && c <= '\u10C5')
-                               return 0x10;
                        // numbers
                        if ('\u2776' <= c && c <= '\u277F')
                                return 4;
@@ -3100,13 +2557,13 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                        if ('\u2776' <= c && c <= '\u2793')
                                return 0xC;
                        if ('\u2160' <= c && c <= '\u216F')
-                               return 0x10;
+                               return 0x18;
                        if ('\u2181' <= c && c <= '\u2182')
                                return 0x18;
                        // Arabic
                        if ('\u2135' <= c && c <= '\u2138')
                                return 4;
-                       if ('\uFE80' <= c && c < '\uFF00') {
+                       if ('\uFE80' <= c && c < '\uFE8E') {
                                // 2(Isolated)/8(Final)/0x18(Medial)
                                switch (decompType [(int) c]) {
                                case DecompositionIsolated:
@@ -3207,7 +2664,6 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                        // those ranges.
                        case 0x4d8: case 0x4d9:
                        case 0x4e8: case 0x4e9:
-                       case 0x70F:
                        case 0x3036: case 0x303f:
                        case 0x337b: case 0xfb1e:
                                return false;
@@ -3566,7 +3022,7 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                {
                        JISCharacter j1 = (JISCharacter) o1;
                        JISCharacter j2 = (JISCharacter) o2;
-                       return j1.JIS - j2.JIS;
+                       return j2.JIS - j1.JIS;
                }
        }
 
@@ -3776,7 +3232,7 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
                                for (int i = 0; i < Source.Length; i++)
                                        ret [i + 1] = Source [i];
                                // null terminate
-                               for (int i = 0; i < 4; i++)
+                               for (int i = 0; i < 5; i++)
                                        ret [i + Source.Length + 2] = (char) SortKey [i];
                                return ret;
                        }