2005-07-13 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / CollationDataStructures.txt
index e2eb0c374914c75e0abd61300000a3741677c115..01451c8593731c9e4775bff9f80ae87ba76ebe23 100644 (file)
@@ -16,6 +16,9 @@
        CollationSource class is used to represent a culture-specific collation
        resource set.
 
+       Note that there are many characters whose sortkey cannot be acquired
+       from those tables alone. For example, Korean Jamo (U+1113 - U+115F) have
+       primary keys which are more than 2 bytes long.
 
 
 ** Manual tasks required to maintain the source.
                ushort tailoringIndex;
                ushort tailoringCount;
                short reverseAccentOrder; /* 1:French sort. 0:Normal */
-       } CollationSource;
+       } TailoringInfo;
 
        Those [*] characters will be compressed using CodePointIndexer
        whose max value is char.MaxValue+1.
 
        // Holds sortkey basis.
-       char [*] category;
-       char [*] level1;
-       char [*] level2;
-       char [*] level3;
-       char [*] ignorableFlags; // 1:complete, 2:symbol, 3:nonspace
+       guint8 [*] category;
+       guint8 [*] level1;
+       guint8 [*] level2;
+       guint8 [*] level3;
+       guint8 [*] ignorableFlags; // 1:complete, 2:symbol, 3:nonspace
        gunichar [*] widthCompat;
 
        // Holds special arrays for CJK order which is culture dependent.
-       ushort [*] cjkCHS;
-       ushort [*] cjkCHT;
-       ushort [*] cjkJA;
-       ushort [*] cjkKO;
-       char [*] cjkKOlv2;
+       guint16 [*] cjkCHS;
+       guint16 [*] cjkCHT;
+       guint16 [*] cjkJA;
+       guint16 [*] cjkKO;
+       guint8 [*] cjkKOlv2;
        gunichar [whole_tailoring_count] tailorings;
        CollationSource [culture_count] collationSources;
 
-
+       "tailorings" table holds all the contraction mappings and expansion
+       mappings for all cultures.
+       Actually it holds not only "culture dependent tailorings" but also
+       the expansions for the Invariant culture.
+       CollationSource.tailoringIndex holds the index into the "tailorings"
+       array at which the tailorings for the culture begin.
+       The number of tailorings for the culture is given by tailoringCount.
+
+       If tailoringIndex is 0 then there are no tailorings for the specific
+       culture (it should still handle invariant tailorings).
+
+** CollationSourceUtil.cs
+       static CodePointIndexer Category;
+       static CodePointIndexer Level1;
+       static CodePointIndexer Level2;
+       static CodePointIndexer Level3;
+       static CodePointIndexer Ignorable;
+       static CodePointIndexer WidthCompat;
+       static CodePointIndexer CjkCHS;
+       static CodePointIndexer Cjk;
 
 ** CollatorSource.cs
 
        static byte [*] level3;
        static byte [*] ignorableFlags; // 1:complete, 2:symbol, 3:nonspace
        static char [*] widthCompat;
-       static CodePointIndexer category;
-       static CodePointIndexer level1;
-       static CodePointIndexer level2;
-       static CodePointIndexer level3;
-       static CodePointIndexer widthCompat;
        static char [] tailorings;
        static ushort [] cjkCHS;
        static ushort [] cjkCHT;
        static ushort [] cjkJA;
        static ushort [] cjkKO;
        static byte [] cjkKOlv2;
-       static CodePointIndexer [] cjkIndexer;
 
-       class CollationSource
+       class TailoringInfo // instantiated for each CultureInfo
        {
                // Primary constants
                int tailoringIndex;
                int tailoringCount;
-               bool reverseAccentOrder;
+               bool frenchSort;
 
+/*
                // This array is set according to CJK type, and CJK type
                // will be hardcoded, being identified from LCID.
                ushort [] cjk;
-
                // For Korean, level2 table is specially treated.
-
-               // Computed values for optimization in use.
-               byte [*] hasTailoringHead;
-               byte [*] hasTailoringTail;
+               byte [] cjkLevel2;
+*/
        }