CollationSource class is used to represent a culture-specific collation
resource set.
+ Note that there are many characters whose sortkeys cannot be obtained
+ from those tables alone. For example, Korean Jamo (U+1113 - U+115F) have
+ primary keys that are longer than 2 bytes.
** Manual tasks required to maintain the source.
ushort tailoringIndex;
ushort tailoringCount;
short reverseAccentOrder; /* 1:French sort. 0:Normal */
- } CollationSource;
+ } TailoringInfo;
Those [*] characters will be compressed using CodePointIndexer
whose max value is char.MaxValue+1.
// Holds sortkey basis.
- byte [*] category;
- byte [*] level1;
- byte [*] level2;
- byte [*] level3;
- char [*] widthCompat;
+ guint8 [*] category;
+ guint8 [*] level1;
+ guint8 [*] level2;
+ guint8 [*] level3;
+ guint8 [*] ignorableFlags; // 1:complete, 2:symbol, 3:nonspace
+ gunichar [*] widthCompat;
// Holds special arrays for CJK order which is culture dependent.
- ushort [*] cjkCHS;
- ushort [*] cjkCHT;
- ushort [*] cjkJA;
- ushort [*] cjkKO;
- byte [*] cjkKOlv2;
- char [whole_tailoring_count] tailorings;
+ guint16 [*] cjkCHS;
+ guint16 [*] cjkCHT;
+ guint16 [*] cjkJA;
+ guint16 [*] cjkKO;
+ guint8 [*] cjkKOlv2;
+ gunichar [whole_tailoring_count] tailorings;
CollationSource [culture_count] collationSources;
-
+ The "tailorings" table holds all contraction mappings and expansion
+ mappings for all cultures.
+ It actually contains not only "culture dependent tailorings" but also
+ the expansions for the Invariant culture.
+ CollationSource.tailoringIndex holds the index into the "tailorings"
+ array at which the tailorings for the culture begin.
+ The tailorings for the culture are counted by tailoringCount.
+
+ If tailoringIndex is 0 then there are no tailorings for the specific
+ culture (the invariant tailorings must still be handled).
+
+** CollationSourceUtil.cs
+ static CodePointIndexer Category;
+ static CodePointIndexer Level1;
+ static CodePointIndexer Level2;
+ static CodePointIndexer Level3;
+ static CodePointIndexer Ignorable;
+ static CodePointIndexer WidthCompat;
+ static CodePointIndexer CjkCHS;
+ static CodePointIndexer Cjk;
** CollatorSource.cs
static byte [*] level1;
static byte [*] level2;
static byte [*] level3;
+ static byte [*] ignorableFlags; // 1:complete, 2:symbol, 3:nonspace
static char [*] widthCompat;
- static CodePointIndexer category;
- static CodePointIndexer level1;
- static CodePointIndexer level2;
- static CodePointIndexer level3;
- static CodePointIndexer widthCompat;
static char [] tailorings;
static ushort [] cjkCHS;
static ushort [] cjkCHT;
static ushort [] cjkJA;
static ushort [] cjkKO;
static byte [] cjkKOlv2;
- static CodePointIndexer [] cjkIndexer;
- class CollationSource
+ class TailoringInfo // instantiated for each CultureInfo
{
// Primary constants
int tailoringIndex;
int tailoringCount;
- bool reverseAccentOrder;
+ bool frenchSort;
+/*
// This array is set according to CJK type, and CJK type
// will be hardcoded, being identified from LCID.
ushort [] cjk;
-
// For Korean, level2 table is specially treated.
-
- // Computed values for optimization in use.
- byte [*] hasTailoringHead;
- byte [*] hasTailoringTail;
+ byte [] cjkLevel2;
+*/
}