1 #define USE_MANAGED_RESOURCE
3 using System.Collections;
4 using System.Globalization;
5 using System.Reflection;
6 using System.Runtime.CompilerServices;
8 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
10 namespace Mono.Globalization.Unicode
// Describes the tailoring data registered for one culture: the culture id,
// the position and length of its entries, and a flag read from the tailoring
// resource.
internal class TailoringInfo
{
	// Culture identifier; GetTailoringInfo matches entries on this value.
	public readonly int LCID;
	// Offset of the culture's first tailoring entry.
	public readonly int TailoringIndex;
	// Length of the culture's tailoring entries, counted from TailoringIndex.
	public readonly int TailoringCount;
	// Flag stored with the entry in the tailoring resource.
	public readonly bool FrenchSort;

	// Stores every argument unchanged in the field of the same name.
	public TailoringInfo (int lcid, int tailoringIndex, int tailoringCount, bool frenchSort)
	{
		// LCID has to be stored as well: lookups by culture id compare
		// against it, and a readonly field cannot be set anywhere else.
		LCID = lcid;
		TailoringIndex = tailoringIndex;
		TailoringCount = tailoringCount;
		FrenchSort = frenchSort;
	}
}
28 #region Tailoring support classes
29 // Possible mapping types are:
31 // - string to string (ReplacementMap)
32 // - string to SortKey (SortKeyMap)
33 // - diacritical byte to byte (DiacriticalMap)
35 // There could be mapping from string to sortkeys, but
36 // for now there is none as such.
// One contraction-style tailoring entry: a source character sequence mapped
// either to a replacement string or to a sort key.
internal class Contraction
{
	// Character sequence the entry applies to; ContractionComparer orders
	// entries by this array.
	public readonly char [] Source;
	// only either of them is used.
	public readonly string Replacement;
	public readonly byte [] SortKey;

	// Stores the three arguments as given (no copies are made). Callers
	// pass null for whichever of replacement / sortkey does not apply.
	public Contraction (char [] source,
		string replacement, byte [] sortkey)
	{
		// All three readonly fields must be set here; leaving Source or
		// SortKey unassigned would make them permanently null.
		Source = source;
		Replacement = replacement;
		SortKey = sortkey;
	}
}
// Orders Contraction instances by their Source arrays: element by element
// on the char values, then shorter-before-longer when one array is a prefix
// of the other.
internal class ContractionComparer : IComparer
{
	// Shared instance; the comparer holds no state.
	public static readonly ContractionComparer Instance =
		new ContractionComparer ();

	// Returns a negative, zero or positive value following the IComparer
	// contract. Both arguments must be Contraction instances; anything
	// else makes the casts throw InvalidCastException.
	public int Compare (object o1, object o2)
	{
		Contraction c1 = (Contraction) o1;
		Contraction c2 = (Contraction) o2;
		char [] a1 = c1.Source;
		char [] a2 = c2.Source;
		int min = a1.Length > a2.Length ?
			a2.Length : a1.Length;
		for (int i = 0; i < min; i++)
			// Only a differing element decides the order; equal
			// elements must fall through to the next position.
			if (a1 [i] != a2 [i])
				return a1 [i] - a2 [i];
		// Common prefix is identical: the shorter source sorts first.
		return a1.Length - a2.Length;
	}
}
// One DiacriticalMap tailoring entry: BuildTailoringTables creates an
// instance from a pair of bytes read from the tailoring table.
// NOTE(review): the field declarations and the constructor body are not
// visible in this excerpt; Level2MapComparer orders instances by a member
// named Source.
74 internal class Level2Map
// source / replace: the two bytes of the mapping, in that order.
79 public Level2Map (byte source, byte replace)
// Orders Level2Map instances by ascending Source value.
internal class Level2MapComparer : IComparer
{
	// Shared instance; the comparer holds no state.
	public static readonly Level2MapComparer Instance =
		new Level2MapComparer ();

	// Both arguments must be Level2Map instances (the casts throw
	// otherwise). The result is the difference of the two Source values.
	public int Compare (object o1, object o2)
	{
		Level2Map left = (Level2Map) o1;
		Level2Map right = (Level2Map) o2;
		int delta = left.Source - right.Source;
		return delta;
	}
}
// Static access to the collation tables: per-code-point category and level
// 1-3 bytes, ignorable flags, per-culture CJK tables and culture tailorings.
101 unsafe internal class MSCompatUnicodeTable
103 public static int MaxExpansionLength = 3;
// Base pointers of the core tables. They are read through the matching UUtil
// indexer (see Category, Level1, Level2, Level3 and the IsIgnorable* methods)
// and are assigned by the static constructor.
105 static readonly byte* ignorableFlags;
106 static readonly byte* categories;
107 static readonly byte* level1;
108 static readonly byte* level2;
109 static readonly byte* level3;
110 // static readonly ushort* widthCompat;
// Per-culture CJK tables: category and level 1 for CHS, CHT, JA and KO, plus
// a level 2 table for KO only. These are not readonly because they are
// assigned outside field initialisation (FillCJKCore writes them in the
// resource-based build).
111 static byte* cjkCHScategory;
112 static byte* cjkCHTcategory;
113 static byte* cjkJAcategory;
114 static byte* cjkKOcategory;
115 static byte* cjkCHSlv1;
116 static byte* cjkCHTlv1;
117 static byte* cjkJAlv1;
118 static byte* cjkKOlv1;
119 static byte* cjkKOlv2;
// Offset of the first length field in a collation resource: the readers check
// byte 0 against UUtil.ResourceVersion and start reading at this offset.
121 const int ResourceVersionSize = 1;
// Looks up the tailoring descriptor registered for a culture.
// lcid: culture identifier to search for.
// Returns the first entry of tailoringInfos whose LCID equals lcid, or null
// when no entry matches.
public static TailoringInfo GetTailoringInfo (int lcid)
{
	foreach (TailoringInfo info in tailoringInfos)
		if (info.LCID == lcid)
			return info;
	// Nothing registered for this culture; every path must return a value.
	return null;
}
// Collects tailoring entries from the shared `tailorings` table, starting at
// t.TailoringIndex and spanning t.TailoringCount chars, and hands them back as
// two sorted arrays.
//   contractions: receives the SortKeyMap and ReplacementMap entries, sorted
//                 with ContractionComparer.
//   diacriticals: receives the DiacriticalMap entries, sorted with
//                 Level2MapComparer.
// Throws NotImplementedException (reported as an internal error) for an entry
// the switch does not recognise.
// NOTE(review): several lines of this method are not visible in this excerpt
// (a parameter, the loop header, the declarations of `ss`, `l` and `src`, the
// index updates and the `break`s); the comments below describe only what the
// visible lines do.
131 public static void BuildTailoringTables (CultureInfo culture,
133 ref Contraction [] contractions,
134 ref Level2Map [] diacriticals)
136 // collect tailoring entries.
137 ArrayList cmaps = new ArrayList ();
138 ArrayList dmaps = new ArrayList ();
139 char [] tarr = tailorings;
140 int idx = t.TailoringIndex;
141 int end = idx + t.TailoringCount;
// The char at idx is the entry type marker.
145 switch (tarr [idx]) {
// Source chars run up to a 0 terminator; the four chars after the terminator
// are each narrowed to one byte of the sort key.
146 case '\x1': // SortKeyMap
148 while (tarr [ss] != 0)
150 src = new char [ss - idx];
151 Array.Copy (tarr, idx, src, 0, ss - idx);
152 byte [] sortkey = new byte [4];
153 for (int i = 0; i < 4; i++)
154 sortkey [i] = (byte) tarr [ss + 1 + i];
155 cmaps.Add (new Contraction (
156 src, null, sortkey));
// The two chars following the marker are narrowed to the (source, replace)
// byte pair.
160 case '\x2': // DiacriticalMap
161 dmaps.Add (new Level2Map (
162 (byte) tarr [idx + 1],
163 (byte) tarr [idx + 2]));
// Source chars run up to a 0 terminator, followed by the replacement string,
// which is also 0-terminated.
166 case '\x3': // ReplacementMap
168 while (tarr [ss] != 0)
170 src = new char [ss - idx];
171 Array.Copy (tarr, idx, src, 0, ss - idx);
174 while (tarr [l] != 0)
176 string r = new string (tarr, ss, l - ss);
177 cmaps.Add (new Contraction (
// Presumably the default branch: the marker matched none of the cases above.
182 throw new NotImplementedException (String.Format ("Mono INTERNAL ERROR (Should not happen): Collation tailoring table is broken for culture {0} ({1}) at 0x{2:X}", culture.LCID, culture.Name, idx));
// Sort both lists before converting them to the typed result arrays.
185 cmaps.Sort (ContractionComparer.Instance);
186 dmaps.Sort (Level2MapComparer.Instance);
187 contractions = cmaps.ToArray (typeof (Contraction))
189 diacriticals = dmaps.ToArray (typeof (Level2Map))
// Points the caller's CJK table references at the static per-culture tables.
// Four groups of assignments are visible: CHS, CHT, JA and KO.
//   cjkIndexer: set to UUtil.CjkCHS in the CHS group and UUtil.Cjk in the
//               other three.
//   lv2Indexer: only the KO group assigns it (UUtil.Cjk).
// NOTE(review): the construct selecting a group from `name` and some of the
// assignments are not visible in this excerpt.
193 static void SetCJKReferences (string name,
194 ref CodePointIndexer cjkIndexer,
195 ref byte* catTable, ref byte* lv1Table,
196 ref CodePointIndexer lv2Indexer, ref byte* lv2Table)
198 // as a part of mscorlib.dll, this invocation is
199 // somewhat extraneous (pointers were already assigned).
203 catTable = cjkCHScategory;
204 lv1Table = cjkCHSlv1;
205 cjkIndexer = UUtil.CjkCHS;
208 catTable = cjkCHTcategory;
209 lv1Table = cjkCHTlv1;
210 cjkIndexer = UUtil.Cjk;
213 catTable = cjkJAcategory;
215 cjkIndexer = UUtil.Cjk;
218 catTable = cjkKOcategory;
221 cjkIndexer = UUtil.Cjk;
222 lv2Indexer = UUtil.Cjk;
// Returns the byte stored in the categories table for code point cp.
public static byte Category (int cp)
{
	// Translate the code point into its position in the table first.
	int slot = UUtil.Category.ToIndex (cp);
	return categories [slot];
}
// Returns the byte stored in the level1 table for code point cp.
public static byte Level1 (int cp)
{
	// Translate the code point into its position in the table first.
	int slot = UUtil.Level1.ToIndex (cp);
	return level1 [slot];
}
// Returns the byte stored in the level2 table for code point cp.
public static byte Level2 (int cp)
{
	// Translate the code point into its position in the table first.
	int slot = UUtil.Level2.ToIndex (cp);
	return level2 [slot];
}
// Returns the byte stored in the level3 table for code point cp.
public static byte Level3 (int cp)
{
	// Translate the code point into its position in the table first.
	int slot = UUtil.Level3.ToIndex (cp);
	return level3 [slot];
}
// Reports whether code point cp is ignorable. The final test is true when cp
// has an entry in the ignorable-flags table (index >= 0) whose value is
// exactly 7.
// NOTE(review): the statements executed when either `if` below matches are
// not visible in this excerpt; the existing comments suggest that both cases
// are treated as ignorable. Confirm against the full source.
// NOTE(review): `(char) cp` keeps only the low 16 bits, so for cp above
// 0xFFFF the Unicode category is looked up for a different character.
247 public static bool IsIgnorable (int cp)
249 UnicodeCategory uc = Char.GetUnicodeCategory ((char) cp);
250 // This check eliminates some extraneous code areas
251 if (uc == UnicodeCategory.OtherNotAssigned)
253 // Some characters in Surrogate area are ignored.
254 if (0xD880 <= cp && cp < 0xDB80)
256 int i = UUtil.Ignorable.ToIndex (cp);
257 return i >= 0 && ignorableFlags [i] == 7;
260 // for (int i = 0; i <= char.MaxValue; i++)
261 // if (Char.GetUnicodeCategory ((char) i)
262 // == UnicodeCategory.OtherNotAssigned
263 // && ignorableFlags [i] != 7)
264 // Console.WriteLine ("{0:X04}", i);
// Reports whether the ignorable-flags entry for cp has the 0x2 bit set.
public static bool IsIgnorableSymbol (int cp)
{
	int slot = UUtil.Ignorable.ToIndex (cp);
	// A negative index yields false without touching the table.
	if (slot < 0)
		return false;
	return (ignorableFlags [slot] & 0x2) != 0;
}
// Reports whether the ignorable-flags entry for cp has the 0x4 bit set.
public static bool IsIgnorableNonSpacing (int cp)
{
	int slot = UUtil.Ignorable.ToIndex (cp);
	// A negative index yields false without touching the table.
	if (slot < 0)
		return false;
	return (ignorableFlags [slot] & 0x4) != 0;
	// An alternative formulation, not in use:
	// return categories [UUtil.Category.ToIndex (cp)] == 1;
}
// Folds the code points 0x3041..0x3094 onto the values 0x60 higher and
// returns every other value unchanged.
public static int ToKanaTypeInsensitive (int i)
{
	// IgnoreKanaType does not treat half-width katakana as equivalent
	// to the full-width ones, so a single fixed-offset range suffices.
	if (i < 0x3041 || i > 0x3094)
		return i;
	return i + 0x60;
}
290 // Note that currently indexer optimizes this table a lot,
291 // which might have resulted in bugs.
// Maps code point i to another code point by a fixed offset that depends on
// the range i falls in. Three mappings are visible:
//   i - 0xFF00 + 0x20    (0xFF00 maps to 0x20)
//   0xFFE9 - 0x2190 + i  (0x2190 maps to 0xFFE9)
//   i - 0x3130 + 0xFFA0  (0x3130 maps to 0xFFA0; taken when i < 0x3164)
// NOTE(review): most of the range checks that select a mapping, and the
// fall-through result, are not visible in this excerpt.
292 public static int ToWidthCompat (int i)
298 return i - 0xFF00 + 0x20;
320 return 0xFFE9 - 0x2190 + i;
351 // Other Kana compat characters' width
352 // compatibility is considered in special weight.
357 if (i < 0x3164) { // Hangul compat
358 return i - 0x3130 + 0xFFA0;
362 // 0x32D0-0x32FE are Kana compat characters, whose
363 // width compatibility is considered in special weight.
367 #region Level 4 properties (Kana)
// Classifies c through a chain of range tests. Two results are visible:
// the `c < U+309D` branch returns whether c is below U+3099, and the
// `c < U+3100` branch returns true for everything except U+30FB.
// NOTE(review): the first test of the chain and the results of the other
// branches are not visible in this excerpt.
369 public static bool HasSpecialWeight (char c)
373 else if ('\uFF66' <= c && c < '\uFF9E')
375 else if ('\u3300' <= c)
377 else if (c < '\u309D')
378 return (c < '\u3099');
379 else if (c < '\u3100')
380 return c != '\u30FB';
381 else if (c < '\u32D0')
383 else if (c < '\u32FF')
388 // FIXME: it should be removed at some stage
389 // (will become unused).
// NOTE(review): only the signature is visible in this excerpt; the meaning of
// the returned byte cannot be confirmed from here.
390 public static byte GetJapaneseDashType (char c)
// True when c lies in the inclusive range U+FF66..U+FF9D.
public static bool IsHalfWidthKana (char c)
{
	if (c < '\uFF66')
		return false;
	if (c > '\uFF9D')
		return false;
	return true;
}
// True when c lies in the inclusive range U+3041..U+3094.
public static bool IsHiragana (char c)
{
	if (c < '\u3041')
		return false;
	if (c > '\u3094')
		return false;
	return true;
}
// Judging by the name, reports whether c is a Japanese small letter.
// Two ranges are examined: U+FF67..U+FF6F (inclusive) and the open interval
// between U+3040 and U+30FA.
// NOTE(review): the result produced for each range and the fall-through
// result are not visible in this excerpt.
415 public static bool IsJapaneseSmallLetter (char c)
417 if ('\uFF67' <= c && c <= '\uFF6F')
419 if ('\u3040' < c && c < '\u30FA') {
453 public static readonly bool IsReady = true; // always
// Static constructor of the array-backed variant: takes the address of each
// table array and stores it in the matching static pointer field. For the
// CJK arrays the level 1 pointer is the array start plus the corresponding
// *ArrLength value.
// NOTE(review): each pointer is stored in a static field and therefore used
// after its `fixed` statement has ended. The language only guarantees the
// address for the duration of the `fixed` statement, so this relies on the
// arrays never being moved; confirm how that is ensured.
// NOTE(review): several assignments are not visible in this excerpt.
455 static MSCompatUnicodeTable ()
457 fixed (byte* tmp = ignorableFlagsArr) {
458 ignorableFlags = tmp;
460 fixed (byte* tmp = categoriesArr) {
463 fixed (byte* tmp = level1Arr) {
466 fixed (byte* tmp = level2Arr) {
469 fixed (byte* tmp = level3Arr) {
472 // fixed (ushort* tmp = widthCompatArr) {
473 // widthCompat = tmp;
475 fixed (byte* tmp = cjkCHSArr) {
476 cjkCHScategory = tmp;
477 cjkCHSlv1 = tmp + cjkCHSArrLength;
479 fixed (byte* tmp = cjkCHTArr) {
480 cjkCHTcategory = tmp;
481 cjkCHTlv1 = tmp + cjkCHTArrLength;
483 fixed (byte* tmp = cjkJAArr) {
485 cjkJAlv1 = tmp + cjkJAArrLength;
487 fixed (byte* tmp = cjkKOArr) {
489 cjkKOlv1 = tmp + cjkKOArrLength;
491 fixed (byte* tmp = cjkKOlv2Arr) {
// Array-backed variant of FillCJK: forwards all arguments to
// SetCJKReferences, which points them at the static CJK tables.
// NOTE(review): the last parameter (passed on as `lv2Table`) is not visible
// in this excerpt.
496 public static void FillCJK (string name,
497 ref CodePointIndexer cjkIndexer,
498 ref byte* catTable, ref byte* lv1Table,
499 ref CodePointIndexer cjkLv2Indexer,
502 SetCJKReferences (name, ref cjkIndexer,
503 ref catTable, ref lv1Table,
504 ref cjkLv2Indexer, ref lv2Table);
// Tailoring data decoded by the static constructor: `tailorings` holds the
// raw entry chars and `tailoringInfos` the per-culture descriptors that
// GetTailoringInfo searches.
508 static readonly char [] tailorings;
509 static readonly TailoringInfo [] tailoringInfos;
// NOTE(review): no use of forLock is visible in this excerpt.
510 static object forLock = new object ();
// NOTE(review): the assignment of isReady is not visible in this excerpt.
511 public static readonly bool isReady;
// Read-only accessor for the isReady field.
513 public static bool IsReady {
514 get { return isReady; }
517 #if USE_MANAGED_RESOURCE
// Returns the pointer that GetManifestResourceInternal yields for the named
// manifest resource of the executing assembly. Callers treat IntPtr.Zero as
// "resource not available".
// NOTE(review): the declarations of the `size` and `module` out arguments
// are not visible in this excerpt.
518 static IntPtr GetResource (string name)
522 return Assembly.GetExecutingAssembly ().GetManifestResourceInternal (name, out size, out module);
// Location of the executing assembly; passed as `path` to
// load_collation_resource.
525 static readonly string corlibPath = Assembly.GetExecutingAssembly ().Location;
// Values passed as `resource_index` to load_collation_resource.
527 const int CollationResourceCore = 0;
528 const int CollationResourceCJKCHS = 1;
529 const int CollationResourceCJKCHT = 2;
530 const int CollationResourceCJKJA = 3;
531 const int CollationResourceCJKKO = 4;
532 const int CollationResourceCJKKOlv2 = 5;
533 const int CollationResourceTailoring = 6;
// Runtime-provided internal call. Callers pass the addresses of a byte*
// and an int to receive the resource data pointer and its size.
535 [MethodImplAttribute (MethodImplOptions.InternalCall)]
536 static extern void load_collation_resource (string path, int resource_index, byte** data, int* size);
// Reads four consecutive bytes starting at raw [idx] and combines them into
// an unsigned 32-bit value, least significant byte first.
static uint UInt32FromBytePtr (byte* raw, uint idx)
{
	uint b0 = raw [idx];
	uint b1 = raw [idx + 1];
	uint b2 = raw [idx + 2];
	uint b3 = raw [idx + 3];
	// The four bytes occupy disjoint bit ranges, so OR-ing them yields
	// the same value as adding them.
	return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
}
// Static constructor of the resource-backed variant. It obtains the core and
// tailoring resources (from manifest resources or via
// load_collation_resource), checks byte 0 of each against
// UUtil.ResourceVersion, points the core table fields into the core resource
// and decodes the tailoring resource into tailoringInfos and tailorings.
// NOTE(review): the local declarations, the `idx` updates between reads, the
// statements under the guard `if`s and the remaining preprocessor directives
// are not visible in this excerpt.
545 static MSCompatUnicodeTable ()
552 #if USE_MANAGED_RESOURCE
553 IntPtr ptr = GetResource ("collation.core.bin");
554 if (ptr == IntPtr.Zero)
556 raw = (byte*) ((void*) ptr);
557 ptr = GetResource ("collation.tailoring.bin");
558 if (ptr == IntPtr.Zero)
560 tailor = (byte*) ((void*) ptr);
566 load_collation_resource (corlibPath, CollationResourceCore, &raw, &rawsize);
567 load_collation_resource (corlibPath, CollationResourceTailoring, &tailor, &trawsize);
571 if (raw == null || tailor == null)
573 // check resource version
574 if (raw [0] != UUtil.ResourceVersion ||
575 tailor [0] != UUtil.ResourceVersion)
// Each core table is preceded by a 32-bit size read with UInt32FromBytePtr;
// the table pointer is set to a position inside `raw`.
579 size = UInt32FromBytePtr (raw, idx);
581 ignorableFlags = raw + idx;
584 size = UInt32FromBytePtr (raw, idx);
586 categories = raw + idx;
589 size = UInt32FromBytePtr (raw, idx);
594 size = UInt32FromBytePtr (raw, idx);
599 size = UInt32FromBytePtr (raw, idx);
604 // size = UInt32FromBytePtr (raw, idx);
606 // widthCompat = (ushort*) (raw + idx);
// Tailoring descriptors: a 32-bit count, then three 32-bit integers and one
// flag byte per record.
610 uint count = UInt32FromBytePtr (tailor, idx);
612 tailoringInfos = new TailoringInfo [count];
613 for (int i = 0; i < count; i++) {
614 int i1 = (int) UInt32FromBytePtr (tailor, idx);
616 int i2 = (int) UInt32FromBytePtr (tailor, idx);
618 int i3 = (int) UInt32FromBytePtr (tailor, idx);
620 TailoringInfo ti = new TailoringInfo (
621 i1, i2, i3, tailor [idx++] != 0);
622 tailoringInfos [i] = ti;
// Tailoring entries: a 32-bit count followed by that many chars, each stored
// as two bytes with the low byte first.
626 count = UInt32FromBytePtr (tailor, idx);
628 tailorings = new char [count];
629 for (int i = 0; i < count; i++, idx += 2)
630 tailorings [i] = (char) (tailor [idx] + (tailor [idx + 1] << 8));
// Resource-backed variant of FillCJK: calls FillCJKCore and then
// SetCJKReferences with the same arguments.
// NOTE(review): some parameters and any statement wrapping the two calls are
// not visible in this excerpt.
634 public static void FillCJK (string culture,
635 ref CodePointIndexer cjkIndexer,
638 ref CodePointIndexer lv2Indexer,
642 FillCJKCore (culture, ref cjkIndexer,
643 ref catTable, ref lv1Table,
644 ref lv2Indexer, ref lv2Table);
645 SetCJKReferences (culture, ref cjkIndexer,
646 ref catTable, ref lv1Table,
647 ref lv2Indexer, ref lv2Table);
// Loads the CJK tables for a culture and caches them in the static cjk*
// fields. The visible steps are:
//   1. copy the cached pointers for the culture into catTable / lv1Table;
//   2. test `name == null || lv1Table != null`;
//   3. obtain the resource, either "collation.{name}.bin" from the manifest
//      or a numbered resource through load_collation_resource;
//   4. read a 32-bit count after the version byte(s); catTable starts right
//      after the count and lv1Table `count` bytes later;
//   5. store both pointers in the static fields of the culture;
//   6. obtain the Korean level 2 resource; cjkKOlv2 is set to the position
//      ResourceVersionSize + 4 bytes into it.
// NOTE(review): the switch headers, the derivation of `name` from `culture`,
// local declarations, the statements under the guard `if`s and the remaining
// preprocessor directives are not visible in this excerpt.
651 static void FillCJKCore (string culture,
652 ref CodePointIndexer cjkIndexer,
653 ref byte* catTable, ref byte* lv1Table,
654 ref CodePointIndexer cjkLv2Indexer, ref byte* lv2Table)
663 catTable = cjkCHScategory;
664 lv1Table = cjkCHSlv1;
668 catTable = cjkCHTcategory;
669 lv1Table = cjkCHTlv1;
673 catTable = cjkJAcategory;
678 catTable = cjkKOcategory;
683 if (name == null || lv1Table != null)
687 #if USE_MANAGED_RESOURCE
689 String.Format ("collation.{0}.bin", name);
690 IntPtr ptr = GetResource (filename);
691 if (ptr == IntPtr.Zero)
693 raw = (byte*) ((void*) ptr);
698 case "zh-CHS": residx = CollationResourceCJKCHS; break;
699 case "zh-CHT": residx = CollationResourceCJKCHT; break;
700 case "ja": residx = CollationResourceCJKJA; break;
701 case "ko": residx = CollationResourceCJKKO; break;
705 load_collation_resource (corlibPath, residx, &raw, &size);
707 uint count = UInt32FromBytePtr (raw, ResourceVersionSize);
708 catTable = (byte*) raw + ResourceVersionSize + 4;
709 lv1Table = (byte*) raw + ResourceVersionSize + 4 + count;
713 cjkCHScategory = catTable;
714 cjkCHSlv1 = lv1Table;
717 cjkCHTcategory = catTable;
718 cjkCHTlv1 = lv1Table;
721 cjkJAcategory = catTable;
725 cjkKOcategory = catTable;
732 #if USE_MANAGED_RESOURCE
733 ptr = GetResource ("collation.cjkKOlv2.bin");
734 if (ptr == IntPtr.Zero)
736 raw = (byte*) ((void*) ptr);
738 load_collation_resource (corlibPath, CollationResourceCJKKOlv2, &raw, &size);
740 cjkKOlv2 = raw + ResourceVersionSize + 4;
748 // For "categories", 0 means no primary weight. 6 means
750 // For expanded character the value is never filled (i.e. 0).
751 // Those arrays will be split into blocks (<3400 and >F800)
752 // level 4 is computed.
754 // public static bool HasSpecialWeight (char c)
755 // { return level1 [(int) c] == 6; }
758 // autogenerated code or icall to fill array runs here