1 #define USE_MANAGED_RESOURCE
5 // MSCompatUnicodeTable.cs : Handles Windows-like sortkey tables.
8 // Atsushi Enomoto <atsushi@ximian.com>
10 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
33 using System.Collections;
34 using System.Globalization;
35 using System.Reflection;
36 using System.Runtime.CompilerServices;
37 using System.Runtime.InteropServices;
39 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
41 namespace Mono.Globalization.Unicode
// Immutable description of one culture's tailoring data: its LCID, a start
// index and an entry count, plus a FrenchSort flag.
// BuildTailoringTables appears to walk the tailoring table from
// TailoringIndex up to TailoringIndex + TailoringCount.
43 internal class TailoringInfo
45 public readonly int LCID;
46 public readonly int TailoringIndex;
47 public readonly int TailoringCount;
48 public readonly bool FrenchSort;
50 public TailoringInfo (int lcid, int tailoringIndex, int tailoringCount, bool frenchSort)
53 TailoringIndex = tailoringIndex;
54 TailoringCount = tailoringCount;
55 FrenchSort = frenchSort;
59 #region Tailoring support classes
60 // Possible mapping types are:
62 // - string to string (ReplacementMap)
63 // - string to SortKey (SortKeyMap)
64 // - diacritical byte to byte (DiacriticalMap)
66 // There could be mapping from string to sortkeys, but
67 // for now there is none as such.
// A tailoring entry keyed by a character sequence (Source).
// BuildTailoringTables creates SortKeyMap entries with a null Replacement
// and a 4-byte SortKey; ReplacementMap entries carry a replacement string.
69 internal class Contraction
71 public readonly char [] Source;
72 // only either of them is used.
73 public readonly string Replacement;
74 public readonly byte [] SortKey;
76 public Contraction (char [] source,
77 string replacement, byte [] sortkey)
80 Replacement = replacement;
// IComparer that orders Contraction objects by their Source char arrays:
// char-value difference within the shared prefix, then length difference.
// Both arguments are cast to Contraction without a prior type check.
85 internal class ContractionComparer : IComparer
87 public static readonly ContractionComparer Instance =
88 new ContractionComparer ();
90 public int Compare (object o1, object o2)
92 Contraction c1 = (Contraction) o1;
93 Contraction c2 = (Contraction) o2;
94 char [] a1 = c1.Source;
95 char [] a2 = c2.Source;
// Length of the prefix that both arrays share.
96 int min = a1.Length > a2.Length ?
97 a2.Length : a1.Length;
// NOTE(review): embedded line 99 is absent from this listing; presumably it
// guards the return below with a1 [i] != a2 [i] -- confirm against the full file.
98 for (int i = 0; i < min; i++)
100 return a1 [i] - a2 [i];
// Reached when the loop did not return: the shorter array sorts first.
101 return a1.Length - a2.Length;
// Pairs a source byte with a replacement byte. BuildTailoringTables creates
// one from the two chars that follow each DiacriticalMap ('\x2') marker.
105 internal class Level2Map
110 public Level2Map (byte source, byte replace)
// IComparer that orders Level2Map objects by their Source value; the result
// is m1.Source - m2.Source. Both arguments are cast without a prior type check.
117 internal class Level2MapComparer : IComparer
119 public static readonly Level2MapComparer Instance =
120 new Level2MapComparer ();
122 public int Compare (object o1, object o2)
124 Level2Map m1 = (Level2Map) o1;
125 Level2Map m2 = (Level2Map) o2;
126 return (m1.Source - m2.Source);
// Static accessors over the collation tables. The pointer fields below are
// assigned by the static constructor (core tables and tailoring) and by the
// CJK fill routines (cjk* tables).
132 unsafe internal class MSCompatUnicodeTable
134 public static int MaxExpansionLength = 3;
// Core tables, indexed through the matching UUtil.*.ToIndex (cp) helpers.
136 static readonly byte* ignorableFlags;
137 static readonly byte* categories;
138 static readonly byte* level1;
139 static readonly byte* level2;
140 static readonly byte* level3;
141 // static readonly ushort* widthCompat;
143 static readonly char* tailoring;
// CJK tables; not readonly because they are assigned outside the static
// constructor as well (see FillCJKCore).
145 static byte* cjkCHScategory;
146 static byte* cjkCHTcategory;
147 static byte* cjkJAcategory;
148 static byte* cjkKOcategory;
149 static byte* cjkCHSlv1;
150 static byte* cjkCHTlv1;
151 static byte* cjkJAlv1;
152 static byte* cjkKOlv1;
153 static byte* cjkKOlv2;
// Number of leading bytes skipped before a resource's payload is read
// (used as idx += ResourceVersionSize).
155 const int ResourceVersionSize = 1;
// Linear search of tailoringInfos; returns the first entry whose LCID
// equals lcid.
157 public static TailoringInfo GetTailoringInfo (int lcid)
159 for (int i = 0; i < tailoringInfos.Length; i++)
160 if (tailoringInfos [i].LCID == lcid)
161 return tailoringInfos [i];
// Collects the tailoring entries for a culture into two sorted arrays.
// Entries are read from tailoringArr; each one starts with a marker char:
//   '\x1' SortKeyMap, '\x2' DiacriticalMap, '\x3' ReplacementMap.
// An unrecognized marker raises NotImplementedException (the message names
// the culture and the table offset).
// On return, contractions is sorted with ContractionComparer and
// diacriticals with Level2MapComparer.
165 unsafe public static void BuildTailoringTables (CultureInfo culture,
167 ref Contraction [] contractions,
168 ref Level2Map [] diacriticals)
170 // collect tailoring entries.
171 ArrayList cmaps = new ArrayList ();
172 ArrayList dmaps = new ArrayList ();
173 fixed (char* tarr = tailoringArr){
// Range of the tailoring table to read, taken from t.
174 int idx = t.TailoringIndex;
175 int end = idx + t.TailoringCount;
179 switch (tarr [idx]) {
180 case '\x1': // SortKeyMap
// Looks for the NUL char that terminates the source sequence
// (ss is presumably advanced in the loop body -- not shown here).
182 while (tarr [ss] != 0)
184 src = new char [ss - idx];
185 // Array.Copy (tarr, idx, src, 0, ss - idx);
186 Marshal.Copy ((IntPtr) (tarr + idx), src, 0, ss - idx);
// The four chars after the terminator are narrowed to bytes to form the sort key.
187 byte [] sortkey = new byte [4];
188 for (int i = 0; i < 4; i++)
189 sortkey [i] = (byte) tarr [ss + 1 + i];
190 cmaps.Add (new Contraction (
191 src, null, sortkey));
195 case '\x2': // DiacriticalMap
// The two chars after the marker are narrowed to the source and replacement bytes.
196 dmaps.Add (new Level2Map (
197 (byte) tarr [idx + 1],
198 (byte) tarr [idx + 2]));
201 case '\x3': // ReplacementMap
// Source sequence: chars up to the first NUL.
203 while (tarr [ss] != 0)
205 src = new char [ss - idx];
206 // Array.Copy (tarr, idx, src, 0, ss - idx);
207 Marshal.Copy ((IntPtr) (tarr + idx), src, 0, ss - idx);
// Replacement string: chars from ss up to the next NUL at l.
210 while (tarr [l] != 0)
212 string r = new string (tarr, ss, l - ss);
213 cmaps.Add (new Contraction (
218 throw new NotImplementedException (String.Format ("Mono INTERNAL ERROR (Should not happen): Collation tailoring table is broken for culture {0} ({1}) at 0x{2:X}", culture.LCID, culture.Name, idx));
// Sort both lists with their dedicated comparers before copying them out.
222 cmaps.Sort (ContractionComparer.Instance);
223 dmaps.Sort (Level2MapComparer.Instance);
224 contractions = cmaps.ToArray (typeof (Contraction))
226 diacriticals = dmaps.ToArray (typeof (Level2Map))
// Copies the static CJK table pointers (cjkCHS*, cjkCHT*, cjkJA*, cjkKO*) and
// the matching UUtil indexers into the caller's ref variables. The group to
// copy is presumably selected by name (the selection is not shown here).
// Only the CHS group uses UUtil.CjkCHS; the other groups use UUtil.Cjk.
// In the visible lines lv2Indexer is assigned only in the KO group.
230 static void SetCJKReferences (string name,
231 ref CodePointIndexer cjkIndexer,
232 ref byte* catTable, ref byte* lv1Table,
233 ref CodePointIndexer lv2Indexer, ref byte* lv2Table)
235 // as a part of mscorlib.dll, this invocation is
236 // somewhat extraneous (pointers were already assigned).
240 catTable = cjkCHScategory;
241 lv1Table = cjkCHSlv1;
242 cjkIndexer = UUtil.CjkCHS;
245 catTable = cjkCHTcategory;
246 lv1Table = cjkCHTlv1;
247 cjkIndexer = UUtil.Cjk;
250 catTable = cjkJAcategory;
252 cjkIndexer = UUtil.Cjk;
255 catTable = cjkKOcategory;
258 cjkIndexer = UUtil.Cjk;
259 lv2Indexer = UUtil.Cjk;
// Returns the categories table byte at index UUtil.Category.ToIndex (cp).
// NOTE(review): unlike IsIgnorable, the index is not checked for a negative
// value before the pointer read -- confirm ToIndex cannot return one here.
264 public static byte Category (int cp)
266 return categories [UUtil.Category.ToIndex (cp)];
// Returns the level1 table byte at index UUtil.Level1.ToIndex (cp).
// The index is used as-is, with no range check.
269 public static byte Level1 (int cp)
271 return level1 [UUtil.Level1.ToIndex (cp)];
// Returns the level2 table byte at index UUtil.Level2.ToIndex (cp).
// The index is used as-is, with no range check.
274 public static byte Level2 (int cp)
276 return level2 [UUtil.Level2.ToIndex (cp)];
// Returns the level3 table byte at index UUtil.Level3.ToIndex (cp).
// The index is used as-is, with no range check.
279 public static byte Level3 (int cp)
281 return level3 [UUtil.Level3.ToIndex (cp)];
284 public static bool IsSortable (string s)
286 foreach (char c in s)
// Code-point overload. Code points for which IsIgnorable (cp) is false are
// handled by the first check; for the rest, the final expression accepts a
// fixed set of inclusive code-point ranges.
292 public static bool IsSortable (int cp)
294 // LAMESPEC: they should strictly match with
295 // IsIgnorable() result, but sometimes it does not.
296 if (!IsIgnorable (cp))
// NOTE(review): the 0x200C..0x200F test appears twice in the expression
// below (second and fifth operands); the repeat is redundant.
304 return 0x180B <= cp && cp <= 0x180E ||
305 0x200C <= cp && cp <= 0x200F ||
306 0x202A <= cp && cp <= 0x202E ||
307 0x206A <= cp && cp <= 0x206F ||
308 0x200C <= cp && cp <= 0x200F ||
309 0xFFF9 <= cp && cp <= 0xFFFD;
// Convenience overload: same as IsIgnorable (cp, 1).
312 public static bool IsIgnorable (int cp)
314 return IsIgnorable (cp, 1);
// Tests whether the ignorableFlags entry for cp has any bit of flag set.
// Yields false when UUtil.Ignorable.ToIndex (cp) returns a negative index.
// When the lowest bit of flag (value 1) is requested, extra checks on the
// Unicode category and on the range 0xD880 <= cp < 0xDB80 run first.
317 public static bool IsIgnorable (int cp, byte flag)
321 if ((flag & 1) != 0) {
// cp is narrowed to char for the category lookup.
322 UnicodeCategory uc = Char.GetUnicodeCategory ((char) cp);
323 // This check eliminates some extraneous code areas
324 if (uc == UnicodeCategory.OtherNotAssigned)
326 // Some characters in Surrogate area are ignored.
327 if (0xD880 <= cp && cp < 0xDB80)
330 int i = UUtil.Ignorable.ToIndex (cp);
331 return i >= 0 && (ignorableFlags [i] & flag) != 0;
334 // for (int i = 0; i <= char.MaxValue; i++)
335 // if (Char.GetUnicodeCategory ((char) i)
336 // == UnicodeCategory.OtherNotAssigned
337 // && ignorableFlags [i] != 7)
338 // Console.WriteLine ("{0:X04}", i);
// Same as IsIgnorable (cp, 2), i.e. tests flag bit 0x2 for cp.
340 public static bool IsIgnorableSymbol (int cp)
342 return IsIgnorable (cp, 2);
343 // int i = UUtil.Ignorable.ToIndex (cp);
344 // return i >= 0 && (ignorableFlags [i] & 0x2) != 0;
// Same as IsIgnorable (cp, 4), i.e. tests flag bit 0x4 for cp.
347 public static bool IsIgnorableNonSpacing (int cp)
349 return IsIgnorable (cp, 4);
350 // int i = UUtil.Ignorable.ToIndex (cp);
351 // return i >= 0 && (ignorableFlags [i] & 0x4) != 0;
353 // It could be implemented this way, but the above
355 // return categories [UUtil.Category.ToIndex (cp)] == 1;
// Adds 0x60 to values in the inclusive range 0x3041..0x3094 (the same range
// IsHiragana tests) and returns every other value unchanged.
358 public static int ToKanaTypeInsensitive (int i)
360 // Note that IgnoreKanaType does not treat half-width
361 // katakana as equivalent to full-width ones.
363 // Thus, it is so simple ;-)
364 return (0x3041 <= i && i <= 0x3094) ? i + 0x60 : i;
// Converts i with range-specific arithmetic offsets. Only some of the range
// branches are visible in this listing, so the full mapping is not described.
367 // Note that currently indexer optimizes this table a lot,
368 // which might have resulted in bugs.
369 public static int ToWidthCompat (int i)
375 return i - 0xFF00 + 0x20;
397 return 0xFFE9 - 0x2190 + i;
428 // Other Kana compat characters' width
429 // compatibility is considered in special weight.
434 if (i < 0x3164) { // Hangul compat
435 return i - 0x3130 + 0xFFA0;
439 // 0x32D0-0x32FE are Kana compat characters, whose
440 // width compatibility is considered in special weight.
444 #region Level 4 properties (Kana)
// Classifies c purely by code-point range through an if / else-if chain.
// The first visible range, U+FF66 <= c < U+FF9E, is the same set of chars
// that IsHalfWidthKana accepts.
446 public static bool HasSpecialWeight (char c)
450 else if ('\uFF66' <= c && c < '\uFF9E')
452 else if ('\u3300' <= c)
454 else if (c < '\u309D')
455 return (c < '\u3099');
456 else if (c < '\u3100')
457 return c != '\u30FB';
458 else if (c < '\u32D0')
460 else if (c < '\u32FF')
465 // FIXME: it should be removed at some stage
466 // (will become unused).
467 public static byte GetJapaneseDashType (char c)
// True when c lies in the inclusive range U+FF66..U+FF9D.
482 public static bool IsHalfWidthKana (char c)
484 return '\uFF66' <= c && c <= '\uFF9D';
// True when c lies in the inclusive range U+3041..U+3094.
487 public static bool IsHiragana (char c)
489 return '\u3041' <= c && c <= '\u3094';
492 public static bool IsJapaneseSmallLetter (char c)
494 if ('\uFF67' <= c && c <= '\uFF6F')
496 if ('\u3040' < c && c < '\u30FA') {
530 public static readonly bool IsReady = true; // always
// Static constructor variant that points the table fields at managed arrays
// (ignorableFlagsArr, categoriesArr, ... cjkKOlv2Arr).
// NOTE(review): the first visible statement throws unconditionally, so the
// assignments below look unreachable as listed -- confirm this is dead code.
// NOTE(review): pointers taken inside the fixed statements are stored in
// static fields; they stay valid only while the arrays remain pinned.
532 static MSCompatUnicodeTable ()
534 throw new Exception ("This code should not be used");
536 fixed (byte* tmp = ignorableFlagsArr) {
537 ignorableFlags = tmp;
539 fixed (byte* tmp = categoriesArr) {
542 fixed (byte* tmp = level1Arr) {
545 fixed (byte* tmp = level2Arr) {
548 fixed (byte* tmp = level3Arr) {
551 // fixed (ushort* tmp = widthCompatArr) {
552 // widthCompat = tmp;
554 fixed (char* tmp = tailoringArr) {
// For each CJK array the category table starts at the array base and the
// level-1 table starts *ArrLength elements later.
557 fixed (byte* tmp = cjkCHSArr) {
558 cjkCHScategory = tmp;
559 cjkCHSlv1 = tmp + cjkCHSArrLength;
561 fixed (byte* tmp = cjkCHTArr) {
562 cjkCHTcategory = tmp;
563 cjkCHTlv1 = tmp + cjkCHTArrLength;
565 fixed (byte* tmp = cjkJAArr) {
567 cjkJAlv1 = tmp + cjkJAArrLength;
569 fixed (byte* tmp = cjkKOArr) {
571 cjkKOlv1 = tmp + cjkKOArrLength;
573 fixed (byte* tmp = cjkKOlv2Arr) {
// Forwards its arguments to SetCJKReferences, which fills the ref parameters
// from the static CJK tables.
578 public static void FillCJK (string name,
579 ref CodePointIndexer cjkIndexer,
580 ref byte* catTable, ref byte* lv1Table,
581 ref CodePointIndexer cjkLv2Indexer,
584 SetCJKReferences (name, ref cjkIndexer,
585 ref catTable, ref lv1Table,
586 ref cjkLv2Indexer, ref lv2Table);
591 static readonly char [] tailoringArr;
593 static readonly TailoringInfo [] tailoringInfos;
594 static object forLock = new object ();
595 public static readonly bool isReady;
597 public static bool IsReady {
598 get { return isReady; }
601 #if USE_MANAGED_RESOURCE
// Returns the pointer that GetManifestResourceInternal yields for the named
// resource of the executing assembly. The size and module out values are not
// passed on to the caller. Callers in this file compare the result against
// IntPtr.Zero before using it.
602 static IntPtr GetResource (string name)
606 return Assembly.GetExecutingAssembly ().GetManifestResourceInternal (name, out size, out module);
// Table indexes accepted by the two-argument load_collation_resource icall
// (one index per individual table).
609 const int CollationTableIdxIgnorables = 0;
610 const int CollationTableIdxCategory = 1;
611 const int CollationTableIdxLevel1 = 2;
612 const int CollationTableIdxLevel2 = 3;
613 const int CollationTableIdxLevel3 = 4;
614 const int CollationTableIdxTailoringInfos = 5;
615 const int CollationTableIdxTailoringChars = 6;
616 const int CollationTableIdxCjkCHS = 7;
617 const int CollationTableIdxCjkCHT = 8;
618 const int CollationTableIdxCjkJA = 9;
619 const int CollationTableIdxCjkKO = 10;
620 const int CollationTableIdxCjkKOLv2 = 11;
// Runtime-implemented call: stores the table pointer through data.
622 [MethodImplAttribute (MethodImplOptions.InternalCall)]
623 static extern void load_collation_resource (int resource_index, byte** data);
// Location of the executing assembly, passed as path to the four-argument icall.
625 static readonly string corlibPath = Assembly.GetExecutingAssembly ().Location;
// Resource indexes accepted by the four-argument load_collation_resource icall.
// NOTE(review): this set and the CollationTableIdx* set are presumably selected
// by preprocessor conditionals that are not visible in this listing.
627 const int CollationResourceCore = 0;
628 const int CollationResourceCJKCHS = 1;
629 const int CollationResourceCJKCHT = 2;
630 const int CollationResourceCJKJA = 3;
631 const int CollationResourceCJKKO = 4;
632 const int CollationResourceCJKKOlv2 = 5;
633 const int CollationResourceTailoring = 6;
// Runtime-implemented call: takes the assembly path and also reports a size.
635 [MethodImplAttribute (MethodImplOptions.InternalCall)]
636 static extern void load_collation_resource (string path, int resource_index, byte** data, int* size);
// Reads the four bytes raw [idx] .. raw [idx + 3] and assembles them into a
// uint in little-endian order (raw [idx] is the least significant byte).
// No bounds check is made on raw or idx.
639 static uint UInt32FromBytePtr (byte* raw, uint idx)
641 return (uint) (raw [idx] + (raw [idx + 1] << 8)
642 + (raw [idx + 2] << 16) + (raw [idx + 3] << 24));
// Static constructor variant that obtains the core and tailoring tables from
// resources. Several alternative loading paths are visible; they are
// presumably separated by preprocessor conditionals not shown in this listing.
645 static MSCompatUnicodeTable ()
// Path 1: one icall per table; each call leaves the table pointer in raw.
654 load_collation_resource (CollationTableIdxIgnorables, &raw);
655 ignorableFlags = raw;
656 load_collation_resource (CollationTableIdxCategory, &raw);
658 load_collation_resource (CollationTableIdxLevel1, &raw);
660 load_collation_resource (CollationTableIdxLevel2, &raw);
662 load_collation_resource (CollationTableIdxLevel3, &raw);
664 load_collation_resource (CollationTableIdxTailoringInfos, &raw);
665 tailor = (uint*) raw;
666 load_collation_resource (CollationTableIdxTailoringChars, &raw);
667 tailoring = (char*) raw;
// Tailoring infos as 32-bit values: a count, then four values per record
// (lcid, tailoringIndex, tailoringCount, non-zero => frenchSort).
671 uint count = tailor [idx++];
672 tailoringInfos = new TailoringInfo [count];
673 for (int i = 0; i < count; i++) {
674 int i1 = (int) tailor [idx++];
675 int i2 = (int) tailor [idx++];
676 int i3 = (int) tailor [idx++];
677 TailoringInfo ti = new TailoringInfo (
678 i1, i2, i3, tailor [idx++] != 0);
679 tailoringInfos [i] = ti;
690 #if USE_MANAGED_RESOURCE
// Path 2: manifest resources embedded in the executing assembly.
691 IntPtr ptr = GetResource ("collation.core.bin");
692 if (ptr == IntPtr.Zero)
694 raw = (byte*) ((void*) ptr);
695 ptr = GetResource ("collation.tailoring.bin");
696 if (ptr == IntPtr.Zero)
698 tailor = (byte*) ((void*) ptr);
// Path 3: icall that takes the corlib path.
// NOTE(review): the last two calls both store their size in trawsize, so the
// first value is overwritten. CollationResourceTailoringChars and tailorChars
// are not declared in the visible part of this file -- confirm they exist.
704 load_collation_resource (corlibPath, CollationResourceCore, &raw, &rawsize);
705 load_collation_resource (corlibPath, CollationResourceTailoring, &tailor, &trawsize);
706 load_collation_resource (corlibPath, CollationResourceTailoringChars, &tailorChars, &trawsize);
710 if (raw == null || tailor == null)
712 // check resource version
// The first byte of each resource is compared against UUtil.ResourceVersion.
713 if (raw [0] != UUtil.ResourceVersion ||
714 tailor [0] != UUtil.ResourceVersion)
// Core resource layout: each table is preceded by a 32-bit little-endian
// size read with UInt32FromBytePtr.
718 size = UInt32FromBytePtr (raw, idx);
720 ignorableFlags = raw + idx;
723 size = UInt32FromBytePtr (raw, idx);
725 categories = raw + idx;
728 size = UInt32FromBytePtr (raw, idx);
733 size = UInt32FromBytePtr (raw, idx);
738 size = UInt32FromBytePtr (raw, idx);
743 // size = UInt32FromBytePtr (raw, idx);
745 // widthCompat = (ushort*) (raw + idx);
// Tailoring resource as bytes: a 32-bit count, then per record three 32-bit
// values followed by one flag byte (non-zero => frenchSort).
751 uint count = UInt32FromBytePtr (tailor, idx);
753 tailoringInfos = new TailoringInfo [count];
754 for (int i = 0; i < count; i++) {
755 int i1 = (int) UInt32FromBytePtr (tailor, idx);
757 int i2 = (int) UInt32FromBytePtr (tailor, idx);
759 int i3 = (int) UInt32FromBytePtr (tailor, idx);
761 TailoringInfo ti = new TailoringInfo (
762 i1, i2, i3, tailor [idx++] != 0);
763 tailoringInfos [i] = ti;
// Tailoring chars: a 32-bit count, then that many 16-bit little-endian
// values copied into the managed tailoringArr.
767 count = UInt32FromBytePtr (tailor, idx);
770 tailoringArr = new char [count];
771 for (int i = 0; i < count; i++, idx += 2)
772 tailoringArr [i] = (char) (tailor [idx] + (tailor [idx + 1] << 8));
// Fills the caller's CJK references for a culture in two steps: FillCJKCore
// first, then SetCJKReferences, both with the same arguments.
777 public static void FillCJK (string culture,
778 ref CodePointIndexer cjkIndexer,
781 ref CodePointIndexer lv2Indexer,
785 FillCJKCore (culture, ref cjkIndexer,
786 ref catTable, ref lv1Table,
787 ref lv2Indexer, ref lv2Table);
788 SetCJKReferences (culture, ref cjkIndexer,
789 ref catTable, ref lv1Table,
790 ref lv2Indexer, ref lv2Table);
// Obtains the CJK category and level-1 tables for a culture and caches the
// pointers in the static cjk* fields. Visible culture keys in the resource
// lookups are "zh-CHS", "zh-CHT", "ja" and "ko". A separate level-2 table is
// read into cjkKOlv2.
794 static void FillCJKCore (string culture,
795 ref CodePointIndexer cjkIndexer,
796 ref byte* catTable, ref byte* lv1Table,
797 ref CodePointIndexer cjkLv2Indexer, ref byte* lv2Table)
// Start from whatever is already cached in the static fields.
806 catTable = cjkCHScategory;
807 lv1Table = cjkCHSlv1;
811 catTable = cjkCHTcategory;
812 lv1Table = cjkCHTlv1;
816 catTable = cjkJAcategory;
821 catTable = cjkKOcategory;
// Presumably an early exit when there is nothing to load or the level-1
// table is already cached (the guarded statement is not shown here).
826 if (name == null || lv1Table != null)
831 #if USE_MANAGED_RESOURCE
833 String.Format ("collation.{0}.bin", name);
834 IntPtr ptr = GetResource (filename);
835 if (ptr == IntPtr.Zero)
837 raw = (byte*) ((void*) ptr);
// Skip the leading version byte(s) of the resource.
838 idx += ResourceVersionSize;
842 case "zh-CHS": residx = CollationTableIdxCjkCHS; break;
843 case "zh-CHT": residx = CollationTableIdxCjkCHT; break;
844 case "ja": residx = CollationTableIdxCjkJA; break;
845 case "ko": residx = CollationTableIdxCjkKO; break;
849 load_collation_resource (residx, &raw);
854 case "zh-CHS": residx = CollationResourceCJKCHS; break;
855 case "zh-CHT": residx = CollationResourceCJKCHT; break;
856 case "ja": residx = CollationResourceCJKJA; break;
857 case "ko": residx = CollationResourceCJKKO; break;
861 load_collation_resource (corlibPath, residx, &raw, &size);
862 idx += ResourceVersionSize;
// A 32-bit count is read at idx; the level-1 table starts count bytes
// after the start of the category table.
864 uint count = UInt32FromBytePtr (raw, idx);
866 catTable = (byte*) raw + idx;
867 lv1Table = (byte*) raw + idx + count;
// Cache the freshly computed pointers in the per-culture static fields.
871 cjkCHScategory = catTable;
872 cjkCHSlv1 = lv1Table;
875 cjkCHTcategory = catTable;
876 cjkCHTlv1 = lv1Table;
879 cjkJAcategory = catTable;
883 cjkKOcategory = catTable;
890 #if USE_MANAGED_RESOURCE
891 ptr = GetResource ("collation.cjkKOlv2.bin");
892 if (ptr == IntPtr.Zero)
894 raw = (byte*) ((void*) ptr);
// Payload offset: version byte(s) plus a 4-byte field that is skipped here.
895 idx = ResourceVersionSize + 4;
897 load_collation_resource (CollationTableIdxCjkKOLv2, &raw);
899 load_collation_resource (corlibPath, CollationResourceCJKKOlv2, &raw, &size);
900 idx = ResourceVersionSize + 4;
902 cjkKOlv2 = raw + idx;
910 // For "categories", 0 means no primary weight. 6 means
912 // For expanded character the value is never filled (i.e. 0).
913 // Those arrays will be split into blocks (<3400 and >F800)
914 // level 4 is computed.
916 // public static bool HasSpecialWeight (char c)
917 // { return level1 [(int) c] == 6; }
920 // autogenerated code or icall to fill array runs here