1 #define USE_MANAGED_RESOURCE
5 // MSCompatUnicodeTable.cs : Handles Windows-like sortkey tables.
8 // Atsushi Enomoto <atsushi@ximian.com>
10 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
33 using System.Collections;
34 using System.Globalization;
35 using System.Reflection;
36 using System.Runtime.CompilerServices;
37 using System.Runtime.InteropServices;
39 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
41 namespace Mono.Globalization.Unicode
// Immutable description of one culture's slice of the tailoring table.
// NOTE(review): part of the constructor body is not visible in this view
// (no assignment to LCID is shown) - confirm against the full file.
43 internal class TailoringInfo
// Culture identifier; GetTailoringInfo compares its argument against this field.
45 public readonly int LCID;
// Offset at which BuildTailoringTables starts reading the tailoring array.
46 public readonly int TailoringIndex;
// Added to TailoringIndex by BuildTailoringTables to obtain the end offset.
47 public readonly int TailoringCount;
// Stored from the frenchSort constructor argument; not read by the code visible here.
48 public readonly bool FrenchSort;
// Copies the arguments into the fields of the same name.
50 public TailoringInfo (int lcid, int tailoringIndex, int tailoringCount, bool frenchSort)
53 TailoringIndex = tailoringIndex;
54 TailoringCount = tailoringCount;
55 FrenchSort = frenchSort;
59 #region Tailoring support classes
60 // Possible mapping types are:
62 // - string to string (ReplacementMap)
63 // - string to SortKey (SortKeyMap)
64 // - diacritical byte to byte (DiacriticalMap)
66 // There could be mapping from string to sortkeys, but
67 // for now there is none as such.
// One tailoring entry keyed by a source character sequence.
// BuildTailoringTables creates these and sorts them with ContractionComparer.
// NOTE(review): ContractionComparer also reads an Index member, and the
// constructor takes index/source/sortkey arguments, but the matching field
// declaration and assignments are not visible in this view.
69 internal class Contraction
// Character sequence the entry applies to; primary sort key in ContractionComparer.
72 public readonly char [] Source;
73 // Only one of the two fields below is used for a given entry.
74 public readonly string Replacement;
75 public readonly byte [] SortKey;
// The SortKeyMap branch of BuildTailoringTables passes null for replacement.
77 public Contraction (int index, char [] source,
78 string replacement, byte [] sortkey)
82 Replacement = replacement;
// Orders Contraction entries by the characters of Source, then by the
// length of Source, and finally by Index.
internal class ContractionComparer : IComparer
{
	public static readonly ContractionComparer Instance =
		new ContractionComparer ();

	// Returns a negative, zero or positive value when o1 sorts before,
	// together with, or after o2. Both arguments are cast to Contraction.
	public int Compare (object o1, object o2)
	{
		Contraction left = (Contraction) o1;
		Contraction right = (Contraction) o2;
		char [] lhs = left.Source;
		char [] rhs = right.Source;

		// Number of positions present in both sources.
		int shared = lhs.Length;
		if (rhs.Length < shared)
			shared = rhs.Length;

		// The first differing character decides the order.
		int pos = 0;
		while (pos < shared) {
			int diff = lhs [pos] - rhs [pos];
			if (diff != 0)
				return diff;
			pos++;
		}

		// One source is a prefix of the other: the shorter one sorts first.
		int lengthDiff = lhs.Length - rhs.Length;
		if (lengthDiff != 0)
			return lengthDiff;

		// Identical sources: fall back to Index so that the resulting order
		// is deterministic even though the sort in use is not stable.
		return left.Index - right.Index;
	}
}
// Pair of bytes describing a diacritical (level 2) mapping.
// BuildTailoringTables creates one per DiacriticalMap entry.
// NOTE(review): the field declarations and the constructor body are not
// visible in this view; Level2MapComparer reads a Source member.
111 internal class Level2Map
116 public Level2Map (byte source, byte replace)
// Orders Level2Map entries by ascending Source value.
internal class Level2MapComparer : IComparer
{
	public static readonly Level2MapComparer Instance =
		new Level2MapComparer ();

	// Both arguments are cast to Level2Map; the result is the difference
	// between their Source values.
	public int Compare (object o1, object o2)
	{
		Level2Map left = (Level2Map) o1;
		Level2Map right = (Level2Map) o2;
		return left.Source - right.Source;
	}
}
138 unsafe internal class MSCompatUnicodeTable
// Not referenced by the code visible in this view.
140 public static int MaxExpansionLength = 3;
// Raw table pointers. Each is indexed with the value returned by the
// matching UUtil indexer (see Category, Level1, Level2, Level3 and
// IsIgnorable) and is assigned in the static constructor.
142 static readonly byte* ignorableFlags;
143 static readonly byte* categories;
144 static readonly byte* level1;
145 static readonly byte* level2;
146 static readonly byte* level3;
147 // static readonly ushort* widthCompat;
// Assigned in the static constructor on the path that loads the
// tailoring characters through load_collation_resource.
149 static readonly char* tailoring;
// Per-culture CJK table pointers. They are not readonly: FillCJKCore
// stores into them after loading a table, and SetCJKReferences hands
// them out to callers.
151 static byte* cjkCHScategory;
152 static byte* cjkCHTcategory;
153 static byte* cjkJAcategory;
154 static byte* cjkKOcategory;
155 static byte* cjkCHSlv1;
156 static byte* cjkCHTlv1;
157 static byte* cjkJAlv1;
158 static byte* cjkKOlv1;
159 static byte* cjkKOlv2;
// Added to the read offset before a resource payload is parsed
// (the static constructor compares byte 0 with UUtil.ResourceVersion).
161 const int ResourceVersionSize = 1;
// Linear search of tailoringInfos for the entry whose LCID equals lcid;
// the first match is returned.
// NOTE(review): the statement executed when nothing matches is not
// visible in this view.
163 public static TailoringInfo GetTailoringInfo (int lcid)
165 for (int i = 0; i < tailoringInfos.Length; i++)
166 if (tailoringInfos [i].LCID == lcid)
167 return tailoringInfos [i];
// Decodes the tailoring entries for one culture out of tailoringArr and
// returns them, sorted, through the two ref parameters:
//   contractions - Contraction entries ordered by ContractionComparer
//   diacriticals - Level2Map entries ordered by Level2MapComparer
// A NotImplementedException is thrown from the switch below; presumably
// this is its default branch for an unknown tag - the label is not visible.
// NOTE(review): several lines are not visible in this view, including
// the parameter that declares "t", the loop that walks idx up to end,
// and the declarations of ss, l, src and iindex.
171 unsafe public static void BuildTailoringTables (CultureInfo culture,
173 ref Contraction [] contractions,
174 ref Level2Map [] diacriticals)
176 // collect tailoring entries.
177 ArrayList cmaps = new ArrayList ();
178 ArrayList dmaps = new ArrayList ();
180 fixed (char* tarr = tailoringArr){
// The culture's entries occupy [TailoringIndex, TailoringIndex + TailoringCount).
181 int idx = t.TailoringIndex;
182 int end = idx + t.TailoringCount;
// The char at idx is a tag selecting the entry layout.
186 switch (tarr [idx]) {
187 case '\x1': // SortKeyMap
// Scan forward to the terminating NUL of the source sequence.
189 while (tarr [ss] != 0)
191 src = new char [ss - idx];
192 // Array.Copy (tarr, idx, src, 0, ss - idx);
193 Marshal.Copy ((IntPtr) (tarr + idx), src, 0, ss - idx);
// The four chars after the NUL are narrowed to bytes and form the sort key.
194 byte [] sortkey = new byte [4];
195 for (int i = 0; i < 4; i++)
196 sortkey [i] = (byte) tarr [ss + 1 + i];
197 cmaps.Add (new Contraction (iindex,
198 src, null, sortkey));
203 case '\x2': // DiacriticalMap
// The two chars after the tag are narrowed to bytes: source, then replacement.
204 dmaps.Add (new Level2Map (
205 (byte) tarr [idx + 1],
206 (byte) tarr [idx + 2]));
209 case '\x3': // ReplacementMap
// NUL-terminated source sequence ...
211 while (tarr [ss] != 0)
213 src = new char [ss - idx];
214 // Array.Copy (tarr, idx, src, 0, ss - idx);
215 Marshal.Copy ((IntPtr) (tarr + idx), src, 0, ss - idx);
// ... followed by a NUL-terminated replacement string.
218 while (tarr [l] != 0)
220 string r = new string (tarr, ss, l - ss);
221 cmaps.Add (new Contraction (iindex,
227 throw new NotImplementedException (String.Format ("Mono INTERNAL ERROR (Should not happen): Collation tailoring table is broken for culture {0} ({1}) at 0x{2:X}", culture.LCID, culture.Name, idx));
// Sort both lists with their comparers before copying them out.
231 cmaps.Sort (ContractionComparer.Instance);
232 dmaps.Sort (Level2MapComparer.Instance);
233 contractions = cmaps.ToArray (typeof (Contraction))
235 diacriticals = dmaps.ToArray (typeof (Level2Map))
// Hands the cached CJK table pointers and the matching UUtil indexers
// back to the caller through the ref parameters.
// NOTE(review): the lines that choose between the four groups below
// (presumably a switch on name) and some of the assignments are not
// visible in this view; the group labels follow the field names.
239 static void SetCJKReferences (string name,
240 ref CodePointIndexer cjkIndexer,
241 ref byte* catTable, ref byte* lv1Table,
242 ref CodePointIndexer lv2Indexer, ref byte* lv2Table)
244 // as a part of mscorlib.dll, this invocation is
245 // somewhat extraneous (pointers were already assigned).
// CHS tables; the only group that uses the UUtil.CjkCHS indexer.
249 catTable = cjkCHScategory;
250 lv1Table = cjkCHSlv1;
251 cjkIndexer = UUtil.CjkCHS;
// CHT tables.
254 catTable = cjkCHTcategory;
255 lv1Table = cjkCHTlv1;
256 cjkIndexer = UUtil.Cjk;
// JA tables.
259 catTable = cjkJAcategory;
261 cjkIndexer = UUtil.Cjk;
// KO tables; the only group that also sets lv2Indexer.
264 catTable = cjkKOcategory;
267 cjkIndexer = UUtil.Cjk;
268 lv2Indexer = UUtil.Cjk;
// Returns the categories table entry for the code point, located
// through the UUtil.Category indexer.
public static byte Category (int cp)
{
	int slot = UUtil.Category.ToIndex (cp);
	return categories [slot];
}
// Returns the level1 table entry for the code point, located through
// the UUtil.Level1 indexer.
public static byte Level1 (int cp)
{
	int slot = UUtil.Level1.ToIndex (cp);
	return level1 [slot];
}
// Returns the level2 table entry for the code point, located through
// the UUtil.Level2 indexer.
public static byte Level2 (int cp)
{
	int slot = UUtil.Level2.ToIndex (cp);
	return level2 [slot];
}
// Returns the level3 table entry for the code point, located through
// the UUtil.Level3 indexer.
public static byte Level3 (int cp)
{
	int slot = UUtil.Level3.ToIndex (cp);
	return level3 [slot];
}
// String overload: walks every char of s.
// NOTE(review): the loop body and the final return are not visible in
// this view; presumably each char is checked with IsSortable (int).
293 public static bool IsSortable (string s)
295 foreach (char c in s)
// Code point overload of IsSortable.
// NOTE(review): the statement controlled by the IsIgnorable check and
// the lines between it and the final return are not visible in this
// view, so only the visible parts are described.
301 public static bool IsSortable (int cp)
303 // LAMESPEC: they should strictly match with
304 // IsIgnorable() result, but sometimes it does not.
305 if (!IsIgnorable (cp))
// Range test for the code points that reach this statement.
// NOTE(review): the 0x200C..0x200F clause appears twice in the chain
// below; the second occurrence cannot change the result.
313 return 0x180B <= cp && cp <= 0x180E ||
314 0x200C <= cp && cp <= 0x200F ||
315 0x202A <= cp && cp <= 0x202E ||
316 0x206A <= cp && cp <= 0x206F ||
317 0x200C <= cp && cp <= 0x200F ||
318 0xFFF9 <= cp && cp <= 0xFFFD;
// Convenience overload: tests the code point against flag value 1.
public static bool IsIgnorable (int cp)
{
	const byte flagBit = 1;
	return IsIgnorable (cp, flagBit);
}
// Tests a code point against the ignorable-flags table. flag is a bit
// mask; the table lookup at the end yields true when the entry for cp
// has at least one of those bits set.
// NOTE(review): the statements controlled by the two inner checks, and
// the lines before the first visible check, are not visible in this view.
326 public static bool IsIgnorable (int cp, byte flag)
// Extra screening that only runs when bit 1 is part of the mask.
330 if ((flag & 1) != 0) {
// NOTE(review): the (char) cast keeps only the low 16 bits of cp.
331 UnicodeCategory uc = Char.GetUnicodeCategory ((char) cp);
332 // This check eliminates some extraneous code areas
333 if (uc == UnicodeCategory.OtherNotAssigned)
335 // Some characters in Surrogate area are ignored.
336 if (0xD880 <= cp && cp < 0xDB80)
// A negative index from ToIndex is treated as "no table entry" and yields false.
339 int i = UUtil.Ignorable.ToIndex (cp);
340 return i >= 0 && (ignorableFlags [i] & flag) != 0;
343 // for (int i = 0; i <= char.MaxValue; i++)
344 // if (Char.GetUnicodeCategory ((char) i)
345 // == UnicodeCategory.OtherNotAssigned
346 // && ignorableFlags [i] != 7)
347 // Console.WriteLine ("{0:X04}", i);
// Tests the code point against flag value 2 by delegating to
// IsIgnorable (int, byte), rather than reading ignorableFlags directly.
public static bool IsIgnorableSymbol (int cp)
{
	const byte flagBit = 2;
	return IsIgnorable (cp, flagBit);
}
// Tests the code point against flag value 4 by delegating to
// IsIgnorable (int, byte), rather than reading ignorableFlags directly.
//
// The original author noted that this could also be written as
//	categories [UUtil.Category.ToIndex (cp)] == 1
// but kept the flag-based check.
public static bool IsIgnorableNonSpacing (int cp)
{
	const byte flagBit = 4;
	return IsIgnorable (cp, flagBit);
}
// Folds the range 0x3041..0x3094 (inclusive) onto the values 0x60
// higher; every other value is returned unchanged.
//
// Half-width katakana is intentionally left alone: IgnoreKanaType does
// not treat it as equivalent to the full-width forms, which is why a
// single range shift is sufficient.
public static int ToKanaTypeInsensitive (int i)
{
	if (i < 0x3041 || i > 0x3094)
		return i;
	return i + 0x60;
}
376 // Note that currently indexer optimizes this table a lot,
377 // which might have resulted in bugs.
// NOTE(review): nearly all of the range checks that select between the
// returns below are not visible in this view; the comments describe
// only the arithmetic of each visible return.
378 public static int ToWidthCompat (int i)
// Shifts a value of the form 0xFF00 + k to 0x20 + k.
384 return i - 0xFF00 + 0x20;
// Shifts a value of the form 0x2190 + k to 0xFFE9 + k.
406 return 0xFFE9 - 0x2190 + i;
437 // Other Kana compat characters' width
438 // compatibility is considered in special weight.
// Shifts a value of the form 0x3130 + k to 0xFFA0 + k.
443 if (i < 0x3164) { // Hangul compat
444 return i - 0x3130 + 0xFFA0;
448 // 0x32D0-0x32FE are Kana compat characters, whose
449 // width compatibility is considered in special weight.
453 #region Level 4 properties (Kana)
// Classifies c with a ladder of range checks.
// NOTE(review): the first branch and the results of most branches are
// not visible in this view; only the two visible returns are described.
455 public static bool HasSpecialWeight (char c)
459 else if ('\uFF66' <= c && c < '\uFF9E')
461 else if ('\u3300' <= c)
// Reaching this branch with c below U+309D: true only when c is also below U+3099.
463 else if (c < '\u309D')
464 return (c < '\u3099');
// Reaching this branch with c below U+3100: true for everything except U+30FB.
465 else if (c < '\u3100')
466 return c != '\u30FB';
467 else if (c < '\u32D0')
469 else if (c < '\u32FF')
474 // FIXME: it should be removed at some stage
475 // (will become unused).
// NOTE(review): the body of this method is not visible in this view, so
// the meaning of the returned byte cannot be documented from here.
476 public static byte GetJapaneseDashType (char c)
// True when c lies in U+FF66..U+FF9D, both ends included.
public static bool IsHalfWidthKana (char c)
{
	if (c < '\uFF66')
		return false;
	return c <= '\uFF9D';
}
// True when c lies in U+3041..U+3094, both ends included.
public static bool IsHiragana (char c)
{
	if (c < '\u3041')
		return false;
	return c <= '\u3094';
}
// Checks c against two ranges: U+FF67..U+FF6F (inclusive) and the open
// interval between U+3040 and U+30FA.
// NOTE(review): what each check leads to is not visible in this view.
501 public static bool IsJapaneseSmallLetter (char c)
503 if ('\uFF67' <= c && c <= '\uFF6F')
505 if ('\u3040' < c && c < '\u30FA') {
// NOTE(review): a second IsReady member and a second static constructor
// appear further down in this file; presumably conditional compilation
// selects one set - the directives are not visible in this view.
539 public static readonly bool IsReady = true; // always
// Static constructor variant that takes the table pointers from managed
// arrays (the *Arr names are not declared in the visible code).
541 static MSCompatUnicodeTable ()
// Throws unconditionally as its first visible statement.
543 throw new Exception ("This code should not be used");
// NOTE(review): each pointer below is stored in a static field from
// inside a fixed block, so it is kept after the fixed scope has ended.
545 fixed (byte* tmp = ignorableFlagsArr) {
546 ignorableFlags = tmp;
548 fixed (byte* tmp = categoriesArr) {
551 fixed (byte* tmp = level1Arr) {
554 fixed (byte* tmp = level2Arr) {
557 fixed (byte* tmp = level3Arr) {
560 // fixed (ushort* tmp = widthCompatArr) {
561 // widthCompat = tmp;
563 fixed (char* tmp = tailoringArr) {
// For the CJK arrays the level-1 pointer is placed *ArrLength bytes
// after the category pointer.
566 fixed (byte* tmp = cjkCHSArr) {
567 cjkCHScategory = tmp;
568 cjkCHSlv1 = tmp + cjkCHSArrLength;
570 fixed (byte* tmp = cjkCHTArr) {
571 cjkCHTcategory = tmp;
572 cjkCHTlv1 = tmp + cjkCHTArrLength;
574 fixed (byte* tmp = cjkJAArr) {
576 cjkJAlv1 = tmp + cjkJAArrLength;
578 fixed (byte* tmp = cjkKOArr) {
580 cjkKOlv1 = tmp + cjkKOArrLength;
582 fixed (byte* tmp = cjkKOlv2Arr) {
// Variant of FillCJK that only forwards its arguments to SetCJKReferences.
// NOTE(review): the parameter line declaring lv2Table is not visible in
// this view.
587 public static void FillCJK (string name,
588 ref CodePointIndexer cjkIndexer,
589 ref byte* catTable, ref byte* lv1Table,
590 ref CodePointIndexer cjkLv2Indexer,
593 SetCJKReferences (name, ref cjkIndexer,
594 ref catTable, ref lv1Table,
595 ref cjkLv2Indexer, ref lv2Table);
// Tailoring characters; filled by the static constructor and read by
// BuildTailoringTables.
600 static readonly char [] tailoringArr;
// One entry per culture; filled by the static constructor and searched
// by GetTailoringInfo.
602 static readonly TailoringInfo [] tailoringInfos;
// Not used by any line visible in this view.
603 static object forLock = new object ();
// Backing field for IsReady. No assignment to it is visible in this view.
604 public static readonly bool isReady;
// Read-only accessor for isReady.
606 public static bool IsReady {
607 get { return isReady; }
610 #if USE_MANAGED_RESOURCE
// Asks the executing assembly for the manifest resource called name and
// returns the IntPtr it hands back. Callers compare the result with
// IntPtr.Zero before using it.
// NOTE(review): the declarations of size and module are not visible in
// this view.
611 static IntPtr GetResource (string name)
615 return Assembly.GetExecutingAssembly ().GetManifestResourceInternal (name, out size, out module);
// Resource indexes passed to the load_collation_resource overload that
// takes only an index and an output pointer.
// NOTE(review): two differently shaped overloads and two index sets are
// declared here; presumably conditional compilation selects one - the
// directives are not visible in this view.
618 const int CollationTableIdxIgnorables = 0;
619 const int CollationTableIdxCategory = 1;
620 const int CollationTableIdxLevel1 = 2;
621 const int CollationTableIdxLevel2 = 3;
622 const int CollationTableIdxLevel3 = 4;
623 const int CollationTableIdxTailoringInfos = 5;
624 const int CollationTableIdxTailoringChars = 6;
625 const int CollationTableIdxCjkCHS = 7;
626 const int CollationTableIdxCjkCHT = 8;
627 const int CollationTableIdxCjkJA = 9;
628 const int CollationTableIdxCjkKO = 10;
629 const int CollationTableIdxCjkKOLv2 = 11;
// Runtime-implemented (InternalCall); stores a pointer to the requested table in *data.
631 [MethodImplAttribute (MethodImplOptions.InternalCall)]
632 static extern void load_collation_resource (int resource_index, byte** data);
// Location of the executing assembly; passed as the path argument of the
// path-based overload below.
634 static readonly string corlibPath = Assembly.GetExecutingAssembly ().Location;
// Resource indexes for the path-based overload.
// NOTE(review): the static constructor also passes
// CollationResourceTailoringChars to that overload, but no constant of
// that name is declared in the visible code.
636 const int CollationResourceCore = 0;
637 const int CollationResourceCJKCHS = 1;
638 const int CollationResourceCJKCHT = 2;
639 const int CollationResourceCJKJA = 3;
640 const int CollationResourceCJKKO = 4;
641 const int CollationResourceCJKKOlv2 = 5;
642 const int CollationResourceTailoring = 6;
// Runtime-implemented (InternalCall); takes the assembly path in addition
// to the index and also has a size output parameter.
644 [MethodImplAttribute (MethodImplOptions.InternalCall)]
645 static extern void load_collation_resource (string path, int resource_index, byte** data, int* size);
// Reads the four bytes raw [idx] .. raw [idx + 3] as a little-endian
// unsigned 32-bit integer (raw [idx] is the least significant byte).
//
// raw - start of the buffer; at least idx + 4 bytes must be readable.
// idx - offset of the first of the four bytes.
//
// Each byte is widened to uint before it is shifted and the parts are
// combined with |, so no intermediate value is ever negative. Summing
// the shifted bytes as ints and casting the total to uint afterwards
// produces a negative intermediate whenever raw [idx + 3] >= 0x80, and
// that cast throws OverflowException when compiled in a checked context.
static uint UInt32FromBytePtr (byte* raw, uint idx)
{
	return (uint) raw [idx]
		| ((uint) raw [idx + 1] << 8)
		| ((uint) raw [idx + 2] << 16)
		| ((uint) raw [idx + 3] << 24);
}
// Static constructor: obtains the collation tables and builds
// tailoringInfos (and, on the byte-oriented path, tailoringArr).
// NOTE(review): most conditional-compilation directives, the local
// declarations, the early exits and the lines that advance idx are not
// visible in this view. The grouping below follows which loader each
// group of statements calls.
654 static MSCompatUnicodeTable ()
// Loader taking only a resource index: every call yields a pointer to one table.
663 load_collation_resource (CollationTableIdxIgnorables, &raw);
664 ignorableFlags = raw;
665 load_collation_resource (CollationTableIdxCategory, &raw);
667 load_collation_resource (CollationTableIdxLevel1, &raw);
669 load_collation_resource (CollationTableIdxLevel2, &raw);
671 load_collation_resource (CollationTableIdxLevel3, &raw);
673 load_collation_resource (CollationTableIdxTailoringInfos, &raw);
674 tailor = (uint*) raw;
675 load_collation_resource (CollationTableIdxTailoringChars, &raw);
676 tailoring = (char*) raw;
// Here tailor is read as uint values: a count, then four values per
// entry in TailoringInfo constructor order (the last one is tested
// against zero to form the bool argument).
680 uint count = tailor [idx++];
681 tailoringInfos = new TailoringInfo [count];
682 for (int i = 0; i < count; i++) {
683 int i1 = (int) tailor [idx++];
684 int i2 = (int) tailor [idx++];
685 int i3 = (int) tailor [idx++];
686 TailoringInfo ti = new TailoringInfo (
687 i1, i2, i3, tailor [idx++] != 0);
688 tailoringInfos [i] = ti;
// Manifest-resource path: both resources come from GetResource, and an
// IntPtr.Zero result is checked for each.
699 #if USE_MANAGED_RESOURCE
700 IntPtr ptr = GetResource ("collation.core.bin");
701 if (ptr == IntPtr.Zero)
703 raw = (byte*) ((void*) ptr);
704 ptr = GetResource ("collation.tailoring.bin");
705 if (ptr == IntPtr.Zero)
707 tailor = (byte*) ((void*) ptr);
// Loader taking the assembly path.
// NOTE(review): CollationResourceTailoringChars and tailorChars are not
// declared anywhere in the visible code.
713 load_collation_resource (corlibPath, CollationResourceCore, &raw, &rawsize);
714 load_collation_resource (corlibPath, CollationResourceTailoring, &tailor, &trawsize);
715 load_collation_resource (corlibPath, CollationResourceTailoringChars, &tailorChars, &trawsize);
// Both resources must be present ...
719 if (raw == null || tailor == null)
721 // check resource version
722 if (raw [0] != UUtil.ResourceVersion ||
723 tailor [0] != UUtil.ResourceVersion)
// Core resource: a 4-byte size is read with UInt32FromBytePtr before
// each table pointer is taken.
727 size = UInt32FromBytePtr (raw, idx);
729 ignorableFlags = raw + idx;
732 size = UInt32FromBytePtr (raw, idx);
734 categories = raw + idx;
737 size = UInt32FromBytePtr (raw, idx);
742 size = UInt32FromBytePtr (raw, idx);
747 size = UInt32FromBytePtr (raw, idx);
752 // size = UInt32FromBytePtr (raw, idx);
754 // widthCompat = (ushort*) (raw + idx);
// Tailoring resource, byte-oriented: a 4-byte count, then per entry
// three 4-byte values followed by a single flag byte.
760 uint count = UInt32FromBytePtr (tailor, idx);
762 tailoringInfos = new TailoringInfo [count];
763 for (int i = 0; i < count; i++) {
764 int i1 = (int) UInt32FromBytePtr (tailor, idx);
766 int i2 = (int) UInt32FromBytePtr (tailor, idx);
768 int i3 = (int) UInt32FromBytePtr (tailor, idx);
770 TailoringInfo ti = new TailoringInfo (
771 i1, i2, i3, tailor [idx++] != 0);
772 tailoringInfos [i] = ti;
// Then a 4-byte char count followed by that many 2-byte little-endian
// chars, which are copied into tailoringArr.
776 count = UInt32FromBytePtr (tailor, idx);
779 tailoringArr = new char [count];
780 for (int i = 0; i < count; i++, idx += 2)
781 tailoringArr [i] = (char) (tailor [idx] + (tailor [idx + 1] << 8));
// Supplies the CJK tables for a culture: FillCJKCore is called first,
// then SetCJKReferences with the same arguments.
// NOTE(review): some parameter lines and the statements surrounding the
// two calls are not visible in this view.
786 public static void FillCJK (string culture,
787 ref CodePointIndexer cjkIndexer,
790 ref CodePointIndexer lv2Indexer,
794 FillCJKCore (culture, ref cjkIndexer,
795 ref catTable, ref lv1Table,
796 ref lv2Indexer, ref lv2Table);
797 SetCJKReferences (culture, ref cjkIndexer,
798 ref catTable, ref lv1Table,
799 ref lv2Indexer, ref lv2Table);
// Resolves the category and level-1 tables for a CJK culture and stores
// the resulting pointers in the static cjk* fields.
// NOTE(review): the switch headers, the declaration of name / raw / idx /
// residx, the early exits and several assignments are not visible in
// this view; comments describe only what the visible lines do.
803 static void FillCJKCore (string culture,
804 ref CodePointIndexer cjkIndexer,
805 ref byte* catTable, ref byte* lv1Table,
806 ref CodePointIndexer cjkLv2Indexer, ref byte* lv2Table)
// Start from whatever is already cached for the culture.
815 catTable = cjkCHScategory;
816 lv1Table = cjkCHSlv1;
820 catTable = cjkCHTcategory;
821 lv1Table = cjkCHTlv1;
825 catTable = cjkJAcategory;
830 catTable = cjkKOcategory;
// True when name is null or the level-1 table is already set;
// presumably an early exit - the controlled statement is not visible.
835 if (name == null || lv1Table != null)
// Manifest-resource path: the resource is named "collation.<name>.bin".
840 #if USE_MANAGED_RESOURCE
842 String.Format ("collation.{0}.bin", name);
843 IntPtr ptr = GetResource (filename);
844 if (ptr == IntPtr.Zero)
846 raw = (byte*) ((void*) ptr);
847 idx += ResourceVersionSize;
// Index-only loader path.
851 case "zh-CHS": residx = CollationTableIdxCjkCHS; break;
852 case "zh-CHT": residx = CollationTableIdxCjkCHT; break;
853 case "ja": residx = CollationTableIdxCjkJA; break;
854 case "ko": residx = CollationTableIdxCjkKO; break;
858 load_collation_resource (residx, &raw);
// Path-based loader path.
863 case "zh-CHS": residx = CollationResourceCJKCHS; break;
864 case "zh-CHT": residx = CollationResourceCJKCHT; break;
865 case "ja": residx = CollationResourceCJKJA; break;
866 case "ko": residx = CollationResourceCJKKO; break;
870 load_collation_resource (corlibPath, residx, &raw, &size);
871 idx += ResourceVersionSize;
// A 4-byte count is read; the level-1 table starts count bytes after
// the start of the category table.
873 uint count = UInt32FromBytePtr (raw, idx);
875 catTable = (byte*) raw + idx;
876 lv1Table = (byte*) raw + idx + count;
// Cache the freshly resolved pointers in the culture's static fields.
880 cjkCHScategory = catTable;
881 cjkCHSlv1 = lv1Table;
884 cjkCHTcategory = catTable;
885 cjkCHTlv1 = lv1Table;
888 cjkJAcategory = catTable;
892 cjkKOcategory = catTable;
// Korean level-2 table: loaded separately. On the two paths that set idx
// here, the table pointer starts ResourceVersionSize + 4 bytes into the resource.
899 #if USE_MANAGED_RESOURCE
900 ptr = GetResource ("collation.cjkKOlv2.bin");
901 if (ptr == IntPtr.Zero)
903 raw = (byte*) ((void*) ptr);
904 idx = ResourceVersionSize + 4;
906 load_collation_resource (CollationTableIdxCjkKOLv2, &raw);
908 load_collation_resource (corlibPath, CollationResourceCJKKOlv2, &raw, &size);
909 idx = ResourceVersionSize + 4;
911 cjkKOlv2 = raw + idx;
919 // For "categories", 0 means no primary weight. 6 means
921 // For expanded character the value is never filled (i.e. 0).
922 // Those arrays will be split into blocks (<3400 and >F800)
923 // level 4 is computed.
925 // public static bool HasSpecialWeight (char c)
926 // { return level1 [(int) c] == 6; }
929 // autogenerated code or icall to fill array runs here