2 using System.Globalization;
4 using System.Reflection;
6 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
8 namespace Mono.Globalization.Unicode
// Immutable description of one culture's tailoring entry.
internal class TailoringInfo
{
	// Culture identifier this entry belongs to; GetTailoringInfo matches on it.
	public readonly int LCID;
	// Presumably the start offset of this culture's data inside the shared
	// tailoring value table - TODO confirm against the consumer.
	public readonly int TailoringIndex;
	// Presumably the number of tailoring values belonging to this culture -
	// TODO confirm against the consumer.
	public readonly int TailoringCount;
	// Flag stored as supplied; presumably selects French sorting rules -
	// TODO confirm.
	public readonly bool FrenchSort;

	// Stores the four values verbatim; no validation is performed.
	public TailoringInfo (int lcid, int tailoringIndex, int tailoringCount, bool frenchSort)
	{
		this.LCID = lcid;
		this.TailoringIndex = tailoringIndex;
		this.TailoringCount = tailoringCount;
		this.FrenchSort = frenchSort;
	}
}
26 internal class MSCompatUnicodeTable
// Read-only accessors for the static tables. Each one hands back the
// backing array itself (no copy is made), so callers must not modify it.

// Tailoring value table.
public static char [] TailoringValues {
	get {
		return tailorings;
	}
}

// CJK table held in cjkCHS.
public static ushort [] CjkCHS {
	get {
		return cjkCHS;
	}
}

// CJK table held in cjkCHT.
public static ushort [] CjkCHT {
	get {
		return cjkCHT;
	}
}

// CJK table held in cjkJA.
public static ushort [] CjkJA {
	get {
		return cjkJA;
	}
}

// CJK table held in cjkKO.
public static ushort [] CjkKO {
	get {
		return cjkKO;
	}
}

// Byte table held in cjkKOlv2.
public static byte [] CjkKOLv2 {
	get {
		return cjkKOlv2;
	}
}
// Looks up the tailoring entry whose LCID equals lcid.
// Entries are scanned in table order and the first match wins;
// null is returned when no entry matches.
public static TailoringInfo GetTailoringInfo (int lcid)
{
	foreach (TailoringInfo candidate in tailoringInfos) {
		if (candidate.LCID == lcid)
			return candidate;
	}
	return null;
}
// Returns the category byte stored for code point cp.
// When the index mapping yields a negative value (no slot for cp), 0 is
// returned, which for this table means "no primary weight".
public static byte Categories (int cp)
{
	int idx = UUtil.Category.ToIndex (cp);
	// Guard the same way the ignorable/width lookups do; indexing with a
	// negative value would throw IndexOutOfRangeException.
	return idx < 0 ? (byte) 0 : categories [idx];
}
// Returns the level 1 weight byte stored for code point cp.
// When the index mapping yields a negative value (no slot for cp), 0 is
// returned, i.e. the same value an unfilled table entry holds.
public static byte Level1 (int cp)
{
	int idx = UUtil.Level1.ToIndex (cp);
	// Guard the same way the ignorable/width lookups do; indexing with a
	// negative value would throw IndexOutOfRangeException.
	return idx < 0 ? (byte) 0 : level1 [idx];
}
// Returns the level 2 weight byte stored for code point cp.
// When the index mapping yields a negative value (no slot for cp), 0 is
// returned, i.e. the same value an unfilled table entry holds.
public static byte Level2 (int cp)
{
	int idx = UUtil.Level2.ToIndex (cp);
	// Guard the same way the ignorable/width lookups do; indexing with a
	// negative value would throw IndexOutOfRangeException.
	return idx < 0 ? (byte) 0 : level2 [idx];
}
// Returns the level 3 weight byte stored for code point cp.
// When the index mapping yields a negative value (no slot for cp), 0 is
// returned, i.e. the same value an unfilled table entry holds.
public static byte Level3 (int cp)
{
	int idx = UUtil.Level3.ToIndex (cp);
	// Guard the same way the ignorable/width lookups do; indexing with a
	// negative value would throw IndexOutOfRangeException.
	return idx < 0 ? (byte) 0 : level3 [idx];
}
// Tells whether code point cp is completely ignorable.
// Note that cp is narrowed to char for the category query, so only its
// low 16 bits take part in that first check.
public static bool IsIgnorable (int cp)
{
	// Unassigned code points are answered up front; this removes some
	// extraneous code areas from the table lookup.
	if (Char.GetUnicodeCategory ((char) cp) == UnicodeCategory.OtherNotAssigned)
		return true;

	// Part of the surrogate area is ignored as well.
	if (cp >= 0xD880 && cp < 0xDB80)
		return true;

	int slot = UUtil.Ignorable.ToIndex (cp);
	if (slot < 0)
		return false;

	// Only the exact flag value 7 counts as ignorable here.
	return ignorableFlags [slot] == 7;
}
93 // for (int i = 0; i <= char.MaxValue; i++)
94 // if (Char.GetUnicodeCategory ((char) i)
95 // == UnicodeCategory.OtherNotAssigned
96 // && ignorableFlags [i] != 7)
97 // Console.WriteLine ("{0:X04}", i);
// Tells whether the 0x2 bit is set in the ignorable flags of cp.
// Code points without a slot in the flag table yield false.
public static bool IsIgnorableSymbol (int cp)
{
	int slot = UUtil.Ignorable.ToIndex (cp);
	if (slot < 0)
		return false;
	return (ignorableFlags [slot] & 0x2) != 0;
}
// Tells whether the 0x4 bit is set in the ignorable flags of cp.
// Code points without a slot in the flag table yield false.
public static bool IsIgnorableNonSpacing (int cp)
{
	int slot = UUtil.Ignorable.ToIndex (cp);
	if (slot < 0)
		return false;
	return (ignorableFlags [slot] & 0x4) != 0;
	// An alternative noted by the original author, not used here:
	// return categories [UUtil.Category.ToIndex (cp)] == 1;
}
// Folds kana type: values in U+3041..U+3094 (hiragana) are shifted up by
// 0x60 to the matching katakana code point; anything else is returned
// unchanged.
// IgnoreKanaType does not treat half-width katakana as equivalent to the
// full-width ones, so no half-width handling is needed and a plain offset
// is sufficient.
public static int ToKanaTypeInsensitive (int i)
{
	if (i < 0x3041 || i > 0x3094)
		return i;
	return i + 0x60;
}
// Maps cp through the width-compatibility table.
// cp itself is returned when it has no slot in the table or when the
// stored value is 0 (no mapping recorded).
public static int ToWidthCompat (int cp)
{
	int slot = UUtil.WidthCompat.ToIndex (cp);
	if (slot < 0)
		return cp;

	int mapped = widthCompat [slot];
	if (mapped == 0)
		return cp;
	return mapped;
}
130 #region Level 4 properties (Kana)
132 public static bool HasSpecialWeight (char c)
136 else if ('\uFF66' <= c && c < '\uFF9E')
138 else if ('\u3300' <= c)
140 else if (c < '\u309D')
141 return (c < '\u3099');
142 else if (c < '\u3100')
143 return c != '\u30FB';
144 else if (c < '\u32D0')
146 else if (c < '\u32FF')
151 // FIXME: it should be removed at some stage
152 // (will become unused).
153 public static byte GetJapaneseDashType (char c)
// True when c lies in U+FF66..U+FF9D (both ends inclusive).
public static bool IsHalfWidthKana (char c)
{
	if (c < '\uFF66')
		return false;
	return c <= '\uFF9D';
}
// True when c lies in U+3041..U+3094 (both ends inclusive).
public static bool IsHiragana (char c)
{
	if (c < '\u3041')
		return false;
	return c <= '\u3094';
}
178 public static bool IsJapaneseSmallLetter (char c)
180 if ('\uFF67' <= c && c <= '\uFF6F')
182 if ('\u3040' < c && c < '\u30FA') {
216 public static readonly bool IsReady = true; // always
218 public static void FillCJK (string name) {}
// Tailoring data; the static constructor fills both arrays from the
// "collation.tailoring.bin" resource.
static readonly char [] tailorings;
static readonly TailoringInfo [] tailoringInfos;

// Core tables; the static constructor fills them, in this order, from the
// "collation.core.bin" resource.
internal static readonly byte [] ignorableFlags;
internal static readonly byte [] categories;
internal static readonly byte [] level1;
internal static readonly byte [] level2;
internal static readonly byte [] level3;
internal static readonly ushort [] widthCompat;

// CJK tables. They are not readonly, so they can be assigned after type
// initialization (presumably by the CJK fill routine - confirm there).
static ushort [] cjkCHS;
static ushort [] cjkCHT;
static ushort [] cjkJA;
static ushort [] cjkKO;
static byte [] cjkKOlv2;

// Lock token. A private object is used instead of a string literal:
// literals are interned, so any unrelated code locking on the same text
// would end up sharing this monitor.
static readonly object forLock = new object ();
236 public static readonly bool IsReady = false;
// Opens the manifest resource called name from the assembly that contains
// this code. The result of GetManifestResourceStream is passed through
// untouched, so callers receive null when no such resource exists.
static Stream GetResource (string name)
{
	Assembly self = Assembly.GetExecutingAssembly ();
	return self.GetManifestResourceStream (name);
}
244 static MSCompatUnicodeTable ()
246 using (Stream s = GetResource ("collation.core.bin")) {
247 BinaryReader reader = new BinaryReader (s);
248 FillTable (reader, ref ignorableFlags);
249 FillTable (reader, ref categories);
250 FillTable (reader, ref level1);
251 FillTable (reader, ref level2);
252 FillTable (reader, ref level3);
254 int size = reader.ReadInt32 ();
255 widthCompat = new ushort [size];
256 for (int i = 0; i < size; i++)
257 widthCompat [i] = reader.ReadUInt16 ();
260 using (Stream s = GetResource ("collation.tailoring.bin")) {
261 BinaryReader reader = new BinaryReader (s);
263 int count = reader.ReadInt32 ();
264 HasSpecialWeight ((char) count); // dummy
265 tailoringInfos = new TailoringInfo [count];
266 for (int i = 0; i < count; i++) {
267 TailoringInfo ti = new TailoringInfo (
271 reader.ReadBoolean ());
272 tailoringInfos [i] = ti;
274 reader.ReadByte (); // dummy
275 IsHiragana ((char) reader.ReadByte ()); // dummy
277 count = reader.ReadInt32 ();
278 tailorings = new char [count];
279 for (int i = 0; i < count; i++)
280 tailorings [i] = (char) reader.ReadUInt16 ();
// Reads one length-prefixed byte table from reader into bytes.
// Layout: a 32-bit element count followed by that many raw bytes.
// Throws EndOfStreamException when the stream ends before the announced
// number of bytes has been delivered (the length prefix itself already
// behaves that way through ReadInt32).
static void FillTable (BinaryReader reader, ref byte [] bytes)
{
	int size = reader.ReadInt32 ();
	byte [] table = new byte [size];

	// BinaryReader.Read may hand back fewer bytes than requested, so keep
	// reading until the table is full rather than trusting a single call.
	int filled = 0;
	while (filled < size) {
		int got = reader.Read (table, filled, size - filled);
		if (got <= 0)
			throw new System.IO.EndOfStreamException (
				"Collation table is truncated: expected " + size +
				" bytes but only " + filled + " were available.");
		filled += got;
	}

	// Publish only a completely filled table.
	bytes = table;
}
293 public static void FillCJK (string culture)
296 FillCJKCore (culture);
300 static void FillCJKCore (string culture)
303 ushort [] arr = null;
323 if (name == null || arr != null)
326 using (Stream s = GetResource (String.Format ("collation.{0}.bin", name))) {
327 BinaryReader reader = new BinaryReader (s);
328 int size = reader.ReadInt32 ();
329 arr = new ushort [size];
330 for (int i = 0; i < size; i++)
331 arr [i] = reader.ReadUInt16 ();
352 using (Stream s = GetResource ("collation.cjkKOlv2.bin")) {
353 BinaryReader reader = new BinaryReader (s);
354 FillTable (reader, ref cjkKOlv2);
362 // For "categories", 0 means no primary weight. 6 means
364 // For expanded character the value is never filled (i.e. 0).
365 // Those arrays will be split into blocks (<3400 and >F800)
366 // level 4 is computed.
368 // public static bool HasSpecialWeight (char c)
369 // { return level1 [(int) c] == 6; }
372 // autogenerated code or icall to fill array runs here