2 using System.Globalization;
4 using System.Reflection;
6 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
8 namespace Mono.Globalization.Unicode
// Immutable description of one culture's tailoring entry: the culture's LCID,
// an index/count pair and a French-sort flag. Instances are created by the
// static constructor of MSCompatUnicodeTable and looked up by LCID through
// MSCompatUnicodeTable.GetTailoringInfo.
// NOTE(review): TailoringIndex/TailoringCount presumably delimit this
// culture's slice of the tailoring character table - confirm against callers.
10 internal class TailoringInfo
12 public readonly int LCID;
13 public readonly int TailoringIndex;
14 public readonly int TailoringCount;
15 public readonly bool FrenchSort;
// Stores the supplied values in the corresponding read-only fields.
// NOTE(review): the assignment of LCID is not visible in this excerpt.
17 public TailoringInfo (int lcid, int tailoringIndex, int tailoringCount, bool frenchSort)
20 TailoringIndex = tailoringIndex;
21 TailoringCount = tailoringCount;
22 FrenchSort = frenchSort;
26 internal class MSCompatUnicodeTable
// Read-only accessors over the static tables. Each visible getter returns the
// backing array itself (no copy), so callers share the same array instance.
// Exposes the tailoring character table.
28 public static char [] TailoringValues {
29 get { return tailorings; }
// Exposes the cjkCHS table.
32 public static ushort [] CjkCHS {
33 get { return cjkCHS; }
// Exposes the cjkCHT table.
36 public static ushort [] CjkCHT {
37 get { return cjkCHT; }
// NOTE(review): getter not visible in this excerpt; presumably returns cjkJA.
40 public static ushort [] CjkJA {
// NOTE(review): getter not visible in this excerpt; presumably returns cjkKO.
44 public static ushort [] CjkKO {
// Exposes the cjkKOlv2 byte table.
48 public static byte [] CjkKOLv2 {
49 get { return cjkKOlv2; }
// Finds the TailoringInfo whose LCID equals lcid by scanning tailoringInfos
// from the start and returning the first match.
// NOTE(review): the result for an unknown lcid is not visible in this
// excerpt (presumably null) - confirm.
52 public static TailoringInfo GetTailoringInfo (int lcid)
54 for (int i = 0; i < tailoringInfos.Length; i++)
55 if (tailoringInfos [i].LCID == lcid)
56 return tailoringInfos [i];
// Returns the entry of the categories table for code point cp. The code
// point is first translated to a table position by UUtil.Category.ToIndex.
public static byte Categories (int cp)
{
	int slot = UUtil.Category.ToIndex (cp);
	return categories [slot];
}
// Returns the entry of the level1 table for code point cp. The code point
// is first translated to a table position by UUtil.Level1.ToIndex.
public static byte Level1 (int cp)
{
	int slot = UUtil.Level1.ToIndex (cp);
	return level1 [slot];
}
// Returns the entry of the level2 table for code point cp. The code point
// is first translated to a table position by UUtil.Level2.ToIndex.
public static byte Level2 (int cp)
{
	int slot = UUtil.Level2.ToIndex (cp);
	return level2 [slot];
}
// Returns the entry of the level3 table for code point cp. The code point
// is first translated to a table position by UUtil.Level3.ToIndex.
public static byte Level3 (int cp)
{
	int slot = UUtil.Level3.ToIndex (cp);
	return level3 [slot];
}
// Reports whether code point cp is ignorable. The final lookup treats a code
// point as ignorable only when its ignorableFlags byte equals 7, i.e. the
// bits 0x1, 0x2 and 0x4 are all set (0x2 and 0x4 are the bits tested by
// IsIgnorableSymbol and IsIgnorableNonSpacing respectively).
// NOTE(review): the statements executed by the two range/category checks
// below are not visible in this excerpt (presumably early returns) - confirm.
80 public static bool IsIgnorable (int cp)
// The cast keeps only the low 16 bits of cp.
82 UnicodeCategory uc = Char.GetUnicodeCategory ((char) cp);
83 // This check eliminates some extraneous code areas
84 if (uc == UnicodeCategory.OtherNotAssigned)
86 // Some characters in Surrogate area are ignored.
87 if (0xD880 <= cp && cp < 0xDB80)
// A negative index means the code point has no entry in the flag table.
89 int i = UUtil.Ignorable.ToIndex (cp);
90 return i >= 0 && ignorableFlags [i] == 7;
93 // for (int i = 0; i <= char.MaxValue; i++)
94 // if (Char.GetUnicodeCategory ((char) i)
95 // == UnicodeCategory.OtherNotAssigned
96 // && ignorableFlags [i] != 7)
97 // Console.WriteLine ("{0:X04}", i);
// Reports whether the 0x2 bit is set in the ignorableFlags entry of code
// point cp. Code points without an entry (negative index) yield false.
public static bool IsIgnorableSymbol (int cp)
{
	int slot = UUtil.Ignorable.ToIndex (cp);
	if (slot < 0)
		return false;
	return (ignorableFlags [slot] & 0x2) != 0;
}
// Reports whether the 0x4 bit is set in the ignorableFlags entry of code
// point cp. Code points without an entry (negative index) yield false.
//
// An alternative formulation would compare the category table instead:
//	return categories [UUtil.Category.ToIndex (cp)] == 1;
// The flag lookup is the one in use.
public static bool IsIgnorableNonSpacing (int cp)
{
	int slot = UUtil.Ignorable.ToIndex (cp);
	if (slot < 0)
		return false;
	return (ignorableFlags [slot] & 0x4) != 0;
}
// Folds kana type: code points in U+3041..U+3094 (the range IsHiragana
// accepts) are shifted up by 0x60, onto U+30A1..U+30F4; every other value
// is returned unchanged.
//
// IgnoreKanaType does not treat half-width katakana as equivalent to
// full-width ones, which is why a single range shift is sufficient.
public static int ToKanaTypeInsensitive (int i)
{
	if (i < 0x3041 || i > 0x3094)
		return i;
	return i + 0x60;
}
// Maps code point cp through the widthCompat table. When cp has no entry
// (negative index) or its entry is 0, cp itself is returned.
//
// Note: the indexer currently optimizes this table heavily, which might
// have introduced bugs.
public static int ToWidthCompat (int cp)
{
	int slot = UUtil.WidthCompat.ToIndex (cp);
	if (slot < 0)
		return cp;
	int mapped = widthCompat [slot];
	if (mapped == 0)
		return cp;
	return mapped;
}
132 #region Level 4 properties (Kana)
// Decides, purely from the code point range c falls in, whether the
// character carries a special weight (this method sits in the
// "Level 4 properties (Kana)" region).
// NOTE(review): the first condition and the results of several branches are
// not visible in this excerpt; only the two returns shown below can be
// documented with certainty.
134 public static bool HasSpecialWeight (char c)
138 else if ('\uFF66' <= c && c < '\uFF9E')
140 else if ('\u3300' <= c)
// Below U+309D: special only when also below U+3099.
142 else if (c < '\u309D')
143 return (c < '\u3099');
// U+309D up to (not including) U+3100: everything except U+30FB.
144 else if (c < '\u3100')
145 return c != '\u30FB';
146 else if (c < '\u32D0')
148 else if (c < '\u32FF')
153 // FIXME: it should be removed at some stage
154 // (will become unused).
155 public static byte GetJapaneseDashType (char c)
// True when c lies in the inclusive range U+FF66..U+FF9D.
public static bool IsHalfWidthKana (char c)
{
	if (c < '\uFF66')
		return false;
	return c <= '\uFF9D';
}
// True when c lies in the inclusive range U+3041..U+3094.
public static bool IsHiragana (char c)
{
	if (c < '\u3041')
		return false;
	return c <= '\u3094';
}
// Classifies c as a Japanese small letter. Two ranges are examined:
// U+FF67..U+FF6F (inclusive) and the open interval U+3040..U+30FA.
// NOTE(review): the statements executed for each range are not visible in
// this excerpt - confirm the per-range results against the full source.
180 public static bool IsJapaneseSmallLetter (char c)
182 if ('\uFF67' <= c && c <= '\uFF6F')
184 if ('\u3040' < c && c < '\u30FA') {
// Variant of the table-loading API in which nothing has to be loaded:
// IsReady is always true and FillCJK does nothing.
// NOTE(review): a second IsReady/FillCJK pair appears later in the file, so
// these are presumably alternative conditional-compilation branches; the
// directives themselves are not visible in this excerpt - confirm.
218 public static readonly bool IsReady = true; // always
220 public static void FillCJK (string name) {}
// Tailoring tables, filled by the static constructor from
// "collation.tailoring.bin".
223 static readonly char [] tailorings;
224 static readonly TailoringInfo [] tailoringInfos;
// Core tables, filled by the static constructor from "collation.core.bin"
// in exactly this order.
225 internal static readonly byte [] ignorableFlags;
226 internal static readonly byte [] categories;
227 internal static readonly byte [] level1;
228 internal static readonly byte [] level2;
229 internal static readonly byte [] level3;
230 internal static readonly ushort [] widthCompat;
// CJK tables; not read-only because they are assigned after type
// initialization (cjkKOlv2 is filled in FillCJKCore).
231 static ushort [] cjkCHS;
232 static ushort [] cjkCHT;
233 static ushort [] cjkJA;
234 static ushort [] cjkKO;
235 static byte [] cjkKOlv2;
// NOTE(review): judging by its name this is a lock target. A string literal
// is interned, so any other code locking on the literal "forLock" would
// share this monitor; a private object instance would be safer - confirm
// usage before changing.
236 static string forLock = "forLock";
// NOTE(review): this excerpt shows no assignment other than this
// initializer, so the field reads false in this variant - confirm how
// callers use it.
238 public static readonly bool IsReady = false;
// Opens the manifest resource called name from the executing assembly.
// Callers in this class check the result for null before reading from it.
static Stream GetResource (string name)
{
	Assembly self = Assembly.GetExecutingAssembly ();
	return self.GetManifestResourceStream (name);
}
// Type initializer: loads the core collation tables and the tailoring
// tables from the embedded resources "collation.core.bin" and
// "collation.tailoring.bin".
// NOTE(review): the null-resource handling and the first three constructor
// arguments of TailoringInfo are not visible in this excerpt.
246 static MSCompatUnicodeTable ()
248 using (Stream s = GetResource ("collation.core.bin")) {
249 // FIXME: remove those lines later.
250 // actually this line should not be required,
251 // but when we switch from the corlib that
252 // does not have resources to the corlib that
253 // do have, it tries to read resource from
254 // the corlib that runtime kicked and returns
255 // null (because old one does not have it).
256 // In such cases managed collation won't work.
// Five length-prefixed byte tables are stored back to back, in this order.
260 BinaryReader reader = new BinaryReader (s);
261 FillTable (reader, ref ignorableFlags);
262 FillTable (reader, ref categories);
263 FillTable (reader, ref level1);
264 FillTable (reader, ref level2);
265 FillTable (reader, ref level3);
// widthCompat follows: a 32-bit element count, then that many 16-bit values.
267 int size = reader.ReadInt32 ();
268 widthCompat = new ushort [size];
269 for (int i = 0; i < size; i++)
270 widthCompat [i] = reader.ReadUInt16 ();
273 using (Stream s = GetResource ("collation.tailoring.bin")) {
274 if (s == null) // see FIXME above.
277 BinaryReader reader = new BinaryReader (s);
// The tailoring resource starts with the number of TailoringInfo records.
279 int count = reader.ReadInt32 ();
// The call below is marked "dummy" by the original author; its result is
// discarded.
280 HasSpecialWeight ((char) count); // dummy
281 tailoringInfos = new TailoringInfo [count];
282 for (int i = 0; i < count; i++) {
283 TailoringInfo ti = new TailoringInfo (
287 reader.ReadBoolean ());
288 tailoringInfos [i] = ti;
// Two bytes are consumed and discarded here; the IsHiragana call is marked
// "dummy" and its result is unused.
290 reader.ReadByte (); // dummy
291 IsHiragana ((char) reader.ReadByte ()); // dummy
// Finally the tailoring character table: a 32-bit count, then that many
// 16-bit values stored as chars.
293 count = reader.ReadInt32 ();
294 tailorings = new char [count];
295 for (int i = 0; i < count; i++)
296 tailorings [i] = (char) reader.ReadUInt16 ();
// Reads one length-prefixed byte table from reader: a 32-bit element count
// followed by that many bytes. The new array is stored in bytes.
//
// BinaryReader.Read may return fewer bytes than requested, so the payload
// is read in a loop until the table is complete. If the stream ends before
// the announced size has been read, EndOfStreamException is thrown (the
// same exception ReadInt32 raises on a truncated stream) rather than
// leaving the tail of the table silently zero-filled. bytes is assigned
// only after the whole table has been read.
static void FillTable (BinaryReader reader, ref byte [] bytes)
{
	int size = reader.ReadInt32 ();
	byte [] table = new byte [size];
	int filled = 0;
	while (filled < size) {
		int got = reader.Read (table, filled, size - filled);
		if (got <= 0)
			throw new System.IO.EndOfStreamException (
				"Collation table is truncated: expected " + size +
				" bytes but only " + filled + " were available.");
		filled += got;
	}
	bytes = table;
}
// Public entry point for loading the CJK tables of the given culture; the
// work itself is delegated to FillCJKCore.
// NOTE(review): the statements surrounding the call are not visible in this
// excerpt (presumably synchronization on forLock) - confirm.
309 public static void FillCJK (string culture)
312 FillCJKCore (culture);
// Loads the CJK table for culture from the embedded resource
// "collation.<name>.bin", and the additional "collation.cjkKOlv2.bin" table.
// NOTE(review): how culture selects name and arr, where the loaded array is
// stored, and under which condition the cjkKOlv2 resource is read are not
// visible in this excerpt.
316 static void FillCJKCore (string culture)
322 ushort [] arr = null;
// NOTE(review): the action taken here is not visible; presumably an early
// return when there is nothing to load or the table is already present.
342 if (name == null || arr != null)
// Table layout: a 32-bit element count followed by that many 16-bit values.
345 using (Stream s = GetResource (String.Format ("collation.{0}.bin", name))) {
346 BinaryReader reader = new BinaryReader (s);
347 int size = reader.ReadInt32 ();
348 arr = new ushort [size];
349 for (int i = 0; i < size; i++)
350 arr [i] = reader.ReadUInt16 ();
// The level-2 table is a length-prefixed byte table read via FillTable.
371 using (Stream s = GetResource ("collation.cjkKOlv2.bin")) {
372 BinaryReader reader = new BinaryReader (s);
373 FillTable (reader, ref cjkKOlv2);
381 // For "categories", 0 means no primary weight. 6 means
383 // For expanded character the value is never filled (i.e. 0).
385 // Those arrays will be split into blocks (<3400 and >F800).
386 // Level 4 is computed.
387 // public static bool HasSpecialWeight (char c)
388 // { return level1 [(int) c] == 6; }
391 // autogenerated code or icall to fill array runs here