2005-07-15 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / MSCompatUnicodeTable.cs
1 using System;
2 using System.Globalization;
3 using System.IO;
4 using System.Reflection;
5
6 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
7
8 namespace Mono.Globalization.Unicode
9 {
10         internal class TailoringInfo
11         {
12                 public readonly int LCID;
13                 public readonly int TailoringIndex;
14                 public readonly int TailoringCount;
15                 public readonly bool FrenchSort;
16
17                 public TailoringInfo (int lcid, int tailoringIndex, int tailoringCount, bool frenchSort)
18                 {
19                         LCID = lcid;
20                         TailoringIndex = tailoringIndex;
21                         TailoringCount = tailoringCount;
22                         FrenchSort = frenchSort;
23                 }
24         }
25
26         internal class MSCompatUnicodeTable
27         {
28                 public static char [] TailoringValues {
29                         get { return tailorings; }
30                 }
31
32                 public static ushort [] CjkCHS {
33                         get { return cjkCHS; }
34                 }
35
36                 public static ushort [] CjkCHT {
37                         get { return cjkCHT; }
38                 }
39
40                 public static ushort [] CjkJA {
41                         get { return cjkJA; }
42                 }
43
44                 public static ushort [] CjkKO {
45                         get { return cjkKO; }
46                 }
47
48                 public static byte [] CjkKOLv2 {
49                         get { return cjkKOlv2; }
50                 }
51
52                 public static TailoringInfo GetTailoringInfo (int lcid)
53                 {
54                         for (int i = 0; i < tailoringInfos.Length; i++)
55                                 if (tailoringInfos [i].LCID == lcid)
56                                         return tailoringInfos [i];
57                         return null;
58                 }
59
60                 public static byte Categories (int cp)
61                 {
62                         return categories [UUtil.Category.ToIndex (cp)];
63                 }
64
65                 public static byte Level1 (int cp)
66                 {
67                         return level1 [UUtil.Level1.ToIndex (cp)];
68                 }
69
70                 public static byte Level2 (int cp)
71                 {
72                         return level2 [UUtil.Level2.ToIndex (cp)];
73                 }
74
75                 public static byte Level3 (int cp)
76                 {
77                         return level3 [UUtil.Level3.ToIndex (cp)];
78                 }
79
80                 public static bool IsIgnorable (int cp)
81                 {
82                         UnicodeCategory uc = Char.GetUnicodeCategory ((char) cp);
83                         // This check eliminates some extraneous code areas
84                         if (uc == UnicodeCategory.OtherNotAssigned)
85                                 return true;
86                         // Some characters in Surrogate area are ignored.
87                         if (0xD880 <= cp && cp < 0xDB80)
88                                 return true;
89                         int i = UUtil.Ignorable.ToIndex (cp);
90                         return i >= 0 && ignorableFlags [i] == 7;
91                 }
92                 // Verifier:
93                 // for (int i = 0; i <= char.MaxValue; i++)
94                 //      if (Char.GetUnicodeCategory ((char) i)
95                 //              == UnicodeCategory.OtherNotAssigned 
96                 //              && ignorableFlags [i] != 7)
97                 //              Console.WriteLine ("{0:X04}", i);
98
99                 public static bool IsIgnorableSymbol (int cp)
100                 {
101                         int i = UUtil.Ignorable.ToIndex (cp);
102                         return i >= 0 && (ignorableFlags [i] & 0x2) != 0;
103                 }
104
105                 public static bool IsIgnorableNonSpacing (int cp)
106                 {
107                         int i = UUtil.Ignorable.ToIndex (cp);
108                         return i >= 0 && (ignorableFlags [i] & 0x4) != 0;
109                         // It could be implemented this way, but the above
110                         // is faster.
111 //                      return categories [UUtil.Category.ToIndex (cp)] == 1;
112                 }
113
114                 public static int ToKanaTypeInsensitive (int i)
115                 {
116                         // Note that IgnoreKanaType does not treat half-width
117                         // katakana as equivalent to full-width ones.
118
119                         // Thus, it is so simple ;-)
120                         return (0x3041 <= i && i <= 0x3094) ? i + 0x60 : i;
121                 }
122
123                 public static int ToWidthCompat (int cp)
124                 {
125                         int i = UUtil.WidthCompat.ToIndex (cp);
126                         int v = i >= 0 ? (int) widthCompat [i] : 0;
127                         return v != 0 ? v : cp;
128                 }
129
130                 #region Level 4 properties (Kana)
131
132                 public static bool HasSpecialWeight (char c)
133                 {
134                         if (c < '\u3041')
135                                 return false;
136                         else if ('\uFF66' <= c && c < '\uFF9E')
137                                 return true;
138                         else if ('\u3300' <= c)
139                                 return false;
140                         else if (c < '\u309D')
141                                 return (c < '\u3099');
142                         else if (c < '\u3100')
143                                 return c != '\u30FB';
144                         else if (c < '\u32D0')
145                                 return false;
146                         else if (c < '\u32FF')
147                                 return true;
148                         return false;
149                 }
150
151                 // FIXME: it should be removed at some stage
152                 // (will become unused).
153                 public static byte GetJapaneseDashType (char c)
154                 {
155                         switch (c) {
156                         case '\u309D':
157                         case '\u309E':
158                         case '\u30FD':
159                         case '\u30FE':
160                         case '\uFF70':
161                                 return 4;
162                         case '\u30FC':
163                                 return 5;
164                         }
165                         return 3;
166                 }
167
168                 public static bool IsHalfWidthKana (char c)
169                 {
170                         return '\uFF66' <= c && c <= '\uFF9D';
171                 }
172
173                 public static bool IsHiragana (char c)
174                 {
175                         return '\u3041' <= c && c <= '\u3094';
176                 }
177
178                 public static bool IsJapaneseSmallLetter (char c)
179                 {
180                         if ('\uFF67' <= c && c <= '\uFF6F')
181                                 return true;
182                         if ('\u3040' < c && c < '\u30FA') {
183                                 switch (c) {
184                                 case '\u3041':
185                                 case '\u3043':
186                                 case '\u3045':
187                                 case '\u3047':
188                                 case '\u3049':
189                                 case '\u3063':
190                                 case '\u3083':
191                                 case '\u3085':
192                                 case '\u3087':
193                                 case '\u308E':
194                                 case '\u30A1':
195                                 case '\u30A3':
196                                 case '\u30A5':
197                                 case '\u30A7':
198                                 case '\u30A9':
199                                 case '\u30C3':
200                                 case '\u30E3':
201                                 case '\u30E5':
202                                 case '\u30E7':
203                                 case '\u30EE':
204                                 case '\u30F5':
205                                 case '\u30F6':
206                                         return true;
207                                 }
208                         }
209                         return false;
210                 }
211
212                 #endregion
213
214 #if GENERATE_TABLE
215
216                 public static readonly bool IsReady = true; // always
217
218                 public static void FillCJK (string name) {}
219 #else
220
221                 static readonly char [] tailorings;
222                 static readonly TailoringInfo [] tailoringInfos;
223                 internal static readonly byte [] ignorableFlags;
224                 internal static readonly byte [] categories;
225                 internal static readonly byte [] level1;
226                 internal static readonly byte [] level2;
227                 internal static readonly byte [] level3;
228                 internal static readonly ushort [] widthCompat;
229                 static ushort [] cjkCHS;
230                 static ushort [] cjkCHT;
231                 static ushort [] cjkJA;
232                 static ushort [] cjkKO;
233                 static byte [] cjkKOlv2;
234                 static string forLock = "forLock";
235
236                 public static readonly bool IsReady = false;
237
238                 static Stream GetResource (string name)
239                 {
240                         return Assembly.GetExecutingAssembly ()
241                                 .GetManifestResourceStream (name);
242                 }
243
244                 static MSCompatUnicodeTable ()
245                 {
246                         using (Stream s = GetResource ("collation.core.bin")) {
247                                 BinaryReader reader = new BinaryReader (s);
248                                 FillTable (reader, ref ignorableFlags);
249                                 FillTable (reader, ref categories);
250                                 FillTable (reader, ref level1);
251                                 FillTable (reader, ref level2);
252                                 FillTable (reader, ref level3);
253
254                                 int size = reader.ReadInt32 ();
255                                 widthCompat = new ushort [size];
256                                 for (int i = 0; i < size; i++)
257                                         widthCompat [i] = reader.ReadUInt16 ();
258                         }
259
260                         using (Stream s = GetResource ("collation.tailoring.bin")) {
261                                 BinaryReader reader = new BinaryReader (s);
262                                 // tailoringInfos
263                                 int count = reader.ReadInt32 ();
264                                 HasSpecialWeight ((char) count); // dummy
265                                 tailoringInfos = new TailoringInfo [count];
266                                 for (int i = 0; i < count; i++) {
267                                         TailoringInfo ti = new TailoringInfo (
268                                                 reader.ReadInt32 (),
269                                                 reader.ReadInt32 (),
270                                                 reader.ReadInt32 (),
271                                                 reader.ReadBoolean ());
272                                         tailoringInfos [i] = ti;
273                                 }
274                                 reader.ReadByte (); // dummy
275                                 IsHiragana ((char) reader.ReadByte ()); // dummy
276                                 // tailorings
277                                 count = reader.ReadInt32 ();
278                                 tailorings = new char [count];
279                                 for (int i = 0; i < count; i++)
280                                         tailorings [i] = (char) reader.ReadUInt16 ();
281                         }
282
283                         IsReady = true;
284                 }
285
286                 static void FillTable (BinaryReader reader, ref byte [] bytes)
287                 {
288                         int size = reader.ReadInt32 ();
289                         bytes = new byte [size];
290                         reader.Read (bytes, 0, size);
291                 }
292
293                 public static void FillCJK (string culture)
294                 {
295                         lock (forLock) {
296                                 FillCJKCore (culture);
297                         }
298                 }
299
300                 static void FillCJKCore (string culture)
301                 {
302                         string name = null;
303                         ushort [] arr = null;
304                         switch (culture) {
305                         case "zh-CHS":
306                                 name = "cjkCHS";
307                                 arr = cjkCHS;
308                                 break;
309                         case "zh-CHT":
310                                 name = "cjkCHT";
311                                 arr = cjkCHT;
312                                 break;
313                         case "ja":
314                                 name = "cjkJA";
315                                 arr = cjkJA;
316                                 break;
317                         case "ko":
318                                 name = "cjkKO";
319                                 arr = cjkKO;
320                                 break;
321                         }
322
323                         if (name == null || arr != null)
324                                 return;
325
326                         using (Stream s = GetResource (String.Format ("collation.{0}.bin", name))) {
327                                 BinaryReader reader = new BinaryReader (s);
328                                 int size = reader.ReadInt32 ();
329                                 arr = new ushort [size];
330                                 for (int i = 0; i < size; i++)
331                                         arr [i] = reader.ReadUInt16 ();
332                         }
333
334                         switch (culture) {
335                         case "zh-CHS":
336                                 cjkCHS = arr;
337                                 break;
338                         case "zh-CHT":
339                                 cjkCHT = arr;
340                                 break;
341                         case "ja":
342                                 cjkJA = arr;
343                                 break;
344                         case "ko":
345                                 cjkKO = arr;
346                                 break;
347                         }
348
349                         if (name != "cjkKO")
350                                 return;
351
352                         using (Stream s = GetResource ("collation.cjkKOlv2.bin")) {
353                                 BinaryReader reader = new BinaryReader (s);
354                                 FillTable (reader, ref cjkKOlv2);
355                         }
356                 }
357         }
358 }
359 #endif
360
361
                // For "categories", 0 means no primary weight and 6 means
                // variable weight.
                // For expanded characters the value is never filled (i.e. it
                // stays 0).
                // Those arrays will be split into blocks (<3400 and >F800).
                // Level 4 is computed rather than stored in a table.
367
368                 // public static bool HasSpecialWeight (char c)
369                 // { return level1 [(int) c] == 6; }
370
371                 //
372                 // autogenerated code or icall to fill array runs here
373                 //
374