2005-07-21 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / MSCompatUnicodeTable.cs
1 using System;
2 using System.Globalization;
3 using System.IO;
4 using System.Reflection;
5
6 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
7
8 namespace Mono.Globalization.Unicode
9 {
// Immutable description of one culture's tailoring entry as read from
// the tailoring resource: the culture id, an index/count pair
// (presumably a slice of the tailoring value table - confirm against
// the collator), and the French-sort flag.
internal class TailoringInfo
{
	// Culture identifier this entry is looked up by.
	public readonly int LCID;
	// Index value stored for this culture.
	public readonly int TailoringIndex;
	// Count value stored for this culture.
	public readonly int TailoringCount;
	// French-sort flag stored for this culture.
	public readonly bool FrenchSort;

	// Stores the four values verbatim; no validation is performed.
	public TailoringInfo (int lcid, int tailoringIndex, int tailoringCount, bool frenchSort)
	{
		this.LCID = lcid;
		this.TailoringIndex = tailoringIndex;
		this.TailoringCount = tailoringCount;
		this.FrenchSort = frenchSort;
	}
}
25
26         internal class MSCompatUnicodeTable
27         {
// Accessors for the backing tables. Each one hands out the underlying
// array itself (no copy), so it may be null when the corresponding
// table has not been loaded.

// The tailoring value table.
public static char [] TailoringValues {
	get {
		return tailorings;
	}
}

// CJK table for zh-CHS.
public static ushort [] CjkCHS {
	get {
		return cjkCHS;
	}
}

// CJK table for zh-CHT.
public static ushort [] CjkCHT {
	get {
		return cjkCHT;
	}
}

// CJK table for ja.
public static ushort [] CjkJA {
	get {
		return cjkJA;
	}
}

// CJK table for ko.
public static ushort [] CjkKO {
	get {
		return cjkKO;
	}
}

// Additional byte table for ko (the "lv2" table).
public static byte [] CjkKOLv2 {
	get {
		return cjkKOlv2;
	}
}
51
// Returns the tailoring entry whose LCID equals lcid, or null when
// there is no such entry. Also returns null when the tailoring table
// is not available at all: the static constructor returns early if a
// collation resource is missing, which leaves tailoringInfos null.
public static TailoringInfo GetTailoringInfo (int lcid)
{
	TailoringInfo [] infos = tailoringInfos;
	if (infos == null)
		return null;
	for (int i = 0; i < infos.Length; i++)
		if (infos [i].LCID == lcid)
			return infos [i];
	return null;
}
59
// Returns the category byte stored for the code point.
// The sibling lookups in this class (IsIgnorable, ToWidthCompat, ...)
// treat a negative ToIndex result as "no slot for this code point";
// do the same here instead of indexing the array with it. 0 is the
// value used for entries that were never filled.
public static byte Categories (int cp)
{
	int idx = UUtil.Category.ToIndex (cp);
	return idx < 0 ? (byte) 0 : categories [idx];
}
64
// Returns the level 1 byte stored for the code point.
// A negative ToIndex result (the sibling lookups in this class guard
// against it) yields 0 instead of an out-of-range array access.
public static byte Level1 (int cp)
{
	int idx = UUtil.Level1.ToIndex (cp);
	return idx < 0 ? (byte) 0 : level1 [idx];
}
69
// Returns the level 2 byte stored for the code point.
// A negative ToIndex result (the sibling lookups in this class guard
// against it) yields 0 instead of an out-of-range array access.
public static byte Level2 (int cp)
{
	int idx = UUtil.Level2.ToIndex (cp);
	return idx < 0 ? (byte) 0 : level2 [idx];
}
74
// Returns the level 3 byte stored for the code point.
// A negative ToIndex result (the sibling lookups in this class guard
// against it) yields 0 instead of an out-of-range array access.
public static byte Level3 (int cp)
{
	int idx = UUtil.Level3.ToIndex (cp);
	return idx < 0 ? (byte) 0 : level3 [idx];
}
79
// Tells whether the code point is completely ignorable: it is
// unassigned, lies in [0xD880, 0xDB80), or has all three low flag bits
// set (value 7) in the ignorable-flags table.
// Note that cp is narrowed to char for the category lookup.
public static bool IsIgnorable (int cp)
{
	// Checking the category first eliminates some extraneous code
	// areas.
	if (Char.GetUnicodeCategory ((char) cp) == UnicodeCategory.OtherNotAssigned)
		return true;

	// Some characters in the Surrogate area are ignored.
	if (cp >= 0xD880 && cp < 0xDB80)
		return true;

	int idx = UUtil.Ignorable.ToIndex (cp);
	if (idx < 0)
		return false;
	return ignorableFlags [idx] == 7;
}
92                 // Verifier:
93                 // for (int i = 0; i <= char.MaxValue; i++)
94                 //      if (Char.GetUnicodeCategory ((char) i)
95                 //              == UnicodeCategory.OtherNotAssigned 
96                 //              && ignorableFlags [i] != 7)
97                 //              Console.WriteLine ("{0:X04}", i);
98
// True when the code point has a slot in the ignorable-flags table and
// bit 0x2 is set there.
public static bool IsIgnorableSymbol (int cp)
{
	int idx = UUtil.Ignorable.ToIndex (cp);
	if (idx < 0)
		return false;
	return (ignorableFlags [idx] & 0x2) != 0;
}
104
// True when the code point has a slot in the ignorable-flags table and
// bit 0x4 is set there.
// It could instead be answered from the category table
// (categories [UUtil.Category.ToIndex (cp)] == 1), but the flag test
// is faster.
public static bool IsIgnorableNonSpacing (int cp)
{
	int idx = UUtil.Ignorable.ToIndex (cp);
	if (idx < 0)
		return false;
	return (ignorableFlags [idx] & 0x4) != 0;
}
113
// Maps a code point in [0x3041, 0x3094] to the code point 0x60 above
// it; every other value is returned unchanged.
// Note that IgnoreKanaType does not treat half-width katakana as
// equivalent to full-width ones, which is why a fixed offset suffices.
public static int ToKanaTypeInsensitive (int i)
{
	if (i < 0x3041 || i > 0x3094)
		return i;
	return i + 0x60;
}
122
123                 // Note that currently indexer optimizes this table a lot,
124                 // which might have resulted in bugs.
// Returns the width-compat table entry for the code point, or the
// code point itself when it has no slot in the table or the stored
// entry is 0.
public static int ToWidthCompat (int cp)
{
	int idx = UUtil.WidthCompat.ToIndex (cp);
	if (idx < 0)
		return cp;

	int mapped = widthCompat [idx];
	if (mapped == 0)
		return cp;
	return mapped;
}
131
132                 #region Level 4 properties (Kana)
133
// Range test on the character. It is true for:
//   U+3041..U+3098,
//   U+309D..U+30FF except U+30FB,
//   U+32D0..U+32FE,
//   U+FF66..U+FF9D,
// and false for everything else.
public static bool HasSpecialWeight (char c)
{
	if (c < '\u3041')
		return false;
	if (c >= '\uFF66' && c <= '\uFF9D')
		return true;
	if (c >= '\u3300')
		return false;

	// From here on c is within U+3041..U+32FF.
	if (c <= '\u309C')
		return c <= '\u3098';
	if (c <= '\u30FF')
		return c != '\u30FB';
	return c >= '\u32D0' && c <= '\u32FE';
}
152
153                 // FIXME: it should be removed at some stage
154                 // (will become unused).
// Classifies the character: 5 for U+30FC, 4 for U+309D, U+309E,
// U+30FD, U+30FE and U+FF70, and 3 for every other character.
public static byte GetJapaneseDashType (char c)
{
	if (c == '\u30FC')
		return 5;

	bool isType4 =
		c == '\u309D' || c == '\u309E' ||
		c == '\u30FD' || c == '\u30FE' ||
		c == '\uFF70';
	if (isType4)
		return 4;

	return 3;
}
169
// True for characters in U+FF66..U+FF9D (both ends included).
public static bool IsHalfWidthKana (char c)
{
	if (c < '\uFF66')
		return false;
	return c <= '\uFF9D';
}
174
// True for characters in U+3041..U+3094 (both ends included).
public static bool IsHiragana (char c)
{
	if (c < '\u3041')
		return false;
	return c <= '\u3094';
}
179
// True for U+FF67..U+FF6F and for the individual characters listed in
// the switch below (all of them lie in U+3041..U+30F9); false for
// every other character.
public static bool IsJapaneseSmallLetter (char c)
{
	if (c >= '\uFF67' && c <= '\uFF6F')
		return true;

	// Nothing outside U+3041..U+30F9 is listed below.
	if (c <= '\u3040' || c >= '\u30FA')
		return false;

	switch (c) {
	// U+30xx entries with a U+30(xx+0x60) counterpart listed below
	case '\u3041': case '\u3043': case '\u3045':
	case '\u3047': case '\u3049':
	case '\u3063':
	case '\u3083': case '\u3085': case '\u3087':
	case '\u308E':
	// the counterparts, 0x60 above the entries listed first
	case '\u30A1': case '\u30A3': case '\u30A5':
	case '\u30A7': case '\u30A9':
	case '\u30C3':
	case '\u30E3': case '\u30E5': case '\u30E7':
	case '\u30EE':
	// two further entries without a counterpart in the first group
	case '\u30F5': case '\u30F6':
		return true;
	default:
		return false;
	}
}
213
214                 #endregion
215
216 #if GENERATE_TABLE
217
// GENERATE_TABLE variant: this flag is always true here.
public static readonly bool IsReady = true;

// GENERATE_TABLE variant: nothing is loaded on demand, so this is a
// no-op kept for signature compatibility with the other variant.
public static void FillCJK (string name)
{
}
221 #else
222
// Tables filled once by the static constructor from the embedded
// collation resources.
static readonly char [] tailorings;
static readonly TailoringInfo [] tailoringInfos;
internal static readonly byte [] ignorableFlags;
internal static readonly byte [] categories;
internal static readonly byte [] level1;
internal static readonly byte [] level2;
internal static readonly byte [] level3;
internal static readonly ushort [] widthCompat;

// CJK tables, loaded on demand by FillCJK; null until then.
static ushort [] cjkCHS;
static ushort [] cjkCHT;
static ushort [] cjkJA;
static ushort [] cjkKO;
static byte [] cjkKOlv2;

// Guards the on-demand CJK loading. This must be a private object and
// not a string literal: literals are interned, so any other code
// locking on an equal literal would share the same monitor.
static readonly object forLock = new object ();

// Set to true at the end of the static constructor, i.e. only when
// both collation resources were found and read.
public static readonly bool IsReady = false;
239
// Opens the named manifest resource of the executing assembly.
// The result is passed through as-is; callers in this class test it
// for null before reading.
static Stream GetResource (string name)
{
	Assembly self = Assembly.GetExecutingAssembly ();
	return self.GetManifestResourceStream (name);
}
245
// Type initializer: reads the core lookup tables from
// "collation.core.bin" and the tailoring tables from
// "collation.tailoring.bin". IsReady is set only after both resources
// were found and read; if either is missing the constructor returns
// early and IsReady stays false.
static MSCompatUnicodeTable ()
{
	using (Stream coreStream = GetResource ("collation.core.bin")) {
		// FIXME: remove this check later.
		// It should not be required, but while switching from
		// a corlib that has no resources to one that has them,
		// the lookup goes to the corlib the runtime started
		// with and yields null (the old one does not have the
		// resource). In such cases managed collation won't
		// work.
		if (coreStream == null)
			return;

		BinaryReader core = new BinaryReader (coreStream);
		FillTable (core, ref ignorableFlags);
		FillTable (core, ref categories);
		FillTable (core, ref level1);
		FillTable (core, ref level2);
		FillTable (core, ref level3);

		// widthCompat: a 32-bit count followed by that many
		// 16-bit entries.
		widthCompat = new ushort [core.ReadInt32 ()];
		for (int n = 0; n < widthCompat.Length; n++)
			widthCompat [n] = core.ReadUInt16 ();
	}

	using (Stream tailoringStream = GetResource ("collation.tailoring.bin")) {
		// Same situation as described in the FIXME above.
		if (tailoringStream == null)
			return;

		BinaryReader tail = new BinaryReader (tailoringStream);

		// tailoringInfos: a 32-bit count, then per entry three
		// 32-bit ints and one boolean.
		int infoCount = tail.ReadInt32 ();
		HasSpecialWeight ((char) infoCount); // dummy
		tailoringInfos = new TailoringInfo [infoCount];
		for (int n = 0; n < infoCount; n++) {
			int lcid = tail.ReadInt32 ();
			int index = tail.ReadInt32 ();
			int length = tail.ReadInt32 ();
			bool frenchSort = tail.ReadBoolean ();
			tailoringInfos [n] = new TailoringInfo (lcid, index, length, frenchSort);
		}

		// Two bytes are consumed here and not stored
		// (presumably padding in the resource layout - confirm
		// against the table generator).
		tail.ReadByte (); // dummy
		IsHiragana ((char) tail.ReadByte ()); // dummy

		// tailorings: a 32-bit count followed by that many
		// 16-bit chars.
		int valueCount = tail.ReadInt32 ();
		tailorings = new char [valueCount];
		for (int n = 0; n < valueCount; n++)
			tailorings [n] = (char) tail.ReadUInt16 ();
	}

	IsReady = true;
}
301
// Reads one length-prefixed byte table: a 32-bit size followed by
// that many bytes, and stores the new array in bytes.
// Throws EndOfStreamException when the stream ends before the whole
// table was read. BinaryReader.Read () may legitimately return fewer
// bytes than requested, so its result must not be ignored; ReadBytes ()
// keeps reading until the count is satisfied or the stream ends.
static void FillTable (BinaryReader reader, ref byte [] bytes)
{
	int size = reader.ReadInt32 ();
	byte [] table = reader.ReadBytes (size);
	if (table.Length != size)
		throw new EndOfStreamException (String.Format (
			"Collation table is truncated: expected {0} bytes but got {1}.",
			size, table.Length));
	bytes = table;
}
308
// Loads the CJK table(s) for the given culture name if necessary.
// The work is done by FillCJKCore while holding forLock, so
// concurrent callers are serialized.
public static void FillCJK (string culture)
{
	lock (forLock)
		FillCJKCore (culture);
}
315
// Loads the CJK table for "zh-CHS", "zh-CHT", "ja" or "ko" from the
// matching "collation.<name>.bin" resource, unless it is already
// loaded. For "ko" the "collation.cjkKOlv2.bin" table is loaded too.
// Any other culture name (including null) is ignored, and nothing is
// done while IsReady is false.
//
// A missing resource is treated the same way the static constructor
// treats it: the method returns without loading anything instead of
// passing a null stream to BinaryReader. The tables are assigned to
// the static fields only after everything has been read, so a failure
// cannot leave cjkKO set while cjkKOlv2 is still null (which would
// also stop later calls from retrying).
//
// Must be called with forLock held (see FillCJK).
static void FillCJKCore (string culture)
{
	if (!IsReady)
		return;

	string name;
	ushort [] current;
	switch (culture) {
	case "zh-CHS":
		name = "cjkCHS";
		current = cjkCHS;
		break;
	case "zh-CHT":
		name = "cjkCHT";
		current = cjkCHT;
		break;
	case "ja":
		name = "cjkJA";
		current = cjkJA;
		break;
	case "ko":
		name = "cjkKO";
		current = cjkKO;
		break;
	default:
		return;
	}

	// Already loaded by an earlier call.
	if (current != null)
		return;

	ushort [] arr;
	using (Stream s = GetResource (String.Format ("collation.{0}.bin", name))) {
		if (s == null)
			return;

		BinaryReader reader = new BinaryReader (s);
		int size = reader.ReadInt32 ();
		arr = new ushort [size];
		for (int i = 0; i < size; i++)
			arr [i] = reader.ReadUInt16 ();
	}

	// Korean needs a second table; read it before publishing either.
	byte [] lv2 = null;
	if (name == "cjkKO") {
		using (Stream s = GetResource ("collation.cjkKOlv2.bin")) {
			if (s == null)
				return;

			BinaryReader reader = new BinaryReader (s);
			FillTable (reader, ref lv2);
		}
	}

	switch (name) {
	case "cjkCHS":
		cjkCHS = arr;
		break;
	case "cjkCHT":
		cjkCHT = arr;
		break;
	case "cjkJA":
		cjkJA = arr;
		break;
	case "cjkKO":
		cjkKOlv2 = lv2;
		cjkKO = arr;
		break;
	}
}
376         }
377 }
378 #endif
379
380
381                 // For "categories", 0 means no primary weight. 6 means 
382                 // variable weight
383                 // For expanded character the value is never filled (i.e. 0).
384                 // Those arrays will be split into blocks (<3400 and >F800)
385                 // level 4 is computed.
386
387                 // public static bool HasSpecialWeight (char c)
388                 // { return level1 [(int) c] == 6; }
389
390                 //
391                 // autogenerated code or icall to fill array runs here
392                 //
393