2005-07-19 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / MSCompatUnicodeTable.cs
1 using System;
2 using System.Globalization;
3 using System.IO;
4 using System.Reflection;
5
6 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
7
8 namespace Mono.Globalization.Unicode
9 {
10         internal class TailoringInfo
11         {
12                 public readonly int LCID;
13                 public readonly int TailoringIndex;
14                 public readonly int TailoringCount;
15                 public readonly bool FrenchSort;
16
17                 public TailoringInfo (int lcid, int tailoringIndex, int tailoringCount, bool frenchSort)
18                 {
19                         LCID = lcid;
20                         TailoringIndex = tailoringIndex;
21                         TailoringCount = tailoringCount;
22                         FrenchSort = frenchSort;
23                 }
24         }
25
26         internal class MSCompatUnicodeTable
27         {
28                 public static char [] TailoringValues {
29                         get { return tailorings; }
30                 }
31
32                 public static ushort [] CjkCHS {
33                         get { return cjkCHS; }
34                 }
35
36                 public static ushort [] CjkCHT {
37                         get { return cjkCHT; }
38                 }
39
40                 public static ushort [] CjkJA {
41                         get { return cjkJA; }
42                 }
43
44                 public static ushort [] CjkKO {
45                         get { return cjkKO; }
46                 }
47
48                 public static byte [] CjkKOLv2 {
49                         get { return cjkKOlv2; }
50                 }
51
52                 public static TailoringInfo GetTailoringInfo (int lcid)
53                 {
54                         for (int i = 0; i < tailoringInfos.Length; i++)
55                                 if (tailoringInfos [i].LCID == lcid)
56                                         return tailoringInfos [i];
57                         return null;
58                 }
59
60                 public static byte Categories (int cp)
61                 {
62                         return categories [UUtil.Category.ToIndex (cp)];
63                 }
64
65                 public static byte Level1 (int cp)
66                 {
67                         return level1 [UUtil.Level1.ToIndex (cp)];
68                 }
69
70                 public static byte Level2 (int cp)
71                 {
72                         return level2 [UUtil.Level2.ToIndex (cp)];
73                 }
74
75                 public static byte Level3 (int cp)
76                 {
77                         return level3 [UUtil.Level3.ToIndex (cp)];
78                 }
79
80                 public static bool IsIgnorable (int cp)
81                 {
82                         UnicodeCategory uc = Char.GetUnicodeCategory ((char) cp);
83                         // This check eliminates some extraneous code areas
84                         if (uc == UnicodeCategory.OtherNotAssigned)
85                                 return true;
86                         // Some characters in Surrogate area are ignored.
87                         if (0xD880 <= cp && cp < 0xDB80)
88                                 return true;
89                         int i = UUtil.Ignorable.ToIndex (cp);
90                         return i >= 0 && ignorableFlags [i] == 7;
91                 }
92                 // Verifier:
93                 // for (int i = 0; i <= char.MaxValue; i++)
94                 //      if (Char.GetUnicodeCategory ((char) i)
95                 //              == UnicodeCategory.OtherNotAssigned 
96                 //              && ignorableFlags [i] != 7)
97                 //              Console.WriteLine ("{0:X04}", i);
98
99                 public static bool IsIgnorableSymbol (int cp)
100                 {
101                         int i = UUtil.Ignorable.ToIndex (cp);
102                         return i >= 0 && (ignorableFlags [i] & 0x2) != 0;
103                 }
104
105                 public static bool IsIgnorableNonSpacing (int cp)
106                 {
107                         int i = UUtil.Ignorable.ToIndex (cp);
108                         return i >= 0 && (ignorableFlags [i] & 0x4) != 0;
109                         // It could be implemented this way, but the above
110                         // is faster.
111 //                      return categories [UUtil.Category.ToIndex (cp)] == 1;
112                 }
113
114                 public static int ToKanaTypeInsensitive (int i)
115                 {
116                         // Note that IgnoreKanaType does not treat half-width
117                         // katakana as equivalent to full-width ones.
118
119                         // Thus, it is so simple ;-)
120                         return (0x3041 <= i && i <= 0x3094) ? i + 0x60 : i;
121                 }
122
123                 public static int ToWidthCompat (int cp)
124                 {
125                         int i = UUtil.WidthCompat.ToIndex (cp);
126                         int v = i >= 0 ? (int) widthCompat [i] : 0;
127                         return v != 0 ? v : cp;
128                 }
129
130                 #region Level 4 properties (Kana)
131
132                 public static bool HasSpecialWeight (char c)
133                 {
134                         if (c < '\u3041')
135                                 return false;
136                         else if ('\uFF66' <= c && c < '\uFF9E')
137                                 return true;
138                         else if ('\u3300' <= c)
139                                 return false;
140                         else if (c < '\u309D')
141                                 return (c < '\u3099');
142                         else if (c < '\u3100')
143                                 return c != '\u30FB';
144                         else if (c < '\u32D0')
145                                 return false;
146                         else if (c < '\u32FF')
147                                 return true;
148                         return false;
149                 }
150
151                 // FIXME: it should be removed at some stage
152                 // (will become unused).
153                 public static byte GetJapaneseDashType (char c)
154                 {
155                         switch (c) {
156                         case '\u309D':
157                         case '\u309E':
158                         case '\u30FD':
159                         case '\u30FE':
160                         case '\uFF70':
161                                 return 4;
162                         case '\u30FC':
163                                 return 5;
164                         }
165                         return 3;
166                 }
167
168                 public static bool IsHalfWidthKana (char c)
169                 {
170                         return '\uFF66' <= c && c <= '\uFF9D';
171                 }
172
173                 public static bool IsHiragana (char c)
174                 {
175                         return '\u3041' <= c && c <= '\u3094';
176                 }
177
178                 public static bool IsJapaneseSmallLetter (char c)
179                 {
180                         if ('\uFF67' <= c && c <= '\uFF6F')
181                                 return true;
182                         if ('\u3040' < c && c < '\u30FA') {
183                                 switch (c) {
184                                 case '\u3041':
185                                 case '\u3043':
186                                 case '\u3045':
187                                 case '\u3047':
188                                 case '\u3049':
189                                 case '\u3063':
190                                 case '\u3083':
191                                 case '\u3085':
192                                 case '\u3087':
193                                 case '\u308E':
194                                 case '\u30A1':
195                                 case '\u30A3':
196                                 case '\u30A5':
197                                 case '\u30A7':
198                                 case '\u30A9':
199                                 case '\u30C3':
200                                 case '\u30E3':
201                                 case '\u30E5':
202                                 case '\u30E7':
203                                 case '\u30EE':
204                                 case '\u30F5':
205                                 case '\u30F6':
206                                         return true;
207                                 }
208                         }
209                         return false;
210                 }
211
212                 #endregion
213
214 #if GENERATE_TABLE
215
216                 public static readonly bool IsReady = true; // always
217
218                 public static void FillCJK (string name) {}
219 #else
220
221                 static readonly char [] tailorings;
222                 static readonly TailoringInfo [] tailoringInfos;
223                 internal static readonly byte [] ignorableFlags;
224                 internal static readonly byte [] categories;
225                 internal static readonly byte [] level1;
226                 internal static readonly byte [] level2;
227                 internal static readonly byte [] level3;
228                 internal static readonly ushort [] widthCompat;
229                 static ushort [] cjkCHS;
230                 static ushort [] cjkCHT;
231                 static ushort [] cjkJA;
232                 static ushort [] cjkKO;
233                 static byte [] cjkKOlv2;
234                 static string forLock = "forLock";
235
236                 public static readonly bool IsReady = false;
237
238                 static Stream GetResource (string name)
239                 {
240                         return Assembly.GetExecutingAssembly ()
241                                 .GetManifestResourceStream (name);
242                 }
243
244                 static MSCompatUnicodeTable ()
245                 {
246                         using (Stream s = GetResource ("collation.core.bin")) {
247                                 // FIXME: remove those lines later.
248                                 // actually this line should not be required,
249                                 // but when we switch from the corlib that
250                                 // does not have resources to the corlib that
251                                 // do have, it tries to read resource from
252                                 // the corlib that runtime kicked and returns
253                                 // null (because old one does not have it).
254                                 // In such cases managed collation won't work.
255                                 if (s == null)
256                                         return;
257
258                                 BinaryReader reader = new BinaryReader (s);
259                                 FillTable (reader, ref ignorableFlags);
260                                 FillTable (reader, ref categories);
261                                 FillTable (reader, ref level1);
262                                 FillTable (reader, ref level2);
263                                 FillTable (reader, ref level3);
264
265                                 int size = reader.ReadInt32 ();
266                                 widthCompat = new ushort [size];
267                                 for (int i = 0; i < size; i++)
268                                         widthCompat [i] = reader.ReadUInt16 ();
269                         }
270
271                         using (Stream s = GetResource ("collation.tailoring.bin")) {
272                                 if (s == null) // see FIXME above.
273                                         return;
274
275                                 BinaryReader reader = new BinaryReader (s);
276                                 // tailoringInfos
277                                 int count = reader.ReadInt32 ();
278                                 HasSpecialWeight ((char) count); // dummy
279                                 tailoringInfos = new TailoringInfo [count];
280                                 for (int i = 0; i < count; i++) {
281                                         TailoringInfo ti = new TailoringInfo (
282                                                 reader.ReadInt32 (),
283                                                 reader.ReadInt32 (),
284                                                 reader.ReadInt32 (),
285                                                 reader.ReadBoolean ());
286                                         tailoringInfos [i] = ti;
287                                 }
288                                 reader.ReadByte (); // dummy
289                                 IsHiragana ((char) reader.ReadByte ()); // dummy
290                                 // tailorings
291                                 count = reader.ReadInt32 ();
292                                 tailorings = new char [count];
293                                 for (int i = 0; i < count; i++)
294                                         tailorings [i] = (char) reader.ReadUInt16 ();
295                         }
296
297                         IsReady = true;
298                 }
299
300                 static void FillTable (BinaryReader reader, ref byte [] bytes)
301                 {
302                         int size = reader.ReadInt32 ();
303                         bytes = new byte [size];
304                         reader.Read (bytes, 0, size);
305                 }
306
307                 public static void FillCJK (string culture)
308                 {
309                         lock (forLock) {
310                                 FillCJKCore (culture);
311                         }
312                 }
313
314                 static void FillCJKCore (string culture)
315                 {
316                         if (!IsReady)
317                                 return;
318
319                         string name = null;
320                         ushort [] arr = null;
321                         switch (culture) {
322                         case "zh-CHS":
323                                 name = "cjkCHS";
324                                 arr = cjkCHS;
325                                 break;
326                         case "zh-CHT":
327                                 name = "cjkCHT";
328                                 arr = cjkCHT;
329                                 break;
330                         case "ja":
331                                 name = "cjkJA";
332                                 arr = cjkJA;
333                                 break;
334                         case "ko":
335                                 name = "cjkKO";
336                                 arr = cjkKO;
337                                 break;
338                         }
339
340                         if (name == null || arr != null)
341                                 return;
342
343                         using (Stream s = GetResource (String.Format ("collation.{0}.bin", name))) {
344                                 BinaryReader reader = new BinaryReader (s);
345                                 int size = reader.ReadInt32 ();
346                                 arr = new ushort [size];
347                                 for (int i = 0; i < size; i++)
348                                         arr [i] = reader.ReadUInt16 ();
349                         }
350
351                         switch (culture) {
352                         case "zh-CHS":
353                                 cjkCHS = arr;
354                                 break;
355                         case "zh-CHT":
356                                 cjkCHT = arr;
357                                 break;
358                         case "ja":
359                                 cjkJA = arr;
360                                 break;
361                         case "ko":
362                                 cjkKO = arr;
363                                 break;
364                         }
365
366                         if (name != "cjkKO")
367                                 return;
368
369                         using (Stream s = GetResource ("collation.cjkKOlv2.bin")) {
370                                 BinaryReader reader = new BinaryReader (s);
371                                 FillTable (reader, ref cjkKOlv2);
372                         }
373                 }
374         }
375 }
376 #endif
377
378
379                 // For "categories", 0 means no primary weight. 6 means 
380                 // variable weight
381                 // For expanded character the value is never filled (i.e. 0).
382                 // Those arrays will be split into blocks (<3400 and >F800)
383                 // level 4 is computed.
384
385                 // public static bool HasSpecialWeight (char c)
386                 // { return level1 [(int) c] == 6; }
387
388                 //
389                 // autogenerated code or icall to fill array runs here
390                 //
391