Extended CloneCopy() test with ReadOnly field
[mono.git] / mcs / class / I18N / CJK / GB18030Source.cs
1 //
2 // GB18030Source.cs
3 //
4 // Author:
5 //      Atsushi Enomoto  <atsushi@ximian.com>
6 //
7 using System;
8 using System.Globalization;
9 using System.Reflection;
10 using System.Text;
11 #if BUILD_GENERATOR
12 using System.IO;
13 using System.Xml;
14 #endif
15
16 namespace I18N.CJK
17 {
18         internal unsafe class GB18030Source
19         {
20                 class GB18030Map
21                 {
22                         public readonly int UStart;
23                         public readonly int UEnd;
24                         public readonly long GStart;
25                         public readonly long GEnd;
26                         public readonly bool Dummy; // This range is actually not usable.
27
28                         public GB18030Map (
29                                 int ustart, int uend, long gstart, long gend, bool dummy)
30                         {
31                                 this.UStart = ustart;
32                                 this.UEnd = uend;
33                                 this.GStart = gstart;
34                                 this.GEnd = gend;
35                                 this.Dummy = dummy;
36                         }
37                 }
38
39                 private GB18030Source ()
40                 {
41                 }
42
43                 static readonly byte *gbx2uni;
44                 static readonly byte *uni2gbx;
45                 static readonly int gbx2uniSize, uni2gbxSize;
46
47                 static GB18030Source ()
48                 {
49                         MethodInfo mi = typeof (Assembly).GetMethod (
50                                 "GetManifestResourceInternal",
51                                 BindingFlags.NonPublic | BindingFlags.Instance);
52                         int size = 0;
53                         Module mod = null;
54                         IntPtr ret = (IntPtr) mi.Invoke (
55                                 Assembly.GetExecutingAssembly (),
56                                 new object [] {"gb18030.table", size, mod});
57                         if (ret != IntPtr.Zero) {
58                                 gbx2uni = (byte*) ((void*) ret);
59                                 gbx2uniSize =
60                                         (gbx2uni [0] << 24) + (gbx2uni [1] << 16) +
61                                         (gbx2uni [2] << 8) + (gbx2uni [3]);
62                                 gbx2uni += 4;
63                                 uni2gbx = gbx2uni + gbx2uniSize;
64                                 uni2gbxSize =
65                                         (uni2gbx [0] << 24) + (uni2gbx [1] << 16) +
66                                         (uni2gbx [2] << 8) + (uni2gbx [3]);
67                                 uni2gbx += 4;
68                         }
69                 }
70
71                 static readonly long gbxBase =
72                         FromGBXRaw (0x81, 0x30, 0x81, 0x30, false);
73                 static readonly long gbxSuppBase =
74                         FromGBXRaw (0x90, 0x30, 0x81, 0x30, false);
75
76                 // See http://icu.sourceforge.net/docs/papers/gb18030.html
77                 // and referenced XML mapping table.
78                 static readonly GB18030Map [] ranges = new GB18030Map [] {
79                         // rawmap: 0x0080-0x0451
80                         new GB18030Map (0x0452, 0x200F, FromGBXRaw (0x81, 0x30, 0xD3, 0x30, false), FromGBXRaw (0x81, 0x36, 0xA5, 0x31, false), false),
81                         // rawmap: 0x2010-0x2642
82                         new GB18030Map (0x2643, 0x2E80, FromGBXRaw (0x81, 0x37, 0xA8, 0x39, false), FromGBXRaw (0x81, 0x38, 0xFD, 0x38, false), false),
83                         // rawmap: 0x2E81-0x361A
84                         new GB18030Map (0x361B, 0x3917, FromGBXRaw (0x82, 0x30, 0xA6, 0x33, false), FromGBXRaw (0x82, 0x30, 0xF2, 0x37, false), false),
85                         // rawmap: 0x3918-0x3CE0
86                         new GB18030Map (0x3CE1, 0x4055, FromGBXRaw (0x82, 0x31, 0xD4, 0x38, false), FromGBXRaw (0x82, 0x32, 0xAF, 0x32, false), false),
87                         // rawmap: 0x4056-0x415F
88                         new GB18030Map (0x4160, 0x4336, FromGBXRaw (0x82, 0x32, 0xC9, 0x37, false), FromGBXRaw (0x82, 0x32, 0xF8, 0x37, false), false),
89                         // rawmap: 4337-0x44D6
90                         new GB18030Map (0x44D7, 0x464B, FromGBXRaw (0x82, 0x33, 0xA3, 0x39, false), FromGBXRaw (0x82, 0x33, 0xC9, 0x31, false), false),
91                         // rawmap: 0x464C-0x478D
92                         new GB18030Map (0x478E, 0x4946, FromGBXRaw (0x82, 0x33, 0xE8, 0x38, false), FromGBXRaw (0x82, 0x34, 0x96, 0x38, false), false),
93                         // rawmap: 0x4947-0x49B7
94                         new GB18030Map (0x49B8, 0x4C76, FromGBXRaw (0x82, 0x34, 0xA1, 0x31, false), FromGBXRaw (0x82, 0x34, 0xE7, 0x33, false), false),
95                         // rawmap: 0x4C77-0x4DFF
96
97                         // 4E00-9FA5 are all mapped in GB2312
98                         new GB18030Map (0x4E00, 0x9FA5, 0, 0, true),
99
100                         new GB18030Map (0x9FA6, 0xD7FF, FromGBXRaw (0x82, 0x35, 0x8F, 0x33, false), FromGBXRaw (0x83, 0x36, 0xC7, 0x38, false), false),
101
102                         // D800-DFFF are ignored (surrogate)
103                         // E000-E76B are all mapped in GB2312.
104                         new GB18030Map (0xD800, 0xE76B, 0, 0, true),
105
106                         // rawmap: 0xE76C-E884
107                         new GB18030Map (0xE865, 0xF92B, FromGBXRaw (0x83, 0x36, 0xD0, 0x30, false), FromGBXRaw (0x84, 0x30, 0x85, 0x34, false), false),
108                         // rawmap: 0xF92C-FA29
109                         new GB18030Map (0xFA2A, 0xFE2F, FromGBXRaw (0x84, 0x30, 0x9C, 0x38, false), FromGBXRaw (0x84, 0x31, 0x85, 0x37, false), false),
110                         // rawmap: 0xFE30-FFE5
111                         new GB18030Map (0xFFE6, 0xFFFF, FromGBXRaw (0x84, 0x31, 0xA2, 0x34, false), FromGBXRaw (0x84, 0x31, 0xA4, 0x39, false), false),
112                         };
113
114                 public static void Unlinear (byte [] bytes, int start, long gbx)
115                 {
116                         fixed (byte* bptr = bytes) {
117                                 Unlinear (bptr + start, gbx);
118                         }
119                 }
120
121                 public static unsafe void Unlinear (byte* bytes, long gbx)
122                 {
123                         bytes [3] = (byte) (gbx % 10 + 0x30);
124                         gbx /= 10;
125                         bytes [2] = (byte) (gbx % 126 + 0x81);
126                         gbx /= 126;
127                         bytes [1] = (byte) (gbx % 10 + 0x30);
128                         gbx /= 10;
129                         bytes [0] = (byte) (gbx + 0x81);
130                 }
131
132                 // negative (invalid) or positive (valid)
133                 public static long FromGBX (byte [] bytes, int start)
134                 {
135                         byte b1 = bytes [start];
136                         byte b2 = bytes [start + 1];
137                         byte b3 = bytes [start + 2];
138                         byte b4 = bytes [start + 3];
139                         if (b1 < 0x81 || b1 == 0xFF)
140                                 return -1;
141                         if (b2 < 0x30 || b2 > 0x39)
142                                 return -2;
143                         if (b3 < 0x81 || b3 == 0xFF)
144                                 return -3;
145                         if (b4 < 0x30 || b4 > 0x39)
146                                 return -4;
147                         if (b1 >= 0x90)
148                                 return FromGBXRaw (b1, b2, b3, b4, true);
149                         long linear = FromGBXRaw (b1, b2, b3, b4, false);
150
151                         long rawOffset = 0;
152                         long startIgnore = 0;
153
154                         for (int i = 0; i < ranges.Length; i++) {
155                                 GB18030Map m = ranges [i];
156                                 if (linear < m.GStart)
157                                         return ToUcsRaw ((int) (linear
158                                                 - startIgnore + rawOffset));
159                                 if (linear <= m.GEnd)
160                                         return linear - gbxBase - m.GStart
161                                                 + m.UStart;
162                                 if (m.GStart != 0) {
163                                         rawOffset += m.GStart - startIgnore;
164                                         startIgnore = m.GEnd + 1;
165                                 }
166                         }
167 //                      return ToUcsRaw ((int) (linear - gbxBase));
168                         throw new SystemException (String.Format ("GB18030 INTERNAL ERROR (should not happen): GBX {0:x02} {1:x02} {2:x02} {3:x02}", b1, b2, b3, b4));
169                 }
170
171                 public static long FromUCSSurrogate (int cp)
172                 {
173                         return cp + gbxSuppBase;
174                 }
175
176                 public static long FromUCS (int cp)
177                 {
178                         long rawOffset = 0;
179                         long startIgnore = 0x80;
180                         for (int i = 0; i < ranges.Length; i++) {
181                                 GB18030Map m = ranges [i];
182                                 if (cp < m.UStart)
183                                         return ToGbxRaw ((int) (cp
184                                                 - startIgnore + rawOffset));
185                                 if (cp <= m.UEnd)
186                                         return cp - m.UStart + m.GStart;
187                                 if (m.GStart != 0) {
188                                         rawOffset += m.UStart - startIgnore;
189                                         startIgnore = m.UEnd + 1;
190                                 }
191                         }
192                         throw new SystemException (String.Format ("GB18030 INTERNAL ERROR (should not happen): UCS {0:x06}", cp));
193                 }
194
195                 static long FromGBXRaw (
196                         byte b1, byte b2, byte b3, byte b4, bool supp)
197                 {
198                         // 126 = 0xFE - 0x80
199                         return (((b1 - (supp ? 0x90 : 0x81)) * 10 +
200                                 (b2 - 0x30)) * 126 +
201                                 (b3 - 0x81)) * 10 +
202                                 b4 - 0x30 + (supp ? 0x10000 : 0);
203                 }
204
205                 static int ToUcsRaw (int idx)
206                 {
207                         return gbx2uni [idx * 2] * 0x100 +
208                                 gbx2uni [idx * 2 + 1];
209                 }
210
211                 static long ToGbxRaw (int idx)
212                 {
213                         if (idx < 0 || idx * 2 + 1 >= uni2gbxSize)
214                                 return -1;
215                         return gbxBase + uni2gbx [idx * 2] * 0x100 + uni2gbx [idx * 2 + 1];
216                 }
217
218
219 #if BUILD_GENERATOR
220                 public static void Main ()
221                 {
222                         new GB18030Source ().Run ();
223                 }
224
225                 byte [] uni2gbxMap;
226                 byte [] gbx2uniMap;
227
228                 void Run ()
229                 {
230                         int ustart = 0x80;
231                         long gstart = 0;
232                         int ucount = 0;
233                         long gcount = 0;
234                         bool skip = false;
235                         for (int i = 0; i < ranges.Length; i++) {
236                                 GB18030Map m = ranges [i];
237                                 if (!skip) {
238 //Console.WriteLine ("---- adding {0:X04} umap. {1:X04} gmap, skip range between {2:X04} and {3:X04}", m.UStart - ustart, m.GStart != 0 ? m.GStart - gstart : 0, m.UStart, m.UEnd);
239                                         ucount += m.UStart - ustart;
240                                 }
241                                 if (m.GStart != 0)
242                                         gcount += m.GStart - gstart;
243                                 skip = m.GStart == 0;
244                                 ustart = m.UEnd + 1;
245                                 if (m.GStart != 0)
246                                         gstart = m.GEnd + 1;
247                         }
248
249 Console.Error.WriteLine ("Total UCS codepoints: {0} ({1:X04})", ucount, ucount);
250 Console.Error.WriteLine ("Total GBX codepoints: {0} ({1:X04})", gcount, gcount);
251
252                         uni2gbxMap = new byte [ucount * 2];
253                         gbx2uniMap = new byte [gcount * 2];
254
255                         XmlDocument doc = new XmlDocument ();
256                         doc.XmlResolver = null;
257                         doc.Load ("gb-18030-2000.xml");
258                         foreach (XmlElement e in doc.SelectNodes (
259                                 "/characterMapping/assignments/a"))
260                                 AddMap (e);
261
262                         using (FileStream fs = File.Create ("gb18030.table")) {
263                                 byte [] size = new byte [4];
264                                 for (int i = 0, len = gbx2uniMap.Length;
265                                         i < 4; i++, len >>= 8)
266                                         size [3 - i] = (byte) (len % 0x100);
267                                 fs.Write (size, 0, 4);
268                                 fs.Write (gbx2uniMap, 0, gbx2uniMap.Length);
269                                 fs.Write (uni2gbxMap, 0, uni2gbxMap.Length);
270                         }
271 Console.WriteLine ("done.");
272                 }
273
274                 void AddMap (XmlElement e)
275                 {
276                         int u = int.Parse (e.GetAttribute ("u"),
277                                 NumberStyles.HexNumber);
278                         byte [] b = new byte [4];
279                         int idx = 0;
280                         foreach (string s in e.GetAttribute ("b").Split (' '))
281                                 b [idx++] =
282                                         byte.Parse (s, NumberStyles.HexNumber);
283                         if (idx != 4)
284                                 return;
285
286                         AddMap (u, b);
287                 }
288
289                 void AddMap (int u, byte [] b)
290                 {
291                         int gbx = (int) (FromGBXRaw (
292                                 b [0], b [1], b [2], b [3], false) - gbxBase);
293                         if (u > 0x10000 || gbx > 0x10000)
294                                 throw new Exception (String.Format (
295                                         "should not happen: {0:X04} {1:X04}",
296                                         u, gbx));
297
298                         int uidx = IndexForUcs (u);
299 //Console.WriteLine ("U: {0:x04} for {1:x04} [{2:x02} {3:x02}]", uidx, u, (byte) (gbx / 0x100), (byte) (gbx % 0x100));
300                         uni2gbxMap [uidx * 2] = (byte) (gbx / 0x100);
301                         uni2gbxMap [uidx * 2 + 1] = (byte) (gbx % 0x100);
302
303                         int gidx = IndexForGbx (gbx);
304 //Console.WriteLine ("G: {0:x04} for {1:x04} ({2:x02} {3:x02} {4:x02} {5:x02})", gidx, gbx, b [0], b [1], b [2], b [3]);
305                         gbx2uniMap [gidx * 2] = (byte) (u / 0x100);
306                         gbx2uniMap [gidx * 2 + 1] = (byte) (u % 0x100);
307                 }
308
309                 static int IndexForUcs (int ucs)
310                 {
311                         int start = 0x80;
312                         int count = 0;
313                         bool skip = false;
314                         for (int i = 0; i < ranges.Length; i++) {
315                                 GB18030Map m = ranges [i];
316                                 if (!skip) {
317                                         if (ucs < m.UStart)
318                                                 return count + ucs - start;
319                                         count += m.UStart - start;
320                                 }
321                                 skip = m.GStart == 0;
322                                 start = m.UEnd + 1;
323                         }
324                         return -1;
325                 }
326
327                 static int IndexForGbx (int gbx)
328                 {
329                         long start = 0;
330                         long count = 0;
331                         for (int i = 0; i < ranges.Length; i++) {
332                                 GB18030Map m = ranges [i];
333                                 if (m.GStart == 0)
334                                         continue;
335                                 if (gbx < m.GStart)
336                                         return (int) (count + gbx - start);
337                                 count += m.GStart - start;
338                                 start = m.GEnd + 1;
339                         }
340                         return -1;
341                 }
342
343 #endif
344
345
346         }
347
348 }