5 // Atsushi Enomoto <atsushi@ximian.com>
8 using System.Globalization;
9 using System.Reflection;
18 internal unsafe class GB18030Source
// Bounds of one mapping range. UStart/UEnd are used together with UCS code
// points (see FromUCS) and GStart/GEnd together with linear four-byte GB18030
// values (see FromGBX).
// NOTE(review): the declaring type's header is not visible in this excerpt; the
// ranges table constructs these entries as GB18030Map -- confirm.
22 public readonly int UStart;
23 public readonly int UEnd;
24 public readonly long GStart;
25 public readonly long GEnd;
26 public readonly bool Dummy; // This range is actually not usable.
29 int ustart, int uend, long gstart, long gend, bool dummy)
39 private GB18030Source ()
// Raw pointers into the two lookup tables plus their sizes. The pointers are
// assigned in the static constructor below; the size assignments are presumably
// the targets of the two big-endian expressions there (the assignment lines are
// not visible in this excerpt -- confirm).
43 static readonly byte *gbx2uni;
44 static readonly byte *uni2gbx;
45 static readonly int gbx2uniSize, uni2gbxSize;
// Obtains a pointer to the "gb18030.table" resource of the executing assembly by
// invoking the non-public instance method Assembly.GetManifestResourceInternal
// through reflection. Nothing is set up when the returned pointer is zero.
47 static GB18030Source ()
49 MethodInfo mi = typeof (Assembly).GetMethod (
50 "GetManifestResourceInternal",
51 BindingFlags.NonPublic | BindingFlags.Instance);
54 IntPtr ret = (IntPtr) mi.Invoke (
55 Assembly.GetExecutingAssembly (),
56 new object [] {"gb18030.table", size, mod});
57 if (ret != IntPtr.Zero) {
58 gbx2uni = (byte*) ((void*) ret);
// Big-endian 32-bit value built from the first four bytes at gbx2uni.
60 (gbx2uni [0] << 24) + (gbx2uni [1] << 16) +
61 (gbx2uni [2] << 8) + (gbx2uni [3]);
// The second table is located gbx2uniSize bytes after gbx2uni.
63 uni2gbx = gbx2uni + gbx2uniSize;
// Big-endian 32-bit value built from the first four bytes at uni2gbx.
65 (uni2gbx [0] << 24) + (uni2gbx [1] << 16) +
66 (uni2gbx [2] << 8) + (uni2gbx [3]);
// Linear value of the four-byte sequence 81 30 81 30 (computed with supp == false).
// ToGbxRaw adds it to the 16-bit entries it reads, and the generator's AddMap
// subtracts it before storing them.
71 static readonly long gbxBase =
72 FromGBXRaw (0x81, 0x30, 0x81, 0x30, false);
// Linear value of the four-byte sequence 90 30 81 30 (computed with supp == false).
// FromUCSSurrogate adds it to its argument.
73 static readonly long gbxSuppBase =
74 FromGBXRaw (0x90, 0x30, 0x81, 0x30, false);
76 // See http://icu.sourceforge.net/docs/papers/gb18030.html
77 // and referenced XML mapping table.
// Each entry lists, in order: first UCS code point, last UCS code point, first
// linear GBX value, last linear GBX value, and the "dummy" flag.
// The "rawmap" comments name the UCS span lying between two consecutive entries;
// presumably those code points are resolved through the raw lookup tables
// instead of by range arithmetic -- confirm against FromUCS/FromGBX.
78 static readonly GB18030Map [] ranges = new GB18030Map [] {
79 // rawmap: 0x0080-0x0451
80 new GB18030Map (0x0452, 0x200F, FromGBXRaw (0x81, 0x30, 0xD3, 0x30, false), FromGBXRaw (0x81, 0x36, 0xA5, 0x31, false), false),
81 // rawmap: 0x2010-0x2642
82 new GB18030Map (0x2643, 0x2E80, FromGBXRaw (0x81, 0x37, 0xA8, 0x39, false), FromGBXRaw (0x81, 0x38, 0xFD, 0x38, false), false),
83 // rawmap: 0x2E81-0x361A
84 new GB18030Map (0x361B, 0x3917, FromGBXRaw (0x82, 0x30, 0xA6, 0x33, false), FromGBXRaw (0x82, 0x30, 0xF2, 0x37, false), false),
85 // rawmap: 0x3918-0x3CE0
86 new GB18030Map (0x3CE1, 0x4055, FromGBXRaw (0x82, 0x31, 0xD4, 0x38, false), FromGBXRaw (0x82, 0x32, 0xAF, 0x32, false), false),
87 // rawmap: 0x4056-0x415F
88 new GB18030Map (0x4160, 0x4336, FromGBXRaw (0x82, 0x32, 0xC9, 0x37, false), FromGBXRaw (0x82, 0x32, 0xF8, 0x37, false), false),
89 // rawmap: 0x4337-0x44D6
90 new GB18030Map (0x44D7, 0x464B, FromGBXRaw (0x82, 0x33, 0xA3, 0x39, false), FromGBXRaw (0x82, 0x33, 0xC9, 0x31, false), false),
91 // rawmap: 0x464C-0x478D
92 new GB18030Map (0x478E, 0x4946, FromGBXRaw (0x82, 0x33, 0xE8, 0x38, false), FromGBXRaw (0x82, 0x34, 0x96, 0x38, false), false),
93 // rawmap: 0x4947-0x49B7
94 new GB18030Map (0x49B8, 0x4C76, FromGBXRaw (0x82, 0x34, 0xA1, 0x31, false), FromGBXRaw (0x82, 0x34, 0xE7, 0x33, false), false),
95 // rawmap: 0x4C77-0x4DFF
97 // 4E00-9FA5 are all mapped in GB2312
98 new GB18030Map (0x4E00, 0x9FA5, 0, 0, true),
100 new GB18030Map (0x9FA6, 0xD7FF, FromGBXRaw (0x82, 0x35, 0x8F, 0x33, false), FromGBXRaw (0x83, 0x36, 0xC7, 0x38, false), false),
102 // D800-DFFF are ignored (surrogate)
103 // E000-E76B are all mapped in GB2312.
104 new GB18030Map (0xD800, 0xE76B, 0, 0, true),
106 // rawmap: 0xE76C-0xE864
107 new GB18030Map (0xE865, 0xF92B, FromGBXRaw (0x83, 0x36, 0xD0, 0x30, false), FromGBXRaw (0x84, 0x30, 0x85, 0x34, false), false),
108 // rawmap: 0xF92C-0xFA29
109 new GB18030Map (0xFA2A, 0xFE2F, FromGBXRaw (0x84, 0x30, 0x9C, 0x38, false), FromGBXRaw (0x84, 0x31, 0x85, 0x37, false), false),
110 // rawmap: 0xFE30-0xFFE5
111 new GB18030Map (0xFFE6, 0xFFFF, FromGBXRaw (0x84, 0x31, 0xA2, 0x34, false), FromGBXRaw (0x84, 0x31, 0xA4, 0x39, false), false),
// Array overload: pins `bytes` and forwards to the pointer overload so that the
// four output bytes land at bytes [start] .. bytes [start + 3].
114 public static void Unlinear (byte [] bytes, int start, long gbx)
116 fixed (byte* bptr = bytes) {
117 Unlinear (bptr + start, gbx);
// Writes the four-byte sequence for the linear value gbx into bytes [0..3],
// filling from the last byte to the first: bytes 3 and 1 are 0x30 plus a
// base-10 digit, byte 2 is 0x81 plus a base-126 digit, byte 0 is 0x81 plus
// whatever remains.
// NOTE(review): the statements between the stores are not visible in this
// excerpt; presumably gbx is divided by 10 / 126 / 10 after each store -- confirm.
121 public static unsafe void Unlinear (byte* bytes, long gbx)
123 bytes [3] = (byte) (gbx % 10 + 0x30);
125 bytes [2] = (byte) (gbx % 126 + 0x81);
127 bytes [1] = (byte) (gbx % 10 + 0x30);
129 bytes [0] = (byte) (gbx + 0x81);
132 // Returns negative (invalid) or positive (valid).
// Reads the four bytes at bytes [start] .. bytes [start + 3] and converts them.
133 public static long FromGBX (byte [] bytes, int start)
135 byte b1 = bytes [start];
136 byte b2 = bytes [start + 1];
137 byte b3 = bytes [start + 2];
138 byte b4 = bytes [start + 3];
// Byte validation: b1 and b3 are rejected when below 0x81 or equal to 0xFF,
// b2 and b4 when outside 0x30..0x39. The value produced on rejection is on
// lines not visible in this excerpt.
139 if (b1 < 0x81 || b1 == 0xFF)
141 if (b2 < 0x30 || b2 > 0x39)
143 if (b3 < 0x81 || b3 == 0xFF)
145 if (b4 < 0x30 || b4 > 0x39)
// Early return using the supp == true form of FromGBXRaw; the guarding
// condition is not visible in this excerpt.
148 return FromGBXRaw (b1, b2, b3, b4, true);
149 long linear = FromGBXRaw (b1, b2, b3, b4, false);
152 long startIgnore = 0;
// Walk the ranges in table order, comparing the linear value to each one.
154 for (int i = 0; i < ranges.Length; i++) {
155 GB18030Map m = ranges [i];
// Before this range: look the value up in the raw table at index
// linear - startIgnore + rawOffset.
156 if (linear < m.GStart)
157 return ToUcsRaw ((int) (linear
158 - startIgnore + rawOffset));
// Not past the end of this range: computed arithmetically (the rest of the
// expression is on a line not visible in this excerpt).
159 if (linear <= m.GEnd)
160 return linear - gbxBase - m.GStart
// Add the size of the gap just passed to rawOffset and resume counting
// after the end of this range.
163 rawOffset += m.GStart - startIgnore;
164 startIgnore = m.GEnd + 1;
167 // return ToUcsRaw ((int) (linear - gbxBase));
168 throw new SystemException (String.Format ("GB18030 INTERNAL ERROR (should not happen): GBX {0:x02} {1:x02} {2:x02} {3:x02}", b1, b2, b3, b4));
// Returns cp plus gbxSuppBase.
// NOTE(review): presumably cp is expected to be an offset into the supplementary
// planes rather than a full code point -- confirm against callers.
171 public static long FromUCSSurrogate (int cp)
173 return cp + gbxSuppBase;
// Converts the UCS code point cp to a linear GBX value by walking the ranges
// table. Throws SystemException when the loop finishes without returning.
// NOTE(review): the conditions guarding the two returns are not visible in this
// excerpt; they presumably mirror FromGBX (cp before the range / cp inside it).
176 public static long FromUCS (int cp)
// Counting of raw-table entries starts at code point 0x80.
179 long startIgnore = 0x80;
180 for (int i = 0; i < ranges.Length; i++) {
181 GB18030Map m = ranges [i];
// Raw-table lookup at index cp - startIgnore + rawOffset.
183 return ToGbxRaw ((int) (cp
184 - startIgnore + rawOffset));
// Arithmetic mapping: same offset from GStart as cp has from UStart.
186 return cp - m.UStart + m.GStart;
// Add the size of the gap just passed to rawOffset and resume counting
// after the end of this range.
188 rawOffset += m.UStart - startIgnore;
189 startIgnore = m.UEnd + 1;
192 throw new SystemException (String.Format ("GB18030 INTERNAL ERROR (should not happen): UCS {0:x06}", cp));
// Combines four bytes into one linear value. When supp is true the lead byte is
// taken relative to 0x90 instead of 0x81 and 0x10000 is added to the result.
// NOTE(review): the middle of the expression (the b2 and b3 terms) is not visible
// in this excerpt.
195 static long FromGBXRaw (
196 byte b1, byte b2, byte b3, byte b4, bool supp)
199 return (((b1 - (supp ? 0x90 : 0x81)) * 10 +
202 b4 - 0x30 + (supp ? 0x10000 : 0);
// Returns the big-endian 16-bit entry stored at position idx of the gbx2uni table.
205 static int ToUcsRaw (int idx)
207 return gbx2uni [idx * 2] * 0x100 +
208 gbx2uni [idx * 2 + 1];
// Returns gbxBase plus the big-endian 16-bit entry stored at position idx of the
// uni2gbx table. A negative idx, or one whose second byte would fall at or past
// uni2gbxSize, is handled separately (the result is on a line not visible here).
211 static long ToGbxRaw (int idx)
213 if (idx < 0 || idx * 2 + 1 >= uni2gbxSize)
215 return gbxBase + uni2gbx [idx * 2] * 0x100 + uni2gbx [idx * 2 + 1];
// Entry point of the table generator: creates an instance and calls Run.
220 public static void Main ()
222 new GB18030Source ().Run ();
// Sizing pass: for every range, add the number of UCS code points (ucount) and
// GBX values (gcount) lying in the gap that precedes it. `skip` records whether
// the current entry has GStart == 0.
// NOTE(review): the enclosing method header and the statements that advance
// ustart/gstart are not visible in this excerpt.
235 for (int i = 0; i < ranges.Length; i++) {
236 GB18030Map m = ranges [i];
238 //Console.WriteLine ("---- adding {0:X04} umap. {1:X04} gmap, skip range between {2:X04} and {3:X04}", m.UStart - ustart, m.GStart != 0 ? m.GStart - gstart : 0, m.UStart, m.UEnd);
239 ucount += m.UStart - ustart;
242 gcount += m.GStart - gstart;
243 skip = m.GStart == 0;
249 Console.Error.WriteLine ("Total UCS codepoints: {0} ({1:X04})", ucount, ucount);
250 Console.Error.WriteLine ("Total GBX codepoints: {0} ({1:X04})", gcount, gcount);
// Two bytes are reserved per counted entry in each direction.
252 uni2gbxMap = new byte [ucount * 2];
253 gbx2uniMap = new byte [gcount * 2];
// Load the XML mapping file with external resolution disabled and visit every
// /characterMapping/assignments/a element.
255 XmlDocument doc = new XmlDocument ();
256 doc.XmlResolver = null;
257 doc.Load ("gb-18030-2000.xml");
258 foreach (XmlElement e in doc.SelectNodes (
259 "/characterMapping/assignments/a"))
// Output layout: the length of gbx2uniMap as four big-endian bytes, then
// gbx2uniMap, then uni2gbxMap.
// NOTE(review): only one length prefix is written here, while the static
// constructor also decodes four bytes at the start of the second table --
// confirm that reader and writer agree on the format.
262 using (FileStream fs = File.Create ("gb18030.table")) {
263 byte [] size = new byte [4];
264 for (int i = 0, len = gbx2uniMap.Length;
265 i < 4; i++, len >>= 8)
266 size [3 - i] = (byte) (len % 0x100);
267 fs.Write (size, 0, 4);
268 fs.Write (gbx2uniMap, 0, gbx2uniMap.Length);
269 fs.Write (uni2gbxMap, 0, uni2gbxMap.Length);
271 Console.WriteLine ("done.");
// Reads one mapping element: attribute "u" is parsed as a hexadecimal code point
// and attribute "b" is split on spaces, each piece being parsed as a hexadecimal
// byte. The handling of the parsed bytes is on lines not visible in this excerpt.
274 void AddMap (XmlElement e)
276 int u = int.Parse (e.GetAttribute ("u"),
277 NumberStyles.HexNumber);
278 byte [] b = new byte [4];
280 foreach (string s in e.GetAttribute ("b").Split (' '))
282 byte.Parse (s, NumberStyles.HexNumber);
// Records the pair (u, b) in both lookup tables. The GBX value is stored relative
// to gbxBase; each table entry is two bytes, high byte first.
289 void AddMap (int u, byte [] b)
291 int gbx = (int) (FromGBXRaw (
292 b [0], b [1], b [2], b [3], false) - gbxBase);
// Values above 0x10000 on either side are treated as an internal error.
293 if (u > 0x10000 || gbx > 0x10000)
294 throw new Exception (String.Format (
295 "should not happen: {0:X04} {1:X04}",
// UCS -> GBX direction.
298 int uidx = IndexForUcs (u);
299 //Console.WriteLine ("U: {0:x04} for {1:x04} [{2:x02} {3:x02}]", uidx, u, (byte) (gbx / 0x100), (byte) (gbx % 0x100));
300 uni2gbxMap [uidx * 2] = (byte) (gbx / 0x100);
301 uni2gbxMap [uidx * 2 + 1] = (byte) (gbx % 0x100);
// GBX -> UCS direction.
303 int gidx = IndexForGbx (gbx);
304 //Console.WriteLine ("G: {0:x04} for {1:x04} ({2:x02} {3:x02} {4:x02} {5:x02})", gidx, gbx, b [0], b [1], b [2], b [3]);
305 gbx2uniMap [gidx * 2] = (byte) (u / 0x100);
306 gbx2uniMap [gidx * 2 + 1] = (byte) (u % 0x100);
// Computes the table index for the code point ucs by walking the ranges and
// summing the sizes of the gaps that precede each range.
// NOTE(review): the initial values of count/start/skip and the condition guarding
// the return are not visible in this excerpt.
309 static int IndexForUcs (int ucs)
314 for (int i = 0; i < ranges.Length; i++) {
315 GB18030Map m = ranges [i];
// Index = entries counted so far plus the offset of ucs from `start`.
318 return count + ucs - start;
319 count += m.UStart - start;
// Remember whether this entry has GStart == 0.
321 skip = m.GStart == 0;
327 static int IndexForGbx (int gbx)
331 for (int i = 0; i < ranges.Length; i++) {
332 GB18030Map m = ranges [i];
336 return (int) (count + gbx - start);
337 count += m.GStart - start;