5 // Atsushi Enomoto <atsushi@ximian.com>
8 using System.Reflection;
// Internal subclass of GB18030Encoding. The only member visible here is a
// parameterless constructor that chains to the base constructor.
// NOTE(review): the "ENC" + lowercase-name form presumably lets the encoding
// be located by name at run time - confirm against the lookup code.
15 internal class ENCgb18030 : GB18030Encoding
// Empty-bodied constructor; all initialization is delegated to the base class.
17 public ENCgb18030 (): base () {}
// Public subclass of GB18030Encoding with an empty body: it adds no members
// and inherits all behavior from the base class.
// NOTE(review): the name presumably refers to Windows code page 54936
// (GB18030) - confirm.
21 public class CP54936 : GB18030Encoding { }
// GB18030 text encoding built on MonoEncoding.
// Every conversion method visible here creates a fresh GB18030Encoder or
// GB18030Decoder and delegates to it, so no conversion state is kept on the
// encoding object itself by these methods.
24 public class GB18030Encoding : MonoEncoding
// Parameterless constructor; its base-initializer and body are not visible
// in this excerpt.
27 public GB18030Encoding ()
// Human-readable description of the encoding.
32 public override string EncodingName {
33 get { return "Chinese Simplified (GB18030)"; }
// Name for use in mail agent header tags; same literal as BodyName and WebName.
36 public override string HeaderName {
37 get { return "GB18030"; }
// Name for use in mail agent body tags.
40 public override string BodyName {
41 get { return "GB18030"; }
// Registered (IANA-style) name reported for this encoding.
44 public override string WebName {
45 get { return "GB18030"; }
// The getters of the four capability flags below are not visible in this
// excerpt, so their returned values cannot be stated here.
48 public override bool IsMailNewsDisplay {
52 public override bool IsMailNewsSave {
56 public override bool IsBrowserDisplay {
60 public override bool IsBrowserSave {
// Override of Encoding.GetMaxByteCount for `len` input chars. The computation
// itself is not visible here; the original comment below names the character
// class the (hidden) calculation is concerned with.
64 public override int GetMaxByteCount (int len)
66 // non-GB2312 characters in \u0080 - \uFFFF
// Override of Encoding.GetMaxCharCount for `len` input bytes; body not visible.
70 public override int GetMaxCharCount (int len)
// Counts the bytes needed for chars[index .. index + length) by delegating to
// a new GB18030Encoder bound to this encoding. The final `true` is the
// encoder's flush argument.
75 public override int GetByteCount (char [] chars, int index, int length)
77 return new GB18030Encoder (this).GetByteCount (chars, index, length, true);
// Pointer-based byte count. Delegates to a new encoder with its third
// argument (named `refresh` on GB18030Encoder.GetByteCountImpl) set to true.
80 public unsafe override int GetByteCountImpl (char* chars, int count)
82 return new GB18030Encoder (this).GetByteCountImpl (chars, count, true);
// Pointer-based encode of charCount chars into the bytes buffer. Delegates to
// a new encoder with `refresh` = true and returns what the encoder returns.
85 public unsafe override int GetBytesImpl (char* chars, int charCount, byte* bytes, int byteCount)
87 return new GB18030Encoder (this).GetBytesImpl (chars, charCount, bytes, byteCount, true);
// Counts the chars produced by decoding bytes[start .. start + len) using a
// new GB18030Decoder.
90 public override int GetCharCount (byte [] bytes, int start, int len)
92 return new GB18030Decoder ().GetCharCount (bytes, start, len);
// Decodes srclen bytes starting at byteIdx into chars starting at charIdx,
// using a new GB18030Decoder; returns the decoder's result.
95 public override int GetChars (byte [] bytes, int byteIdx, int srclen, char [] chars, int charIdx)
97 return new GB18030Decoder ().GetChars (bytes, byteIdx, srclen, chars, charIdx);
// Returns a new encoder that is given this encoding as its owner.
100 public override Encoder GetEncoder ()
102 return new GB18030Encoder (this);
// Returns a new decoder; it takes no reference to this encoding.
105 public override Decoder GetDecoder ()
107 return new GB18030Decoder ();
// Decoder for GB18030 byte sequences, derived from DbcsEncoding.DbcsDecoder.
111 class GB18030Decoder : DbcsEncoding.DbcsDecoder
// Shared GB2312 conversion tables. GetChars reads gb2312.n2u to map
// two-byte sequences to chars.
113 static DbcsConvert gb2312 = DbcsConvert.Gb2312;
114 // Incomplete trailing byte sequences are not carried over between calls for now - should they be?
115 // int incomplete1 = -1, incomplete2 = -1, incomplete3 = -1;
// Parameterless constructor; its base-initializer and body are not visible
// in this excerpt.
117 public GB18030Decoder ()
// Counts the chars that decoding bytes[start .. start + len) would produce.
// bytes: source buffer; start: first byte to examine; len: number of bytes.
// The running total is kept in `ret`, whose declaration and most increments
// are not visible in this excerpt.
122 public override int GetCharCount (byte [] bytes, int start, int len)
// Argument validation is delegated to CheckRange.
124 CheckRange (bytes, start, len);
// Exclusive end position; `start` itself is used as the moving cursor.
126 int end = start + len;
128 while (start < end) {
// Lead byte below 0x80: single-byte case.
129 if (bytes [start] < 0x80) {
134 else if (bytes [start] == 0x80) {
135 // Euro sign - actually it is obsolete,
136 // now it's just reserved but not used
141 else if (bytes [start] == 0xFF) {
142 // invalid data - fill '?'
// A lead byte with no following byte inside the range.
147 else if (start + 1 >= end) {
148 // incomplete1 = bytes [start];
152 break; // incomplete tail.
155 byte second = bytes [start + 1];
// 0x7F and 0xFF are rejected as second bytes.
156 if (second == 0x7F || second == 0xFF) {
// A second byte in 0x30..0x39 selects the four-byte form.
162 else if (0x30 <= second && second <= 0x39) {
// Fewer than four bytes remain for this sequence.
164 if (start + 3 >= end) {
166 // incomplete1 = bytes [start];
167 // incomplete2 = bytes [start + 1];
168 // if (start + 3 == end)
169 // incomplete3 = bytes [start + 2];
// Adds 3 when exactly three bytes remain, otherwise 2.
170 ret += start + 3 == end ? 3 : 2;
// FromGBX interprets the four-byte sequence beginning at `start`.
173 long value = GB18030Source.FromGBX (bytes, start);
// NOTE(review): presumably reached only when `value` is negative, so that the
// subtraction moves `start` forward - the guarding condition is not visible
// here; confirm.
177 start -= (int) value;
// Values of 0x10000 and above: GetChars emits two chars (a surrogate pair) on
// its matching branch; the count added here is not visible.
178 } else if (value >= 0x10000) {
// Decodes bytes[byteIndex .. byteIndex + byteCount) into chars, starting at
// charIndex, and returns the number of chars written.
// Several branch bodies and cursor updates are not visible in this excerpt.
196 public override int GetChars (byte [] bytes, int byteIndex, int byteCount, char [] chars, int charIndex)
// Argument validation is delegated to CheckRange.
198 CheckRange (bytes, byteIndex, byteCount, chars, charIndex);
// Exclusive end of the input range.
200 int byteEnd = byteIndex + byteCount;
// Remember the first output position so the return value is a difference.
201 int charStart = charIndex;
203 while (byteIndex < byteEnd) {
// Lead byte below 0x80 is copied through as the char with the same value.
204 if (bytes [byteIndex] < 0x80) {
205 chars [charIndex++] = (char) bytes [byteIndex++];
208 else if (bytes [byteIndex] == 0x80) {
209 // Euro sign - actually it is obsolete,
210 // now it's just reserved but not used
211 chars [charIndex++] = '\u20AC';
215 else if (bytes [byteIndex] == 0xFF) {
216 // invalid data - fill '?'
217 chars [charIndex++] = '?';
// A lead byte with no following byte inside the range.
221 else if (byteIndex + 1 >= byteEnd) {
222 //incomplete1 = bytes [byteIndex++];
225 break; // incomplete tail.
228 byte second = bytes [byteIndex + 1];
// 0x7F and 0xFF are rejected as second bytes and replaced by '?'.
229 if (second == 0x7F || second == 0xFF) {
231 chars [charIndex++] = '?';
// A second byte in 0x30..0x39 selects the four-byte form.
234 else if (0x30 <= second && second <= 0x39) {
// Fewer than four bytes remain for this sequence; the handling of this case
// is not visible beyond the commented-out carry-over code below.
236 if (byteIndex + 3 >= byteEnd) {
238 //incomplete1 = bytes [byteIndex];
239 //incomplete2 = bytes [byteIndex + 1];
240 //if (byteIndex + 3 == byteEnd)
241 // incomplete3 = bytes [byteIndex + 2];
// FromGBX interprets the four-byte sequence beginning at byteIndex.
244 long value = GB18030Source.FromGBX (bytes, byteIndex);
// NOTE(review): presumably the negative-`value` (invalid sequence) branch: a
// '?' is written and subtracting the negative value advances byteIndex - the
// guarding condition is not visible here; confirm.
247 chars [charIndex++] = '?';
248 byteIndex -= (int) value;
249 } else if (value >= 0x10000) {
// Split into a high/low surrogate pair, 0x400 code units per high surrogate.
// Any adjustment of `value` before this point is not visible in this excerpt.
252 chars [charIndex++] = (char) (value / 0x400 + 0xD800);
253 chars [charIndex++] = (char) (value % 0x400 + 0xDC00);
// Otherwise the value is written as a single char.
257 chars [charIndex++] = (char) value;
// Two-byte form: compute a byte offset into gb2312.n2u. Each lead byte from
// 0x81 spans 191 second-byte slots from 0x40, and each entry is two bytes.
261 byte first = bytes [byteIndex];
262 int ord = ((first - 0x81) * 191 + second - 0x40) * 2;
// An offset outside the table yields '\0'; otherwise the entry is read
// low byte first.
263 char c1 = ord < 0 || ord >= gb2312.n2u.Length ?
264 '\0' : (char) (gb2312.n2u [ord] + gb2312.n2u [ord + 1] * 256);
// NOTE(review): presumably '?' is written when c1 is '\0' and c1 otherwise -
// the selecting condition is not visible here; confirm.
266 chars [charIndex++] = '?';
268 chars [charIndex++] = c1;
// Number of chars written by this call.
273 return charIndex - charStart;
// Encoder producing GB18030 bytes from UTF-16 chars, derived from MonoEncoder.
277 class GB18030Encoder : MonoEncoder
// Shared GB2312 conversion tables; gb2312.u2n is consulted for chars that may
// have a two-byte form.
279 static DbcsConvert gb2312 = DbcsConvert.Gb2312;
// Constructor taking the owning encoding; its base-initializer and body are
// not visible in this excerpt.
281 public GB18030Encoder (MonoEncoding owner)
// Surrogate char that GetByteCountImpl found as the last input char;
// char.MinValue means nothing is pending.
286 char incomplete_byte_count;
// Surrogate char that GetBytesImpl found as the last input char;
// char.MinValue means nothing is pending.
287 char incomplete_bytes;
// Counts the bytes that encoding `count` chars from `chars` would produce.
// refresh: NOTE(review) presumably gates the final handling of a pending
// surrogate near the end of the method - the guarding condition is not
// visible in this excerpt; confirm.
// The cursor variables `start` and `end` and the running total are declared
// on lines not visible here.
289 public unsafe override int GetByteCountImpl (char* chars, int count, bool refresh)
294 while (start < end) {
295 char ch = chars [start];
301 } else if (Char.IsSurrogate (ch)) {
// A surrogate that is the last input char is remembered instead of counted.
303 if (start + 1 == end) {
304 incomplete_byte_count = ch;
// NOTE(review): this single-byte test uses `ch < 0x80`, whereas GetBytesImpl
// uses `ch <= 0x80` for the corresponding path; the counted and written sizes
// may therefore disagree for U+0080 - confirm which bound is intended.
313 if (ch < 0x80 || ch == 0xFF) {
// Look up the two-byte form; the table holds two bytes per char, with the
// first output byte stored at the odd offset.
320 byte b1 = gb2312.u2n [((int) ch) * 2 + 1];
321 byte b2 = gb2312.u2n [((int) ch) * 2];
// Both bytes non-zero means the table has an entry for this char.
322 if (b1 != 0 && b2 != 0) {
// No table entry: ask GB18030Source for the value used by the four-byte form.
330 long value = GB18030Source.FromUCS (ch);
// A pending surrogate is checked and then cleared; what is added to the count
// for it is not visible in this excerpt.
339 if (incomplete_byte_count != char.MinValue)
341 incomplete_byte_count = char.MinValue;
// Encodes charCount chars from `chars` into `bytes` and returns the number of
// bytes written.
// refresh: NOTE(review) presumably gates the final flush of a pending
// surrogate near the end of the method - the guarding condition is not
// visible in this excerpt; confirm.
// The cursors charIndex and byteIndex are declared on lines not visible here.
346 public unsafe override int GetBytesImpl (char* chars, int charCount, byte* bytes, int byteCount, bool refresh)
351 int charEnd = charIndex + charCount;
// Remember where output started so the return value is a difference.
352 int byteStart = byteIndex;
// Begin with any surrogate left pending by an earlier call
// (char.MinValue means none).
353 char ch = incomplete_bytes;
355 while (charIndex < charEnd) {
// Fetch the next input char only when nothing is pending.
356 if (incomplete_bytes == char.MinValue)
357 ch = chars [charIndex++];
// NOTE(review): presumably the else-branch of the check above, clearing the
// pending char once it has been picked up - confirm.
359 incomplete_bytes = char.MinValue;
// Single-byte output: the char value is written as one byte.
363 bytes [byteIndex++] = (byte) ch;
365 } else if (Char.IsSurrogate (ch)) {
// A surrogate that is the last input char is kept for a later call.
367 if (charIndex == charEnd) {
368 incomplete_bytes = ch;
371 char ch2 = chars [charIndex++];
// The char following a surrogate is not itself a surrogate.
372 if (!Char.IsSurrogate (ch2)) {
// Arguments of a call whose name is on a line not visible in this excerpt.
376 chars, ref charIndex, ref charCount,
377 bytes, ref byteIndex, ref byteCount);
379 bytes [byteIndex++] = (byte) '?';
// Combine the pair into its offset above U+10000 and let GB18030Source write
// the encoded form at the current output position.
383 int cp = (ch - 0xD800) * 0x400 + ch2 - 0xDC00;
384 GB18030Source.Unlinear (bytes + byteIndex, GB18030Source.FromUCSSurrogate (cp));
// NOTE(review): this test uses `ch <= 0x80`, whereas GetByteCountImpl uses
// `ch < 0x80`; the two may disagree for U+0080. Also, U+00FF is written here
// as byte 0xFF, which GB18030Decoder treats as invalid data - confirm both.
390 if (ch <= 0x80 || ch == 0xFF) {
391 // Character maps to itself
392 bytes [byteIndex++] = (byte) ch;
// Look up the two-byte form; the first output byte is stored at the odd
// offset of the table entry.
396 byte b1 = gb2312.u2n [((int) ch) * 2 + 1];
397 byte b2 = gb2312.u2n [((int) ch) * 2];
// Both bytes non-zero means the table has an entry; emit it as two bytes.
398 if (b1 != 0 && b2 != 0) {
399 bytes [byteIndex++] = b1;
400 bytes [byteIndex++] = b2;
// No table entry: ask GB18030Source for the value used by the four-byte form.
404 long value = GB18030Source.FromUCS (ch);
406 bytes [byteIndex++] = 0x3F; // invalid(?)
409 GB18030Source.Unlinear (bytes + byteIndex, value);
// A surrogate still pending at this point is emitted as '?' (0x3F) and the
// pending state is cleared.
415 if (incomplete_bytes != char.MinValue)
416 bytes [byteIndex++] = 0x3F; // incomplete
417 incomplete_bytes = char.MinValue;
// Number of bytes written by this call.
420 return byteIndex - byteStart;