5 // Atsushi Enomoto <atsushi@ximian.com>
8 using System.Reflection;
13 using MonoEncoder = I18N.Common.MonoSafeEncoder;
14 using MonoEncoding = I18N.Common.MonoSafeEncoding;
// Internal class deriving from GB18030Encoding. It declares a parameterless
// constructor that only chains to the base-class constructor.
// NOTE(review): the "ENC" + lowercase name form looks like an alias meant to be
// found by name (System.Reflection is imported above) - confirm with the loader.
20 internal class ENCgb18030 : GB18030Encoding
22 public ENCgb18030 (): base () {}
// Public subclass of GB18030Encoding that adds no members of its own.
// NOTE(review): the name presumably refers to code page 54936 - confirm.
26 public class CP54936 : GB18030Encoding { }
29 public class GB18030Encoding : MonoEncoding
32 public GB18030Encoding ()
// Human-readable name of this encoding; always the fixed string below.
37 public override string EncodingName {
38 get { return "Chinese Simplified (GB18030)"; }
// Header name reported for this encoding; always "GB18030".
41 public override string HeaderName {
42 get { return "GB18030"; }
// Body name reported for this encoding; always "GB18030".
45 public override string BodyName {
46 get { return "GB18030"; }
// Web name reported for this encoding; always "GB18030".
49 public override string WebName {
50 get { return "GB18030"; }
53 public override bool IsMailNewsDisplay {
57 public override bool IsMailNewsSave {
61 public override bool IsBrowserDisplay {
65 public override bool IsBrowserSave {
69 public override int GetMaxByteCount (int len)
71 // non-GB2312 characters in \u0080 - \uFFFF
75 public override int GetMaxCharCount (int len)
// Returns the byte count for `count` chars starting at `chars`.
// The work is delegated to a GB18030Encoder created for this call only, with
// `true` passed for the encoder's `refresh` parameter.
81 public unsafe override int GetByteCountImpl (char* chars, int count)
83 return new GB18030Encoder (this).GetByteCountImpl (chars, count, true);
// Encodes `charCount` chars from `chars` into the `bytes` buffer and returns
// the encoder's result. The work is delegated to a GB18030Encoder created for
// this call only, with `true` passed for the encoder's `refresh` parameter.
86 public unsafe override int GetBytesImpl (char* chars, int charCount, byte* bytes, int byteCount)
88 return new GB18030Encoder (this).GetBytesImpl (chars, charCount, bytes, byteCount, true);
// Array-based byte count for chars [index, index + length).
// Delegates to a GB18030Encoder created for this call only, passing `true`
// for the encoder's `refresh` parameter.
91 public override int GetByteCount (char [] chars, int index, int length)
93 return new GB18030Encoder (this).GetByteCount (chars, index, length, true);
// Array-based encode of `charCount` chars starting at `charIndex` into `bytes`
// starting at `byteIndex`. Delegates to a GB18030Encoder created for this call
// only, passing `true` for the encoder's `refresh` parameter.
96 public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
98 return new GB18030Encoder (this).GetBytes (chars, charIndex, charCount, bytes, byteIndex, true);
// Returns the char count for `len` bytes starting at `start`.
// Delegates to a GB18030Decoder created for this call only.
102 public override int GetCharCount (byte [] bytes, int start, int len)
104 return new GB18030Decoder ().GetCharCount (bytes, start, len);
// Decodes `srclen` bytes starting at `byteIdx` into `chars` starting at
// `charIdx`. Delegates to a GB18030Decoder created for this call only.
107 public override int GetChars (byte [] bytes, int byteIdx, int srclen, char [] chars, int charIdx)
109 return new GB18030Decoder ().GetChars (bytes, byteIdx, srclen, chars, charIdx);
// Returns a new GB18030Encoder that is given this encoding as its owner.
112 public override Encoder GetEncoder ()
114 return new GB18030Encoder (this);
// Returns a new GB18030Decoder; the decoder takes no constructor arguments.
117 public override Decoder GetDecoder ()
119 return new GB18030Decoder ();
// Decoder that turns GB18030 byte sequences into chars; it derives from
// DbcsEncoding.DbcsDecoder.
123 class GB18030Decoder : DbcsEncoding.DbcsDecoder
// Shared GB2312 conversion data; its n2u array is consulted for two-byte
// sequences in GetChars.
125 static DbcsConvert gb2312 = DbcsConvert.Gb2312;
// The commented-out fields below were meant to hold the bytes of an unfinished
// trailing sequence; with them disabled the decoder carries no such state.
126 // for now incomplete block is not supported - should we?
127 // int incomplete1 = -1, incomplete2 = -1, incomplete3 = -1;
129 public GB18030Decoder ()
// Counts the chars for the byte window [start, start + len).
// CheckRange is invoked on the arguments first; the loop then classifies each
// sequence by its lead byte and, where needed, its second byte.
134 public override int GetCharCount (byte [] bytes, int start, int len)
136 CheckRange (bytes, start, len);
// Exclusive end index of the input window.
138 int end = start + len;
140 while (start < end) {
// Lead byte below 0x80.
141 if (bytes [start] < 0x80) {
146 else if (bytes [start] == 0x80) {
147 // Euro sign - actually it is obsolete,
148 // now it's just reserved but not used
153 else if (bytes [start] == 0xFF) {
154 // invalid data - fill '?'
// Any other lead byte needs a second byte; here none is left in the window.
159 else if (start + 1 >= end) {
160 // incomplete1 = bytes [start];
164 break; // incomplete tail.
167 byte second = bytes [start + 1];
// Second bytes 0x7F and 0xFF get their own branch.
168 if (second == 0x7F || second == 0xFF) {
// A second byte in 0x30..0x39 selects the branch that works on four bytes and
// converts them with GB18030Source.FromGBX.
174 else if (0x30 <= second && second <= 0x39) {
// Fewer than four bytes remain in the window.
176 if (start + 3 >= end) {
178 // incomplete1 = bytes [start];
179 // incomplete2 = bytes [start + 1];
180 // if (start + 3 == end)
181 // incomplete3 = bytes [start + 2];
// Adds 3 when exactly three bytes remain, otherwise 2.
182 ret += start + 3 == end ? 3 : 2;
185 long value = GB18030Source.FromGBX (bytes, start);
// NOTE(review): subtracting only moves start forward if value is negative at
// this point - confirm against GB18030Source.FromGBX's return contract.
189 start -= (int) value;
// Values of 0x10000 and above are handled in their own branch.
190 } else if (value >= 0x10000) {
// Decodes the byte window [byteIndex, byteIndex + byteCount) into chars,
// writing from charIndex onward, and returns the number of chars written.
// CheckRange is invoked on the arguments first.
208 public override int GetChars (byte [] bytes, int byteIndex, int byteCount, char [] chars, int charIndex)
210 CheckRange (bytes, byteIndex, byteCount, chars, charIndex);
// Exclusive end of the input window, and the starting output position that is
// used to compute the return value.
212 int byteEnd = byteIndex + byteCount;
213 int charStart = charIndex;
215 while (byteIndex < byteEnd) {
// Lead byte below 0x80: copied through as a char with the same value.
216 if (bytes [byteIndex] < 0x80) {
217 chars [charIndex++] = (char) bytes [byteIndex++];
220 else if (bytes [byteIndex] == 0x80) {
221 // Euro sign - actually it is obsolete,
222 // now it's just reserved but not used
223 chars [charIndex++] = '\u20AC';
227 else if (bytes [byteIndex] == 0xFF) {
228 // invalid data - fill '?'
229 chars [charIndex++] = '?';
// Any other lead byte needs a second byte; here none is left in the window.
233 else if (byteIndex + 1 >= byteEnd) {
234 //incomplete1 = bytes [byteIndex++];
237 break; // incomplete tail.
240 byte second = bytes [byteIndex + 1];
// Second bytes 0x7F and 0xFF produce '?'.
241 if (second == 0x7F || second == 0xFF) {
243 chars [charIndex++] = '?';
// A second byte in 0x30..0x39 selects the branch that works on four bytes and
// converts them with GB18030Source.FromGBX.
246 else if (0x30 <= second && second <= 0x39) {
// Fewer than four bytes remain in the window.
248 if (byteIndex + 3 >= byteEnd) {
250 //incomplete1 = bytes [byteIndex];
251 //incomplete2 = bytes [byteIndex + 1];
252 //if (byteIndex + 3 == byteEnd)
253 // incomplete3 = bytes [byteIndex + 2];
256 long value = GB18030Source.FromGBX (bytes, byteIndex);
// NOTE(review): '?' is emitted and byteIndex is moved by subtracting value,
// which only advances if value is negative here - confirm against
// GB18030Source.FromGBX's return contract.
259 chars [charIndex++] = '?';
260 byteIndex -= (int) value;
261 } else if (value >= 0x10000) {
// Two chars are written: the quotient by 0x400 offset by 0xD800, then the
// remainder offset by 0xDC00 (the high- and low-surrogate bases).
264 chars [charIndex++] = (char) (value / 0x400 + 0xD800);
265 chars [charIndex++] = (char) (value % 0x400 + 0xDC00);
// Otherwise the value is written as a single char.
269 chars [charIndex++] = (char) value;
// Two-byte path: the byte pair is turned into an index into gb2312.n2u.
// Each table entry takes two bytes (hence the "* 2"), stored low byte first.
273 byte first = bytes [byteIndex];
274 int ord = ((first - 0x81) * 191 + second - 0x40) * 2;
// An index outside the table yields '\0' instead of a table read.
275 char c1 = ord < 0 || ord >= gb2312.n2u.Length ?
276 '\0' : (char) (gb2312.n2u [ord] + gb2312.n2u [ord + 1] * 256);
278 chars [charIndex++] = '?';
280 chars [charIndex++] = c1;
// Number of chars produced by this call.
285 return charIndex - charStart;
// Encoder that turns chars into GB18030 bytes; it derives from MonoEncoder.
289 class GB18030Encoder : MonoEncoder
// Shared GB2312 conversion data; its u2n array is read at (char value * 2) and
// (char value * 2 + 1) by the counting and writing methods.
291 static DbcsConvert gb2312 = DbcsConvert.Gb2312;
293 public GB18030Encoder (MonoEncoding owner)
// A surrogate char found at the very end of the input is parked in one of these
// fields: incomplete_byte_count by the counting methods, incomplete_bytes by
// the writing methods. char.MinValue means nothing is pending.
298 char incomplete_byte_count;
299 char incomplete_bytes;
// Pointer-based byte count for `count` chars starting at `chars`.
// Chars are classified one at a time: surrogates, single-byte chars, chars
// found in the GB2312 table, and everything else via GB18030Source.FromUCS.
302 public unsafe override int GetByteCountImpl (char* chars, int count, bool refresh)
307 while (start < end) {
308 char ch = chars [start];
314 } else if (Char.IsSurrogate (ch)) {
// The surrogate is the last char of the input: remember it as pending.
316 if (start + 1 == end) {
317 incomplete_byte_count = ch;
// NOTE(review): this test uses `ch < 0x80` while the matching test in
// GetBytesImpl uses `ch <= 0x80`; confirm that U+0080 is counted the same way
// it is written.
326 if (ch < 0x80 || ch == 0xFF) {
// GB2312 lookup: the table holds two bytes per char; b1 is read from the odd
// slot and b2 from the even slot.
333 byte b1 = gb2312.u2n [((int) ch) * 2 + 1];
334 byte b2 = gb2312.u2n [((int) ch) * 2];
// Both bytes non-zero means the table has a mapping for this char.
335 if (b1 != 0 && b2 != 0) {
// No GB2312 mapping: convert through GB18030Source.FromUCS instead.
343 long value = GB18030Source.FromUCS (ch);
// A still-pending surrogate is examined, then the pending state is cleared.
352 if (incomplete_byte_count != char.MinValue)
354 incomplete_byte_count = char.MinValue;
// Pointer-based encode of chars into `bytes`; returns the number of bytes
// written (byteIndex - byteStart).
// A surrogate parked by a previous call in incomplete_bytes is consumed first.
359 public unsafe override int GetBytesImpl (char* chars, int charCount, byte* bytes, int byteCount, bool refresh)
364 int charEnd = charIndex + charCount;
365 int byteStart = byteIndex;
// Start from the char held over from an earlier call, if any.
366 char ch = incomplete_bytes;
368 while (charIndex < charEnd) {
// Nothing pending: take the next input char. The pending slot is then reset.
369 if (incomplete_bytes == char.MinValue)
370 ch = chars [charIndex++];
372 incomplete_bytes = char.MinValue;
// Single-byte output: the char value is written as one byte.
376 bytes [byteIndex++] = (byte) ch;
378 } else if (Char.IsSurrogate (ch)) {
// Surrogate at the end of the input: park it for a later call.
380 if (charIndex == charEnd) {
381 incomplete_bytes = ch;
384 char ch2 = chars [charIndex++];
// ch2 is not a surrogate, so the pair is malformed.
// NOTE(review): Char.IsSurrogate accepts both high and low surrogates, while
// the arithmetic below assumes ch is a high and ch2 a low surrogate - confirm
// that the ordering is validated somewhere.
385 if (!Char.IsSurrogate (ch2)) {
389 chars, ref charIndex, ref charCount,
390 bytes, ref byteIndex, ref byteCount, null);
392 bytes [byteIndex++] = (byte) '?';
// Combine the pair into a 20-bit value (no 0x10000 is added here), convert it
// with FromUCSSurrogate and hand the result to Unlinear together with the
// current output position.
396 int cp = (ch - 0xD800) * 0x400 + ch2 - 0xDC00;
397 GB18030Source.Unlinear (bytes + byteIndex, GB18030Source.FromUCSSurrogate (cp));
// NOTE(review): this test uses `ch <= 0x80` while the matching test in
// GetByteCountImpl uses `ch < 0x80`; confirm that U+0080 is written the same
// way it is counted.
403 if (ch <= 0x80 || ch == 0xFF) {
404 // Character maps to itself
405 bytes [byteIndex++] = (byte) ch;
// GB2312 lookup: the table holds two bytes per char; b1 is read from the odd
// slot and b2 from the even slot.
409 byte b1 = gb2312.u2n [((int) ch) * 2 + 1];
410 byte b2 = gb2312.u2n [((int) ch) * 2];
// Both bytes non-zero: emit them in b1, b2 order.
411 if (b1 != 0 && b2 != 0) {
412 bytes [byteIndex++] = b1;
413 bytes [byteIndex++] = b2;
// No GB2312 mapping: convert through GB18030Source.FromUCS; 0x3F ('?') is the
// substitute byte, otherwise the value goes to Unlinear.
417 long value = GB18030Source.FromUCS (ch);
419 bytes [byteIndex++] = 0x3F; // invalid(?)
422 GB18030Source.Unlinear (bytes + byteIndex, value);
// A surrogate that is still pending is replaced by 0x3F, and the pending
// state is cleared.
428 if (incomplete_bytes != char.MinValue)
429 bytes [byteIndex++] = 0x3F; // incomplete
430 incomplete_bytes = char.MinValue;
433 return byteIndex - byteStart;
// Array-based byte count; mirrors the pointer-based GetByteCountImpl.
// Chars are classified one at a time: surrogates, single-byte chars, chars
// found in the GB2312 table, and everything else via GB18030Source.FromUCS.
437 public override int GetByteCount(char[] chars, int index, int count, bool refresh)
444 char ch = chars[start];
452 else if (Char.IsSurrogate(ch))
// The surrogate is the last char of the input: remember it as pending.
455 if (start + 1 == end)
457 incomplete_byte_count = ch;
// NOTE(review): this test uses `ch < 0x80` while the matching test in GetBytes
// uses `ch <= 0x80`; confirm that U+0080 is counted the same way it is written.
468 if (ch < 0x80 || ch == 0xFF)
// GB2312 lookup: the table holds two bytes per char; b1 is read from the odd
// slot and b2 from the even slot.
476 byte b1 = gb2312.u2n[((int)ch) * 2 + 1];
477 byte b2 = gb2312.u2n[((int)ch) * 2];
// Both bytes non-zero means the table has a mapping for this char.
478 if (b1 != 0 && b2 != 0)
// No GB2312 mapping: convert through GB18030Source.FromUCS instead.
487 long value = GB18030Source.FromUCS(ch);
// A still-pending surrogate is examined, then the pending state is cleared.
497 if (incomplete_byte_count != char.MinValue)
499 incomplete_byte_count = char.MinValue;
// Array-based encode of chars into `bytes`; returns the number of bytes written
// (byteIndex - byteStart). Mirrors the pointer-based GetBytesImpl.
// A surrogate parked by a previous call in incomplete_bytes is consumed first.
504 public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex, bool refresh)
// NOTE(review): byteCount is the full array length, not the space left after
// byteIndex; it is handed by ref to HandleFallback below - confirm that is the
// value HandleFallback expects.
506 int byteCount = bytes.Length;
507 int charEnd = charIndex + charCount;
508 int byteStart = byteIndex;
// Start from the char held over from an earlier call, if any.
509 char ch = incomplete_bytes;
511 while (charIndex < charEnd)
// Nothing pending: take the next input char. The pending slot is then reset.
513 if (incomplete_bytes == char.MinValue)
514 ch = chars[charIndex++];
516 incomplete_bytes = char.MinValue;
// Single-byte output: the char value is written as one byte.
521 bytes[byteIndex++] = (byte)ch;
524 else if (Char.IsSurrogate(ch))
// Surrogate at the end of the input: park it for a later call.
527 if (charIndex == charEnd)
529 incomplete_bytes = ch;
532 char ch2 = chars[charIndex++];
// ch2 is not a surrogate, so the pair is malformed.
// NOTE(review): Char.IsSurrogate accepts both high and low surrogates, while
// the arithmetic below assumes ch is a high and ch2 a low surrogate - confirm
// that the ordering is validated somewhere.
533 if (!Char.IsSurrogate(ch2))
537 HandleFallback (chars, ref charIndex, ref charCount,
538 bytes, ref byteIndex, ref byteCount, null);
540 bytes [byteIndex++] = (byte) '?';
// Combine the pair into a 20-bit value (no 0x10000 is added here), convert it
// with FromUCSSurrogate and hand the result to Unlinear together with the
// output array and current position.
544 int cp = (ch - 0xD800) * 0x400 + ch2 - 0xDC00;
545 GB18030Source.Unlinear(bytes, byteIndex, GB18030Source.FromUCSSurrogate(cp));
// NOTE(review): this test uses `ch <= 0x80` while the matching test in
// GetByteCount uses `ch < 0x80`; confirm that U+0080 is written the same way
// it is counted.
551 if (ch <= 0x80 || ch == 0xFF)
553 // Character maps to itself
554 bytes[byteIndex++] = (byte)ch;
// GB2312 lookup: the table holds two bytes per char; b1 is read from the odd
// slot and b2 from the even slot.
558 byte b1 = gb2312.u2n[((int)ch) * 2 + 1];
559 byte b2 = gb2312.u2n[((int)ch) * 2];
// Both bytes non-zero: emit them in b1, b2 order.
560 if (b1 != 0 && b2 != 0)
562 bytes[byteIndex++] = b1;
563 bytes[byteIndex++] = b2;
// No GB2312 mapping: convert through GB18030Source.FromUCS; 0x3F ('?') is the
// substitute byte, otherwise the value goes to Unlinear.
567 long value = GB18030Source.FromUCS(ch);
569 bytes[byteIndex++] = 0x3F; // invalid(?)
573 GB18030Source.Unlinear(bytes, byteIndex, value);
// A surrogate that is still pending is replaced by 0x3F, and the pending
// state is cleared.
580 if (incomplete_bytes != char.MinValue)
581 bytes[byteIndex++] = 0x3F; // incomplete
582 incomplete_bytes = char.MinValue;
585 return byteIndex - byteStart;