5 // Atsushi Enomoto <atsushi@ximian.com>
8 using System.Reflection;
// Internal subclass of GB18030Encoding that adds nothing beyond a
// parameterless constructor.
// NOTE(review): presumably exists so the encoding can be located by a
// name-based lookup ("ENC" + web name) - confirm against the loader.
15 internal class ENCgb18030 : GB18030Encoding
// Forwards to the parameterless base constructor; no extra state.
17 public ENCgb18030 (): base () {}
// Public subclass of GB18030Encoding with an empty body; it behaves exactly
// like its base class.
// NOTE(review): presumably named after code page number 54936 so it can be
// found by code page - confirm against the loader.
21 public class CP54936 : GB18030Encoding { }
// GB18030 text encoding built on MonoEncoding. Every conversion method
// visible below delegates to a GB18030Encoder or GB18030Decoder instance.
// NOTE(review): the embedded line numbers skip values, so braces and some
// statements of this class are not visible in this listing.
24 public class GB18030Encoding : MonoEncoding
// Constructor; its base-call arguments and body are not visible here.
27 public GB18030Encoding ()
// Display name reported for this encoding.
32 public override string EncodingName {
33 get { return "Chinese Simplified (GB18030)"; }
// Name reported through the WebName property.
36 public override string WebName {
37 get { return "GB18030"; }
// Upper bound on the bytes needed for `len` chars. The return expression is
// not visible; the comment below names the case that drives the bound.
40 public override int GetMaxByteCount (int len)
42 // non-GB2312 characters in \u0080 - \uFFFF
// Upper bound on the chars produced from `len` bytes (body not visible).
46 public override int GetMaxCharCount (int len)
// Counts the bytes needed to encode chars[index .. index+length).
// A fresh encoder is created per call, so no state carries over between
// calls; `true` is passed as the encoder's final `refresh` argument.
51 public override int GetByteCount (char [] chars, int index, int length)
53 return new GB18030Encoder (this).GetByteCount (chars, index, length, true);
// Pointer-based encode: writes the encoding of `charCount` chars into
// `bytes` and returns what the encoder returns. Uses a fresh encoder with
// `refresh` = true, mirroring GetByteCount above.
56 public unsafe override int GetBytesImpl (char* chars, int charCount, byte* bytes, int byteCount)
58 return new GB18030Encoder (this).GetBytesImpl (chars, charCount, bytes, byteCount, true);
// Counts the chars that decoding bytes[start .. start+len) would produce,
// using a fresh decoder.
61 public override int GetCharCount (byte [] bytes, int start, int len)
63 return new GB18030Decoder ().GetCharCount (bytes, start, len);
// Decodes bytes[byteIdx .. byteIdx+srclen) into `chars` starting at charIdx,
// using a fresh decoder, and returns the decoder's result.
66 public override int GetChars (byte [] bytes, int byteIdx, int srclen, char [] chars, int charIdx)
68 return new GB18030Decoder ().GetChars (bytes, byteIdx, srclen, chars, charIdx);
// Returns a new encoder bound to this encoding instance.
71 public override Encoder GetEncoder ()
73 return new GB18030Encoder (this);
// Returns a new decoder.
76 public override Decoder GetDecoder ()
78 return new GB18030Decoder ();
// Decoder for GB18030 byte sequences. Two-byte sequences are looked up in
// the shared GB2312 table; four-byte sequences go through
// GB18030Source.FromGBX.
// NOTE(review): the embedded line numbers skip values, so several guards
// and bookkeeping statements are not visible in this listing. Comments
// below describe only what the visible lines establish.
82 class GB18030Decoder : DbcsEncoding.DbcsDecoder
// Conversion table used for the two-byte lookup in GetChars.
84 static DbcsConvert gb2312 = DbcsConvert.Gb2312;
85 // for now incomplete block is not supported - should we?
86 // int incomplete1 = -1, incomplete2 = -1, incomplete3 = -1;
// Constructor; base-call arguments and body are not visible here.
88 public GB18030Decoder ()
// Counts the chars that bytes[start .. start+len) decode to. The branch
// order matches GetChars below.
93 public override int GetCharCount (byte [] bytes, int start, int len)
// Argument validation is delegated to CheckRange.
95 CheckRange (bytes, start, len);
97 int end = start + len;
// Lead byte below 0x80.
100 if (bytes [start] < 0x80) {
105 else if (bytes [start] == 0x80) {
106 // Euro sign - actually it is obsolete,
107 // now it's just reserved but not used
112 else if (bytes [start] == 0xFF) {
113 // invalid data - fill '?'
// A multi-byte lead byte with no byte after it inside the range.
118 else if (start + 1 >= end) {
119 // incomplete1 = bytes [start];
123 break; // incomplete tail.
126 byte second = bytes [start + 1];
// Second bytes 0x7F and 0xFF are singled out here (handling not visible).
127 if (second == 0x7F || second == 0xFF) {
// A second byte in 0x30..0x39 selects the four-byte form.
133 else if (0x30 <= second && second <= 0x39) {
// Fewer than four bytes remain for the four-byte form.
135 if (start + 3 >= end) {
137 // incomplete1 = bytes [start];
138 // incomplete2 = bytes [start + 1];
139 // if (start + 3 == end)
140 // incomplete3 = bytes [start + 2];
// Adds 3 when exactly three bytes of the sequence remain, otherwise 2.
141 ret += start + 3 == end ? 3 : 2;
144 long value = GB18030Source.FromGBX (bytes, start);
// NOTE(review): the guard for this statement is not visible; presumably
// FromGBX returns a negative value on invalid input so that subtracting it
// advances `start` - confirm against GB18030Source.FromGBX.
148 start -= (int) value;
// Values at or above 0x10000 take this branch (the count added is not
// visible here; GetChars writes two chars for the same condition).
149 } else if (value >= 0x10000) {
// Decodes bytes[byteIndex .. byteIndex+byteCount) into `chars` starting at
// charIndex and returns the number of chars written.
167 public override int GetChars (byte [] bytes, int byteIndex, int byteCount, char [] chars, int charIndex)
// Argument validation is delegated to CheckRange.
169 CheckRange (bytes, byteIndex, byteCount, chars, charIndex);
171 int byteEnd = byteIndex + byteCount;
// Remembered so the return value can be computed as a difference.
172 int charStart = charIndex;
174 while (byteIndex < byteEnd) {
// Bytes below 0x80 are copied through unchanged as chars.
175 if (bytes [byteIndex] < 0x80) {
176 chars [charIndex++] = (char) bytes [byteIndex++];
179 else if (bytes [byteIndex] == 0x80) {
180 // Euro sign - actually it is obsolete,
181 // now it's just reserved but not used
182 chars [charIndex++] = '\u20AC';
186 else if (bytes [byteIndex] == 0xFF) {
187 // invalid data - fill '?'
188 chars [charIndex++] = '?';
// A multi-byte lead byte with no byte after it inside the range: stop
// decoding. No state is kept for the leftover byte (see commented-out code).
192 else if (byteIndex + 1 >= byteEnd) {
193 //incomplete1 = bytes [byteIndex++];
196 break; // incomplete tail.
199 byte second = bytes [byteIndex + 1];
// Second bytes 0x7F and 0xFF produce '?'.
200 if (second == 0x7F || second == 0xFF) {
202 chars [charIndex++] = '?';
// A second byte in 0x30..0x39 selects the four-byte form.
205 else if (0x30 <= second && second <= 0x39) {
// Fewer than four bytes remain (handling not visible).
207 if (byteIndex + 3 >= byteEnd) {
209 //incomplete1 = bytes [byteIndex];
210 //incomplete2 = bytes [byteIndex + 1];
211 //if (byteIndex + 3 == byteEnd)
212 // incomplete3 = bytes [byteIndex + 2];
215 long value = GB18030Source.FromGBX (bytes, byteIndex);
// NOTE(review): the guard for the next two statements is not visible;
// presumably a negative `value` signals invalid input, '?' is emitted and
// subtracting `value` advances byteIndex - confirm against FromGBX.
218 chars [charIndex++] = '?';
219 byteIndex -= (int) value;
220 } else if (value >= 0x10000) {
// Written as two chars, based at 0xD800 and 0xDC00 (a surrogate pair).
// NOTE(review): whether `value` is rebased (e.g. by 0x10000) before this
// split is not visible in this listing - verify.
223 chars [charIndex++] = (char) (value / 0x400 + 0xD800);
224 chars [charIndex++] = (char) (value % 0x400 + 0xDC00);
// Remaining case: the value is written as a single char.
228 chars [charIndex++] = (char) value;
// Two-byte form: compute an index into the n2u table, two table bytes per
// entry (hence the final * 2).
232 byte first = bytes [byteIndex];
233 int ord = ((first - 0x81) * 191 + second - 0x40) * 2;
// An index outside the table yields '\0'; otherwise the char is assembled
// from two table bytes, low byte first.
234 char c1 = ord < 0 || ord >= gb2312.n2u.Length ?
235 '\0' : (char) (gb2312.n2u [ord] + gb2312.n2u [ord + 1] * 256);
// Either '?' or the looked-up char is written. The selecting condition is
// not visible; presumably '?' is used when c1 is '\0' - confirm.
237 chars [charIndex++] = '?';
239 chars [charIndex++] = c1;
// Number of chars written by this call.
244 return charIndex - charStart;
// Encoder producing GB18030 bytes from UTF-16 chars. Chars found in the
// GB2312 table become two bytes; other chars go through
// GB18030Source.FromUCS / FromUCSSurrogate and Unlinear.
// NOTE(review): the embedded line numbers skip values, so several guards
// and bookkeeping statements are not visible in this listing. Comments
// below describe only what the visible lines establish.
248 class GB18030Encoder : MonoEncoding.MonoEncoder
// Conversion table; u2n is indexed by (char code * 2) and (char code * 2 + 1).
250 static DbcsConvert gb2312 = DbcsConvert.Gb2312;
// Constructor; base-call arguments and body are not visible here.
252 public GB18030Encoder (MonoEncoding owner)
// Despite their names, both fields hold a char: a surrogate found as the
// last char of the input is stored here (GetByteCount uses the first,
// GetBytesImpl the second). char.MinValue means nothing is pending.
257 char incomplete_byte_count;
258 char incomplete_bytes;
// Counts the bytes needed to encode chars[start .. start+len).
260 public override int GetByteCount (char [] chars, int start, int len, bool refresh)
// Argument validation (the null test guarding the first throw is not
// visible in this listing).
// NOTE(review): the exception parameter names are "index" and "count"
// while this method's parameters are `start` and `len` - confirm this is
// intentional (e.g. to match the base class signature).
263 throw new ArgumentNullException ("chars");
264 if (start < 0 || start > chars.Length)
265 throw new ArgumentOutOfRangeException ("index");
266 if (len < 0 || start + len > chars.Length)
267 throw new ArgumentOutOfRangeException ("count");
269 int end = start + len;
271 while (start < end) {
272 char ch = chars [start];
278 } else if (Char.IsSurrogate (ch)) {
// A surrogate that is the last char of the input is remembered as pending.
280 if (start + 1 == end) {
281 incomplete_byte_count = ch;
// NOTE(review): this test uses `ch < 0x80` while the corresponding test in
// GetBytesImpl uses `ch <= 0x80`; the two may disagree for U+0080 - verify.
290 if (ch < 0x80 || ch == 0xFF) {
// GB2312 table lookup: b1 comes from the odd index, b2 from the even one.
297 byte b1 = gb2312.u2n [((int) ch) * 2 + 1];
298 byte b2 = gb2312.u2n [((int) ch) * 2];
// Both bytes non-zero is treated as a table hit.
299 if (b1 != 0 && b2 != 0) {
// Table miss: the char is mapped through GB18030Source.FromUCS.
307 long value = GB18030Source.FromUCS (ch);
// After the loop: handle and clear a pending surrogate. The enclosing
// condition and the statement between these two lines are not visible.
316 if (incomplete_byte_count != char.MinValue)
318 incomplete_byte_count = char.MinValue;
// Pointer-based encode of `charCount` chars into `bytes`; returns the
// number of bytes written by this call.
323 public unsafe override int GetBytesImpl (char* chars, int charCount, byte* bytes, int byteCount, bool refresh)
// Initialised to null here (where it is assigned is not visible).
328 EncoderFallbackBuffer buffer = null;
331 int charEnd = charIndex + charCount;
// Remembered so the return value can be computed as a difference.
332 int byteStart = byteIndex;
// Start from a surrogate left pending by a previous call, if any.
333 char ch = incomplete_bytes;
335 while (charIndex < charEnd) {
// Read the next input char only when no pending char is being consumed;
// the pending marker is cleared either way (exact nesting not visible).
336 if (incomplete_bytes == char.MinValue)
337 ch = chars [charIndex++];
339 incomplete_bytes = char.MinValue;
// Single-byte output path (its guard is not visible in this listing).
343 bytes [byteIndex++] = (byte) ch;
345 } else if (Char.IsSurrogate (ch)) {
// Surrogate is the last char of the input: keep it for the next call.
347 if (charIndex == charEnd) {
348 incomplete_bytes = ch;
351 char ch2 = chars [charIndex++];
// NOTE(review): this checks "any surrogate", not specifically a low
// surrogate following a high one - verify this is sufficient.
352 if (!Char.IsSurrogate (ch2)) {
// Arguments of a call whose head is not visible here; presumably the
// invalid-char handling that uses `buffer` declared above - confirm.
356 chars, ref charIndex, ref charCount,
357 bytes, ref byteIndex, ref byteCount);
359 bytes [byteIndex++] = (byte) '?';
// Combine the pair into one value and write its GB18030 form at the
// current output position.
363 int cp = (ch - 0xD800) * 0x400 + ch2 - 0xDC00;
364 GB18030Source.Unlinear (bytes + byteIndex, GB18030Source.FromUCSSurrogate (cp));
370 if (ch <= 0x80 || ch == 0xFF) {
371 // Character maps to itself
372 bytes [byteIndex++] = (byte) ch;
// GB2312 table lookup: b1 comes from the odd index, b2 from the even one.
376 byte b1 = gb2312.u2n [((int) ch) * 2 + 1];
377 byte b2 = gb2312.u2n [((int) ch) * 2];
// Table hit: emit the two bytes, b1 first.
378 if (b1 != 0 && b2 != 0) {
379 bytes [byteIndex++] = b1;
380 bytes [byteIndex++] = b2;
// Table miss: map through FromUCS. One visible path writes 0x3F ('?');
// the other writes the GB18030 form via Unlinear. The condition that
// chooses between them is not visible.
384 long value = GB18030Source.FromUCS (ch);
386 bytes [byteIndex++] = 0x3F; // invalid(?)
389 GB18030Source.Unlinear (bytes + byteIndex, value);
// After the loop: a surrogate still pending is written as 0x3F ('?') and
// the pending marker is cleared (the enclosing condition is not visible).
395 if (incomplete_bytes != char.MinValue)
396 bytes [byteIndex++] = 0x3F; // incomplete
397 incomplete_bytes = char.MinValue;
// Number of bytes written by this call.
400 return byteIndex - byteStart;