5 // Atsushi Enomoto <atsushi@ximian.com>
8 using System.Reflection;
// Internal subclass of GB18030Encoding. The only member shown is a public
// parameterless constructor that chains to the base constructor.
// NOTE(review): presumably an alias type located by encoding name - confirm
// against whatever code instantiates it.
13 internal class ENCgb18030 : GB18030Encoding
15 public ENCgb18030 (): base () {}
// Public subclass of GB18030Encoding that declares no members of its own.
// NOTE(review): the name suggests a code-page-number alias - confirm.
18 public class CP54936 : GB18030Encoding { }
// Encoding implementation for GB18030. The array-based conversion overrides
// below do no work themselves: each call constructs a new GB18030Encoder or
// GB18030Decoder and forwards its arguments to it.
20 public class GB18030Encoding : Encoding
23 public GB18030Encoding ()
// Human-readable name of this encoding.
28 public override string EncodingName {
29 get { return "Chinese Simplified (GB18030)"; }
// Name reported as this encoding's WebName.
32 public override string WebName {
33 get { return "GB18030"; }
// Upper bound on the number of bytes needed to encode 'len' chars.
36 public override int GetMaxByteCount (int len)
38 // non-GB2312 characters in \u0080 - \uFFFF
// Upper bound on the number of chars produced by decoding 'len' bytes.
42 public override int GetMaxCharCount (int len)
// Counts the bytes needed to encode chars [index .. index + length).
47 public override int GetByteCount (char [] chars, int index, int length)
// A new encoder is used per call; 'true' is passed as the encoder's
// final ('refresh') argument.
49 return new GB18030Encoder ().GetByteCount (chars, index, length, true);
// Encodes srclen chars starting at charIdx into bytes starting at byteIdx
// and returns the encoder's result.
52 public override int GetBytes (char [] chars, int charIdx, int srclen, byte [] bytes, int byteIdx)
// Same pattern as GetByteCount: new encoder, 'refresh' argument is true.
54 return new GB18030Encoder ().GetBytes (chars, charIdx, srclen, bytes, byteIdx, true);
// Counts the chars produced by decoding bytes [start .. start + len).
57 public override int GetCharCount (byte [] bytes, int start, int len)
59 return new GB18030Decoder ().GetCharCount (bytes, start, len);
// Decodes srclen bytes starting at byteIdx into chars starting at charIdx
// and returns the decoder's result.
62 public override int GetChars (byte [] bytes, int byteIdx, int srclen, char [] chars, int charIdx)
64 return new GB18030Decoder ().GetChars (bytes, byteIdx, srclen, chars, charIdx);
// Decoder for GB18030 byte sequences. Two-byte sequences are looked up through
// Gb2312Convert.BytePairToChar; sequences whose second byte is an ASCII digit
// (0x30-0x39) are treated as the four-byte form and resolved through
// GB18030Source.FromGBX.
68 class GB18030Decoder : Decoder
// Converter used for the two-byte lookups.
70 Gb2312Convert gb2312 = Gb2312Convert.Convert;
71 // for now incomplete block is not supported - should we?
72 // int incomplete1 = -1, incomplete2 = -1, incomplete3 = -1;
// Counts the chars that decoding bytes [start .. start + len) yields.
// Throws ArgumentNullException ("bytes"), or ArgumentOutOfRangeException
// ("start" / "len") when the range does not fit inside the array.
74 public override int GetCharCount (byte [] bytes, int start, int len)
77 throw new ArgumentNullException ("bytes");
78 if (start < 0 || start > bytes.Length)
79 throw new ArgumentOutOfRangeException ("start");
// NOTE(review): 'start + len' is int arithmetic; unless this is compiled in a
// checked context a very large 'len' can wrap negative and pass this test.
// 'len > bytes.Length - start' would avoid that - confirm before changing.
80 if (len < 0 || start + len > bytes.Length)
81 throw new ArgumentOutOfRangeException ("len");
// Exclusive end of the input range; 'start' is used as the read cursor.
83 int end = start + len;
// Lead byte below 0x80.
86 if (bytes [start] < 0x80) {
91 else if (bytes [start] == 0x80) {
92 // Euro sign - actually it is obsolete,
93 // now it's just reserved but not used
98 else if (bytes [start] == 0xFF) {
99 // invalid data - fill '?'
// The lead byte is the last byte of the range, so no second byte exists.
104 else if (start + 1 >= end) {
105 // incomplete1 = bytes [start];
109 break; // incomplete tail.
112 byte second = bytes [start + 1];
// Second byte values 0x7F and 0xFF get their own branch.
113 if (second == 0x7F || second == 0xFF) {
// A digit as second byte selects the four-byte form.
119 else if (0x30 <= second && second <= 0x39) {
// Fewer than four bytes remain in the range.
121 if (start + 3 >= end) {
123 // incomplete1 = bytes [start];
124 // incomplete2 = bytes [start + 1];
125 // if (start + 3 == end)
126 // incomplete3 = bytes [start + 2];
// Adds 3 when exactly three bytes remain, otherwise 2.
127 ret += start + 3 == end ? 3 : 2;
130 long value = GB18030Source.FromGBX (bytes, start);
// NOTE(review): presumably FromGBX returns a negative value on invalid input,
// so this subtraction moves 'start' forward - confirm in GB18030Source.
134 start -= (int) value;
// Values of 0x10000 and above are handled separately from smaller ones.
135 } else if (value >= 0x10000) {
// Decodes bytes [byteIndex .. byteIndex + byteCount) into 'chars' starting at
// charIndex and returns the number of chars written. Throws
// ArgumentNullException ("bytes" / "chars") or ArgumentOutOfRangeException
// ("byteIndex" / "byteCount" / "charIndex") on the checks shown below.
153 public override int GetChars (byte [] bytes, int byteIndex, int byteCount, char [] chars, int charIndex)
156 throw new ArgumentNullException ("bytes");
158 throw new ArgumentNullException ("chars");
159 if (byteIndex < 0 || byteIndex > bytes.Length)
160 throw new ArgumentOutOfRangeException ("byteIndex");
// NOTE(review): same int-overflow caveat as the range check in GetCharCount.
161 if (byteCount < 0 || byteIndex + byteCount > bytes.Length)
162 throw new ArgumentOutOfRangeException ("byteCount");
163 if (charIndex < 0 || charIndex > chars.Length)
164 throw new ArgumentOutOfRangeException ("charIndex");
// byteEnd is the exclusive input end; charStart remembers where output began
// so the written count can be computed at the end.
166 int byteEnd = byteIndex + byteCount;
167 int charStart = charIndex;
169 while (byteIndex < byteEnd) {
// Lead byte below 0x80 is copied through unchanged as a char.
170 if (bytes [byteIndex] < 0x80) {
171 chars [charIndex++] = (char) bytes [byteIndex++];
174 else if (bytes [byteIndex] == 0x80) {
175 // Euro sign - actually it is obsolete,
176 // now it's just reserved but not used
177 chars [charIndex++] = '\u20AC';
181 else if (bytes [byteIndex] == 0xFF) {
182 // invalid data - fill '?'
183 chars [charIndex++] = '?';
// The lead byte is the last byte of the range, so no second byte exists.
187 else if (byteIndex + 1 >= byteEnd) {
188 //incomplete1 = bytes [byteIndex++];
191 break; // incomplete tail.
194 byte second = bytes [byteIndex + 1];
// Second byte 0x7F or 0xFF produces '?'.
195 if (second == 0x7F || second == 0xFF) {
197 chars [charIndex++] = '?';
// A digit as second byte selects the four-byte form.
200 else if (0x30 <= second && second <= 0x39) {
// Fewer than four bytes remain in the range.
202 if (byteIndex + 3 >= byteEnd) {
204 //incomplete1 = bytes [byteIndex];
205 //incomplete2 = bytes [byteIndex + 1];
206 //if (byteIndex + 3 == byteEnd)
207 // incomplete3 = bytes [byteIndex + 2];
210 long value = GB18030Source.FromGBX (bytes, byteIndex);
// NOTE(review): presumably the failure path of FromGBX - emits '?' and, if the
// value is negative, advances byteIndex by its magnitude; confirm.
213 chars [charIndex++] = '?';
214 byteIndex -= (int) value;
215 } else if (value >= 0x10000) {
// Emits two UTF-16 code units: 0xD800 plus the quotient by 0x400, then 0xDC00
// plus the remainder.
218 chars [charIndex++] = (char) (value / 0x400 + 0xD800);
219 chars [charIndex++] = (char) (value % 0x400 + 0xDC00);
// Otherwise the value is stored as a single char.
223 chars [charIndex++] = (char) value;
227 // GB2312 mapping, or invalid.
228 // ('second' is always valid here).
229 int head = bytes [byteIndex];
230 char c = gb2312.BytePairToChar (ref head, second);
// A char.MinValue result from the lookup is replaced by '?'.
232 chars [charIndex++] = c == char.MinValue ? '?' : c;
// Number of chars written by this call.
236 return charIndex - charStart;
// Encoder producing GB18030 bytes from UTF-16 chars. Two-byte codes come from
// Gb2312Convert.UcsToGbk; other output is written by GB18030Source.Unlinear
// from a value computed by GB18030Source.FromUCS or FromUCSSurrogate.
// A char can be held in the 'incomplete' field between calls.
240 class GB18030Encoder : Encoder
// Converter used for the two-byte lookups.
242 Gb2312Convert gb2312 = Gb2312Convert.Convert;
// Counts the bytes needed to encode chars [start .. start + len).
// Throws ArgumentNullException ("chars") or ArgumentOutOfRangeException on
// the range checks shown below.
245 public override int GetByteCount (char [] chars, int start, int len, bool refresh)
// Clears the held-over char.
248 incomplete = char.MinValue;
251 throw new ArgumentNullException ("chars");
// NOTE(review): the names reported here ("index", "count") are not this
// method's parameter names ('start', 'len'); GB18030Decoder reports its own
// parameter names. Callers may depend on the current strings - confirm
// before aligning them.
252 if (start < 0 || start > chars.Length)
253 throw new ArgumentOutOfRangeException ("index");
// NOTE(review): 'start + len' is int arithmetic and can wrap for a very large
// 'len' unless compiled in a checked context.
254 if (len < 0 || start + len > chars.Length)
255 throw new ArgumentOutOfRangeException ("count");
// Exclusive end of the input range; 'start' is used as the read cursor.
257 int end = start + len;
259 while (start < end) {
260 char ch = chars [start];
266 } else if (Char.IsSurrogate (ch)) {
// The surrogate is the last char of the range.
268 if (start + 1 == end)
// NOTE(review): this test uses 'ch < 0x80' while the matching test in GetBytes
// uses 'ch <= 0x80', so U+0080 takes different branches when counting and
// when encoding - confirm the count agrees with the bytes written.
275 if (ch < 0x80 || ch == 0xFF) {
281 long value = gb2312.UcsToGbk (ch);
// Encodes chars [charIndex .. charIndex + charCount) into 'bytes' starting at
// byteIndex and returns the number of bytes written. Throws
// ArgumentNullException ("chars" / "bytes") or ArgumentOutOfRangeException
// ("charIndex" / "charCount" / "byteIndex") on the checks shown below.
296 public override int GetBytes (char [] chars, int charIndex, int charCount, byte [] bytes, int byteIndex, bool refresh)
299 throw new ArgumentNullException ("chars");
301 throw new ArgumentNullException ("bytes");
302 if (charIndex < 0 || charIndex > chars.Length)
303 throw new ArgumentOutOfRangeException ("charIndex");
304 if (charCount < 0 || charIndex + charCount > chars.Length)
305 throw new ArgumentOutOfRangeException ("charCount");
306 if (byteIndex < 0 || byteIndex > bytes.Length)
307 throw new ArgumentOutOfRangeException ("byteIndex");
// charEnd is the exclusive input end; byteStart remembers where output began.
// 'ch' starts out as the char held over in 'incomplete'.
309 int charEnd = charIndex + charCount;
310 int byteStart = byteIndex;
311 char ch = incomplete;
313 while (charIndex < charEnd) {
// With no held-over char, read the next input char.
314 if (incomplete == char.MinValue)
315 ch = chars [charIndex++];
317 incomplete = char.MinValue;
321 bytes [byteIndex++] = (byte) ch;
323 } else if (Char.IsSurrogate (ch)) {
// The surrogate is the last char of the range.
325 if (charIndex == charEnd) {
329 char ch2 = chars [charIndex++];
// NOTE(review): Char.IsSurrogate is true for both high and low surrogates, so
// a high surrogate followed by another high surrogate is accepted here; a
// low-surrogate check may be what was intended - confirm.
330 if (!Char.IsSurrogate (ch2)) {
332 bytes [byteIndex++] = (byte) '?';
// Combines the pair into a single offset value (0x10000 is not added here)
// and passes it through FromUCSSurrogate to Unlinear.
335 int cp = (ch - 0xD800) * 0x400 + ch2 - 0xDC00;
336 GB18030Source.Unlinear (bytes, byteIndex, GB18030Source.FromUCSSurrogate (cp));
// NOTE(review): 'ch <= 0x80' here versus 'ch < 0x80' in GetByteCount - see the
// note there.
342 if (ch <= 0x80 || ch == 0xFF) {
343 // Character maps to itself
344 bytes [byteIndex++] = (byte) ch;
348 long value = gb2312.UcsToGbk (ch);
// Two-byte code: high byte first, then low byte.
350 bytes [byteIndex++] = (byte) (value / 0x100);
351 bytes [byteIndex++] = (byte) (value % 0x100);
// Otherwise the value from FromUCS is written by Unlinear.
355 value = GB18030Source.FromUCS (ch);
357 GB18030Source.Unlinear (bytes, byteIndex, value);
// Number of bytes written by this call.
360 return byteIndex - byteStart;