5 // Atsushi Enomoto <atsushi@ximian.com>
7 // (new implementation based on CP950.)
// Encoding for code page 936 (GB2312), built on the DbcsEncoding base class.
17 internal class CP936 : DbcsEncoding
19 // Code page number that Windows assigns to GB2312; passed to the base constructor below.
20 private const int GB2312_CODE_PAGE = 936;
// Constructor: hands the GB2312 code page number to the DbcsEncoding constructor.
23 public CP936() : base(GB2312_CODE_PAGE) {
// Returns the DbcsConvert object for this code page (the static DbcsConvert.Gb2312 member).
// Its u2n and n2u tables are what the encoder and decoder in this file index into.
26 internal override DbcsConvert GetConvert ()
28 return DbcsConvert.Gb2312;
32 // Get the number of bytes that encoding a character buffer would produce.
// Delegates to GetBytesImpl with a null output buffer and a byte count of 0;
// the value GetBytesImpl returns is used as the count.
33 public unsafe override int GetByteCountImpl (char* chars, int count)
35 return GetBytesImpl(chars, count, null, 0);
38 // Encode charCount characters from chars into bytes.
// Returns the number of bytes produced (byteIndex - origIndex).
// bytes may be null (GetByteCountImpl calls it that way); the single-byte stores
// below are guarded by a null check.
// NOTE(review): c, charIndex, byteIndex and end are used here but their
// declarations are not visible in this excerpt -- confirm against the full file.
39 public unsafe override int GetBytesImpl (char* chars, int charCount, byte* bytes, int byteCount)
// Conversion tables for this code page.
41 DbcsConvert gb2312 = GetConvert ();
// Fallback buffer; starts out null and is passed by reference to HandleFallback.
46 EncoderFallbackBuffer buffer = null;
// Remember the starting output position so the byte total can be computed at the end.
49 int origIndex = byteIndex;
// charCount is decremented in step with i.
50 for (int i = charIndex; i < end; i++, charCount--) {
// Characters up to 0x80, and 0xFF, are emitted as one byte equal to the character value.
52 if (c <= 0x80 || c == 0xFF) { // ASCII
// Reserve one output slot; only store when an output buffer was supplied.
53 int offset = byteIndex++;
54 if (bytes != null) bytes[offset] = (byte)c;
// u2n holds two bytes per character: index 2*c + 1 is written first, index 2*c second.
57 byte b1 = gb2312.u2n[((int)c) * 2 + 1];
58 byte b2 = gb2312.u2n[((int)c) * 2];
// Both table bytes zero: the character goes to the fallback handling below
// (presumably meaning the table has no mapping for it -- confirm).
59 if (b1 == 0 && b2 == 0) {
61 HandleFallback (ref buffer, chars,
63 bytes, ref byteIndex, ref byteCount, null);
// Alternative handling: emit a single '?' byte.
// NOTE(review): how this relates to the HandleFallback call above (e.g. a
// conditional-compilation split) is not visible in this excerpt.
65 int offset = byteIndex++;
66 if (bytes != null) bytes[offset] = (byte)'?';
// Emit the two-byte sequence, b1 first.
71 bytes[byteIndex++] = b1;
72 bytes[byteIndex++] = b2;
// Number of bytes produced by this call.
80 return byteIndex - origIndex;
// Encode charCount characters of chars, starting at charIndex, into bytes starting at byteIndex.
// Returns the number of bytes produced (byteIndex - origIndex).
// bytes may be null (GetByteCount reaches this method that way via GetBytes);
// the single-byte stores below are guarded by a null check.
// NOTE(review): the declaration of c is not visible in this excerpt -- confirm
// against the full file.
83 protected int GetBytesInternal(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
// Remember the starting output position so the byte total can be computed at the end.
85 int origIndex = byteIndex;
// One past the last input index to encode.
86 int end = charIndex + charCount;
// Output capacity handed to HandleFallback; 0 when no output array was supplied.
87 int byteCount = bytes != null ? bytes.Length : 0;
// Conversion tables for this code page.
89 DbcsConvert gb2312 = GetConvert();
// Fallback buffer; starts out null and is passed by reference to HandleFallback.
91 EncoderFallbackBuffer buffer = null;
// charCount is decremented in step with i.
93 for (int i = charIndex; i < end; i++, charCount--)
// Characters up to 0x80, and 0xFF, are emitted as one byte equal to the character value.
96 if (c <= 0x80 || c == 0xFF)
// Reserve one output slot; only store when an output array was supplied.
98 int offset = byteIndex++;
99 if (bytes != null) bytes[offset] = (byte)c;
// u2n holds two bytes per character: index 2*c + 1 is written first, index 2*c second.
102 byte b1 = gb2312.u2n[((int)c) * 2 + 1];
103 byte b2 = gb2312.u2n[((int)c) * 2];
// Both table bytes zero: the character goes to the fallback handling below
// (presumably meaning the table has no mapping for it -- confirm).
104 if (b1 == 0 && b2 == 0)
107 HandleFallback (ref buffer, chars, ref i, ref charCount,
108 bytes, ref byteIndex, ref byteCount, null);
// Alternative handling: emit a single '?' byte into the slot reserved on the
// previous line, the same reserve-then-store pattern as the single-byte branch above.
// NOTE(review): how this relates to the HandleFallback call above (e.g. a
// conditional-compilation split) is not visible in this excerpt.
110 int offset = byteIndex++;
111 if (bytes != null) bytes[offset] = (byte)'?';
// Emit the two-byte sequence, b1 first.
118 bytes[byteIndex++] = b1;
119 bytes[byteIndex++] = b2;
// Number of bytes produced by this call.
127 return byteIndex - origIndex;
130 // Get the number of bytes that encoding a range of a character array would produce.
// Delegates to GetBytes with a null output array and byte index 0, and returns its result.
131 public override int GetByteCount(char[] chars, int index, int count)
133 return GetBytes(chars, index, count, null, 0);
136 // Get the bytes that result from encoding a character buffer.
// Thin wrapper: forwards all arguments unchanged to GetBytesInternal and returns its result.
137 public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
139 return GetBytesInternal(chars, charIndex, charCount, bytes, byteIndex);
142 // Get the number of characters that decoding a byte buffer would produce.
// Obtains a decoder from GetDecoder() (a new CP936Decoder per call) and asks it for the count.
143 public override int GetCharCount (byte [] bytes, int index, int count)
145 return GetDecoder ().GetCharCount (bytes, index, count);
148 // Get the characters that result from decoding a byte buffer.
// Obtains a decoder from GetDecoder() (a new CP936Decoder per call), forwards all
// arguments to its GetChars and returns that result.
149 public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
150 char[] chars, int charIndex)
152 return GetDecoder ().GetChars (
153 bytes, byteIndex, byteCount, chars, charIndex);
156 // Get a decoder that handles a rolling Gb2312 state.
// Each call constructs a new CP936Decoder over the tables returned by GetConvert().
157 public override Decoder GetDecoder()
159 return new CP936Decoder(GetConvert ());
162 // Get the mail body name for this encoding; always "gb2312".
163 public override String BodyName
165 get { return("gb2312"); }
168 // Get the human-readable name for this encoding; always "Chinese Simplified (GB2312)".
169 public override String EncodingName
171 get { return("Chinese Simplified (GB2312)"); }
174 // Get the mail agent header name for this encoding; always "gb2312".
175 public override String HeaderName
177 get { return("gb2312"); }
180 // Determine if this encoding can be displayed in a Web browser; always true.
181 public override bool IsBrowserDisplay
183 get { return(true); }
186 // Determine if this encoding can be saved from a Web browser; always true.
187 public override bool IsBrowserSave
189 get { return(true); }
192 // Determine if this encoding can be displayed in a mail/news agent; always true.
193 public override bool IsMailNewsDisplay
195 get { return(true); }
198 // Determine if this encoding can be saved from a mail/news agent; always true.
199 public override bool IsMailNewsSave
201 get { return(true); }
204 // Get the IANA-preferred Web name for this encoding; always "gb2312".
205 public override String WebName
207 get { return("gb2312"); }
211 // Decoder that handles a rolling Gb2312 state: a first byte of a two-byte
// sequence seen at the end of one call is kept for the next call.
212 sealed class CP936Decoder : DbcsEncoding.DbcsDecoder
// Constructor taking the conversion tables to decode with.
// NOTE(review): the base-constructor call / body is not visible in this excerpt;
// GetChars reads the tables through a member named convert.
215 public CP936Decoder (DbcsConvert convert)
// Saved first byte of an unfinished two-byte sequence (0 when none), kept
// separately for the two entry points: last_byte_count is read and written by
// GetCharCount, last_byte_bytes by GetChars.
220 int last_byte_count, last_byte_bytes;
222 // Get the number of characters that decoding a byte buffer would produce.
// Calls the four-argument overload with refresh = false.
223 public override int GetCharCount (byte [] bytes, int index, int count)
225 return GetCharCount (bytes, index, count, false);
// Counting overload that takes an explicit refresh flag.
// NOTE(review): most of the loop body, the use of refresh and the return
// statement are not visible in this excerpt -- confirm against the full file.
231 int GetCharCount (byte [] bytes, int index, int count, bool refresh)
// Presumably validates the (bytes, index, count) arguments -- CheckRange is defined elsewhere.
233 CheckRange (bytes, index, count);
// Start from the byte value an earlier call left in last_byte_count.
235 int lastByte = last_byte_count;
// Consume the input one byte at a time.
238 while (count-- > 0) {
239 int b = bytes [index++];
// Same single-byte test as in GetChars: values up to 0x80, and 0xFF.
241 if (b <= 0x80 || b == 0xFF) { // ASCII
// Store lastByte back into last_byte_count so a later call can pick it up.
259 last_byte_count = lastByte;
// Get the characters that result from decoding a byte buffer.
// Calls the six-argument overload with refresh = false.
265 public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
266 char[] chars, int charIndex)
268 return GetChars (bytes, byteIndex, byteCount, chars, charIndex, false);
// Decoding overload that takes an explicit refresh flag.
// Writes decoded characters into chars starting at charIndex and returns how
// many were written (charIndex - origIndex).
// NOTE(review): several control-flow lines (braces, else branches, the test on
// c1, the use of refresh) are not visible in this excerpt -- confirm against the full file.
274 int GetChars (byte [] bytes, int byteIndex, int byteCount,
275 char [] chars, int charIndex, bool refresh)
// Presumably validates the buffer arguments -- CheckRange is defined elsewhere.
277 CheckRange (bytes, byteIndex, byteCount, chars, charIndex);
// Remember the starting output position so the char total can be computed at the end.
279 int origIndex = charIndex;
// Start from the byte value an earlier call left in last_byte_bytes.
280 int lastByte = last_byte_bytes;
// Consume the input one byte at a time.
282 while (byteCount-- > 0) {
283 int b = bytes[byteIndex++];
// Bytes up to 0x80, and 0xFF, are copied to the output as the char with the same value.
285 if (b <= 0x80 || b == 0xFF) { // ASCII
286 chars[charIndex++] = (char)b;
// NOTE(review): b comes from a byte array (0..0xFF); once the test above has
// failed, b lies in 0x81..0xFE, so this condition looks unreachable as written.
288 } else if (b < 0x81 || b >= 0xFF) {
// Table offset for the pair (lastByte, b): 191 entries per first byte counted
// from 0x81, second bytes counted from 0x40, two table bytes per entry.
295 int ord = ((lastByte - 0x81) * 191 + b - 0x40) * 2;
// An out-of-range offset yields '\0'; otherwise the two table bytes are
// combined low byte first (n2u[ord] + n2u[ord + 1] * 256).
296 char c1 = ord < 0 || ord >= convert.n2u.Length ?
297 '\0' : (char) (convert.n2u[ord] + convert.n2u[ord + 1] * 256);
// Emit '?' -- presumably when c1 is '\0' (the selecting test is not visible here).
299 chars[charIndex++] = '?';
// Otherwise emit the decoded character.
301 chars[charIndex++] = c1;
307 // FIXME: handle fallback
// Emits '?' instead of invoking a fallback (see the FIXME above).
308 chars [charIndex++] = '?';
// Store lastByte back into last_byte_bytes so a later call can pick it up.
312 last_byte_bytes = lastByte;
// Number of characters written by this call.
315 return charIndex - origIndex;
// Subclass of CP936 that, in the visible code, declares only a constructor.
// NOTE(review): presumably exists so the same encoding can be located under the
// name "gb2312" -- confirm against the code that looks up ENC* classes.
320 internal class ENCgb2312 : CP936
// Constructor: chains to the parameterless CP936 constructor.
322 public ENCgb2312(): base () {}