Merge branch 'alexischr/nursery-canaries-managed-alloc'
[mono.git] / mcs / class / I18N / CJK / CP936.cs
1 //
2 // I18N.CJK.CP936.cs
3 //
4 // Author:
5 //      Atsushi Enomoto <atsushi@ximian.com>
6 //
7 // (new implementation based on CP950.)
8 //
9
10 using System;
11 using System.Text;
12 using I18N.Common;
13
14 namespace I18N.CJK
15 {
[Serializable]
internal class CP936 : DbcsEncoding
{
        // Code page number Windows uses for Gb2312.
        private const int GB2312_CODE_PAGE = 936;

        // Name returned by BodyName, HeaderName and WebName.
        private const string GB2312_NAME = "gb2312";

        // Constructor: hands code page 936 to the base class.
        public CP936 ()
                : base (GB2312_CODE_PAGE)
        {
        }

        // Conversion tables used by this encoding and by the decoder
        // it creates.
        internal override DbcsConvert GetConvert ()
        {
                return DbcsConvert.Gb2312;
        }

#if !DISABLE_UNSAFE
        // Get the number of bytes needed to encode a character buffer.
        // Runs the encoder with a null output buffer, so bytes are only
        // counted, never stored.
        public unsafe override int GetByteCountImpl (char* chars, int count)
        {
                return GetBytesImpl (chars, count, null, 0);
        }

        // Encode charCount characters from chars into bytes and return the
        // number of bytes produced.  When bytes is null nothing is stored
        // and only the length is computed.
        //
        // NOTE(review): byteCount is only forwarded to HandleFallback; this
        // loop does not compare it against the write position - confirm
        // that callers size the output buffer up front.
        public unsafe override int GetBytesImpl (char* chars, int charCount, byte* bytes, int byteCount)
        {
                DbcsConvert table = GetConvert ();
                EncoderFallbackBuffer fallback = null;
                int limit = charCount;
                int written = 0;

                // charCount is decremented on every iteration and passed by
                // reference to HandleFallback together with the position.
                for (int pos = 0; pos < limit; pos++, charCount--) {
                        char ch = chars [pos];

                        // Single-byte range: the code unit is stored as-is.
                        if (ch <= 0x80 || ch == 0xFF) {
                                if (bytes != null)
                                        bytes [written] = (byte) ch;
                                written++;
                                continue;
                        }

                        // Two table bytes per character: the byte at the odd
                        // index is emitted first, the even one second.
                        byte first = table.u2n [((int) ch) * 2 + 1];
                        byte second = table.u2n [((int) ch) * 2];

                        // A (0, 0) entry is treated as "no mapping".
                        if (first == 0 && second == 0) {
                                HandleFallback (ref fallback, chars,
                                        ref pos, ref charCount,
                                        bytes, ref written, ref byteCount, null);
                                continue;
                        }

                        if (bytes != null) {
                                bytes [written] = first;
                                bytes [written + 1] = second;
                        }
                        written += 2;
                }

                return written;
        }
#else
        // Encode charCount characters starting at chars[charIndex] into
        // bytes starting at byteIndex and return the number of bytes
        // produced.  When bytes is null nothing is stored and only the
        // length is computed.
        protected int GetBytesInternal (char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
        {
                int start = byteIndex;
                int limit = charIndex + charCount;

                // Forwarded by reference to HandleFallback only.
                int byteCount;
                if (bytes != null)
                        byteCount = bytes.Length;
                else
                        byteCount = 0;

                DbcsConvert table = GetConvert ();
                EncoderFallbackBuffer fallback = null;

                // charCount is decremented on every iteration and passed by
                // reference to HandleFallback together with the position.
                for (int pos = charIndex; pos < limit; pos++, charCount--) {
                        char ch = chars [pos];

                        // Single-byte range: the code unit is stored as-is.
                        if (ch <= 0x80 || ch == 0xFF) {
                                if (bytes != null)
                                        bytes [byteIndex] = (byte) ch;
                                byteIndex++;
                                continue;
                        }

                        // Two table bytes per character: the byte at the odd
                        // index is emitted first, the even one second.
                        byte first = table.u2n [((int) ch) * 2 + 1];
                        byte second = table.u2n [((int) ch) * 2];

                        // A (0, 0) entry is treated as "no mapping".
                        if (first == 0 && second == 0) {
                                HandleFallback (ref fallback, chars, ref pos, ref charCount,
                                        bytes, ref byteIndex, ref byteCount, null);
                                continue;
                        }

                        if (bytes != null) {
                                bytes [byteIndex] = first;
                                bytes [byteIndex + 1] = second;
                        }
                        byteIndex += 2;
                }

                return byteIndex - start;
        }

        // Get the number of bytes needed to encode a character buffer.
        // Delegates to GetBytes with a null output buffer.
        public override int GetByteCount (char[] chars, int index, int count)
        {
                return GetBytes (chars, index, count, null, 0);
        }

        // Get the bytes that result from encoding a character buffer.
        public override int GetBytes (char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
        {
                return GetBytesInternal (chars, charIndex, charCount, bytes, byteIndex);
        }
#endif
        // Get the number of characters that result from decoding a byte
        // buffer.  A new decoder is created for each call.
        public override int GetCharCount (byte [] bytes, int index, int count)
        {
                Decoder decoder = GetDecoder ();
                return decoder.GetCharCount (bytes, index, count);
        }

        // Get the characters that result from decoding a byte buffer.
        // A new decoder is created for each call.
        public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
                                      char[] chars, int charIndex)
        {
                Decoder decoder = GetDecoder ();
                return decoder.GetChars (bytes, byteIndex, byteCount, chars, charIndex);
        }

        // Get a decoder that handles a rolling Gb2312 state.
        public override Decoder GetDecoder ()
        {
                DbcsConvert table = GetConvert ();
                return new CP936Decoder (table);
        }

        // Get the mail body name for this encoding.
        public override String BodyName
        {
                get { return GB2312_NAME; }
        }

        // Get the human-readable name for this encoding.
        public override String EncodingName
        {
                get { return "Chinese Simplified (GB2312)"; }
        }

        // Get the mail agent header name for this encoding.
        public override String HeaderName
        {
                get { return GB2312_NAME; }
        }

        // Determine if this encoding can be displayed in a Web browser.
        public override bool IsBrowserDisplay
        {
                get { return true; }
        }

        // Determine if this encoding can be saved from a Web browser.
        public override bool IsBrowserSave
        {
                get { return true; }
        }

        // Determine if this encoding can be displayed in a mail/news agent.
        public override bool IsMailNewsDisplay
        {
                get { return true; }
        }

        // Determine if this encoding can be saved from a mail/news agent.
        public override bool IsMailNewsSave
        {
                get { return true; }
        }

        // Get the IANA-preferred Web name for this encoding.
        public override String WebName
        {
                get { return GB2312_NAME; }
        }
}
196
// Decoder that handles a rolling Gb2312 state: a lead byte seen at the
// end of one call is remembered and paired with the first byte of the
// next call.
sealed class CP936Decoder : DbcsEncoding.DbcsDecoder
{
        // First lead byte of the double-byte area; row 0 of the n2u table.
        const int FIRST_LEAD_BYTE = 0x81;

        // Trail bytes covered by one table row: 0x40..0xFE inclusive.
        const int FIRST_TRAIL_BYTE = 0x40;
        const int LAST_TRAIL_BYTE = 0xFE;

        // Number of table cells per lead byte
        // (LAST_TRAIL_BYTE - FIRST_TRAIL_BYTE + 1).
        const int TRAIL_BYTES_PER_LEAD = 191;

        // Constructor.
        public CP936Decoder (DbcsConvert convert)
                : base (convert)
        {
        }

        // Pending lead byte (0 = none) carried between calls.  Counting
        // and converting keep separate state: last_byte_count is used by
        // GetCharCount only, last_byte_bytes by GetChars only.
        int last_byte_count, last_byte_bytes;

        // True for byte values that decode to a single character on their
        // own (0x00..0x80 and 0xFF); everything else is a lead byte.
        static bool IsSingleByte (int b)
        {
                return b <= 0x80 || b == 0xFF;
        }

        // Decode one lead/trail pair.  Returns '?' when the trail byte is
        // outside the 0x40..0xFE row, when the computed index falls outside
        // the table, or when the table entry is 0.
        char DecodePair (int lead, int trail)
        {
                // Without this check a trail byte below 0x40 (or 0xFF) would
                // index a cell that belongs to a neighbouring lead-byte row
                // and yield an unrelated character.
                if (trail < FIRST_TRAIL_BYTE || trail > LAST_TRAIL_BYTE)
                        return '?';

                int ord = ((lead - FIRST_LEAD_BYTE) * TRAIL_BYTES_PER_LEAD
                           + trail - FIRST_TRAIL_BYTE) * 2;

                // Two bytes are read, so ord + 1 must be inside the table too.
                if (ord < 0 || ord + 1 >= convert.n2u.Length)
                        return '?';

                // Table entries are stored low byte first.
                char c = (char) (convert.n2u [ord] + convert.n2u [ord + 1] * 256);
                return c == 0 ? '?' : c;
        }

        // Get the number of characters that result from decoding a byte
        // buffer, keeping a trailing lead byte pending for the next call.
        public override int GetCharCount (byte [] bytes, int index, int count)
        {
                return GetCharCount (bytes, index, count, false);
        }

        // Get the number of characters that result from decoding a byte
        // buffer.  Every single byte and every lead/trail pair counts as one
        // character.  If the input ends on a lead byte, it is counted as one
        // character when refresh is true, otherwise it is remembered for the
        // next GetCharCount call.
        public override
        int GetCharCount (byte [] bytes, int index, int count, bool refresh)
        {
                CheckRange (bytes, index, count);

                int lastByte = last_byte_count;
                last_byte_count = 0;
                int length = 0;
                while (count-- > 0) {
                        int b = bytes [index++];
                        if (lastByte == 0 && !IsSingleByte (b)) {
                                // Lead byte: wait for its trail byte.
                                lastByte = b;
                                continue;
                        }
                        // Either a single byte or the trail of a pair.
                        length++;
                        lastByte = 0;
                }

                if (lastByte != 0) {
                        if (refresh)
                                length++;
                        else
                                last_byte_count = lastByte;
                }

                return length;
        }

        // Get the characters that result from decoding a byte buffer,
        // keeping a trailing lead byte pending for the next call.
        public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
                                      char[] chars, int charIndex)
        {
                return GetChars (bytes, byteIndex, byteCount, chars, charIndex, false);
        }

        // Decode byteCount bytes starting at bytes[byteIndex] into chars
        // starting at charIndex and return the number of characters written.
        // Undecodable pairs produce '?'.  If the input ends on a lead byte,
        // '?' is written when refresh is true, otherwise the byte is
        // remembered for the next GetChars call.
        public override
        int GetChars (byte [] bytes, int byteIndex, int byteCount,
                      char [] chars, int charIndex, bool refresh)
        {
                CheckRange (bytes, byteIndex, byteCount, chars, charIndex);

                int origIndex = charIndex;
                int lastByte = last_byte_bytes;
                last_byte_bytes = 0;
                while (byteCount-- > 0) {
                        int b = bytes [byteIndex++];
                        if (lastByte == 0) {
                                if (IsSingleByte (b))
                                        chars [charIndex++] = (char) b;
                                else
                                        lastByte = b; // 0x81..0xFE: lead byte
                                continue;
                        }
                        chars [charIndex++] = DecodePair (lastByte, b);
                        lastByte = 0;
                }

                if (lastByte != 0) {
                        if (refresh) {
                                // FIXME: handle fallback
                                chars [charIndex++] = '?';
                        }
                        else
                                last_byte_bytes = lastByte;
                }

                return charIndex - origIndex;
        }
}
300         
// Alias of CP936 under the name "ENCgb2312"; adds no behaviour of its own.
// NOTE(review): presumably looked up by encoding name elsewhere - confirm.
[Serializable]
internal class ENCgb2312 : CP936
{
        // Constructor.
        public ENCgb2312 ()
        {
        }
}
306 }