Merge pull request #571 from igotti-google/jt2
[mono.git] / mcs / class / I18N / CJK / CP950.cs
1 //
2 // I18N.CJK.CP950
3 //
4 // Author:
5 //   Alan Tam Siu Lung (Tam@SiuLung.com)
6 //   Atsushi Enomoto  <atsushi@ximian.com>
7 //
8
9 using System;
10 using System.Text;
11 using I18N.Common;
12
13 namespace I18N.CJK
14 {
// Big5 encoding (Windows code page 950) built on the shared DBCS
// conversion tables exposed by DbcsConvert.Big5.
[Serializable]
internal class CP950 : DbcsEncoding
{
        // Magic number used by Windows for the Big5 code page.
        private const int BIG5_CODE_PAGE = 950;

        // Constructor.
        public CP950() : base(BIG5_CODE_PAGE) {
        }

        // Select the Big5 conversion tables for this encoding.
        internal override DbcsConvert GetConvert ()
        {
                return DbcsConvert.Big5;
        }

#if !DISABLE_UNSAFE
        // Get the number of bytes needed to encode a character buffer.
        //   chars - pointer to the first character to measure.
        //   count - number of characters to measure.
        // Returns the byte count: one byte for single-byte and unmappable
        // characters, two bytes for everything found in the u2n table.
        public unsafe override int GetByteCountImpl (char* chars, int count)
        {
                DbcsConvert convert = GetConvert ();
                int index = 0;
                int length = 0;

                while (count-- > 0) {
                        char c = chars[index++];
                        if (c <= 0x80 || c == 0xFF) { // emitted as a single byte
                                length++;
                                continue;
                        }
                        // u2n stores two bytes per UTF-16 code unit: trail byte
                        // at the even offset, lead byte at the odd offset.
                        byte b1 = convert.u2n[((int)c) * 2 + 1];
                        byte b2 = convert.u2n[((int)c) * 2];
                        if (b1 == 0 && b2 == 0) {
                                // FIXME: handle fallback for GetByteCountImpl().
                                // Unmappable characters are counted as one byte.
                                length++;
                        } else {
                                length += 2;
                        }
                }
                return length;
        }

        // Get the bytes that result from encoding a character buffer.
        //   chars, charCount - source characters and how many to encode.
        //   bytes, byteCount - destination buffer and its capacity.
        // Returns the number of bytes written to the destination.
        public unsafe override int GetBytesImpl (char* chars, int charCount,
                                     byte* bytes, int byteCount)
        {
                DbcsConvert convert = GetConvert ();
                int charIndex = 0;
                int byteIndex = 0;
                int end = charCount;
#if NET_2_0
                EncoderFallbackBuffer buffer = null;
#endif

                int origIndex = byteIndex;
                // charCount is decremented in step with i because the
                // fallback handler receives both by reference.
                for (int i = charIndex; i < end; i++, charCount--)
                {
                        char c = chars[i];
                        if (c <= 0x80 || c == 0xFF) { // emitted as a single byte
                                bytes[byteIndex++] = (byte)c;
                                continue;
                        }
                        byte b1 = convert.u2n[((int)c) * 2 + 1];
                        byte b2 = convert.u2n[((int)c) * 2];
                        if (b1 == 0 && b2 == 0) {
#if NET_2_0
                                HandleFallback (ref buffer, chars,
                                        ref i, ref charCount,
                                        bytes, ref byteIndex, ref byteCount, null);
#else
                                bytes[byteIndex++] = (byte)'?';
#endif
                        } else {
                                bytes[byteIndex++] = b1;
                                bytes[byteIndex++] = b2;
                        }
                }
                return byteIndex - origIndex;
        }
#else
        // Get the number of bytes needed to encode a range of a character array.
        //   chars - source characters.
        //   index - position of the first character to measure.
        //   count - number of characters to measure.
        // Returns the byte count: one byte for single-byte and unmappable
        // characters, two bytes for everything found in the u2n table.
        public override int GetByteCount(char[] chars, int index, int count)
        {
                DbcsConvert convert = GetConvert();
                int length = 0;

                while (count-- > 0)
                {
                        char c = chars[index++];
                        if (c <= 0x80 || c == 0xFF)
                        { // emitted as a single byte
                                length++;
                                continue;
                        }
                        byte b1 = convert.u2n[((int)c) * 2 + 1];
                        byte b2 = convert.u2n[((int)c) * 2];
                        if (b1 == 0 && b2 == 0)
                        {
                                // FIXME: handle fallback for GetByteCount().
                                // Unmappable characters are counted as one byte.
                                length++;
                        }
                        else
                        {
                                length += 2;
                        }
                }
                return length;
        }

        // Get the bytes that result from encoding a range of a character array.
        //   chars, charIndex, charCount - source range to encode.
        //   bytes, byteIndex            - destination array and first write position.
        // Returns the number of bytes written to the destination.
        public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
        {
                // NOTE(review): this is the whole array length, not the space
                // remaining after byteIndex - confirm what HandleFallback expects.
                int byteCount = bytes.Length;
                int end = charIndex + charCount;

                DbcsConvert convert = GetConvert();
#if NET_2_0
                EncoderFallbackBuffer buffer = null;
#endif

                int origIndex = byteIndex;
                // charCount is decremented in step with i because the
                // fallback handler receives both by reference.
                for (int i = charIndex; i < end; i++, charCount--)
                {
                        char c = chars[i];
                        if (c <= 0x80 || c == 0xFF)
                        { // emitted as a single byte
                                bytes[byteIndex++] = (byte)c;
                                continue;
                        }
                        byte b1 = convert.u2n[((int)c) * 2 + 1];
                        byte b2 = convert.u2n[((int)c) * 2];
                        if (b1 == 0 && b2 == 0)
                        {
#if NET_2_0
                                HandleFallback (ref buffer, chars, ref i, ref charCount,
                                        bytes, ref byteIndex, ref byteCount, null);
#else
                                bytes[byteIndex++] = (byte)'?';
#endif
                        }
                        else
                        {
                                bytes[byteIndex++] = b1;
                                bytes[byteIndex++] = b2;
                        }
                }
                return byteIndex - origIndex;
        }
#endif
        // Get the characters that result from decoding a byte buffer.
        // The work is delegated to a fresh decoder on every call.
        // NOTE(review): the five-argument decoder overload passes
        // refresh = false, so a lead byte left dangling at the end of the
        // input is stored in the throw-away decoder and produces no output.
        public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                     char[] chars, int charIndex)
        {
                return GetDecoder ().GetChars (bytes, byteIndex, byteCount, chars, charIndex);
        }

        // Get a decoder that handles a rolling Big5 state.
        public override Decoder GetDecoder()
        {
                return new CP950Decoder(GetConvert ());
        }

        // Get the mail body name for this encoding.
        public override String BodyName
        {
                get { return "big5"; }
        }

        // Get the human-readable name for this encoding.
        public override String EncodingName
        {
                get { return "Chinese Traditional (Big5)"; }
        }

        // Get the mail agent header name for this encoding.
        public override String HeaderName
        {
                get { return "big5"; }
        }

        // Get the IANA-preferred Web name for this encoding.
        public override String WebName
        {
                get { return "big5"; }
        }

        // Decoder that handles a rolling Big5 state.
        private sealed class CP950Decoder : DbcsDecoder
        {
                // Constructor.
                public CP950Decoder(DbcsConvert convert) : base(convert) {}

                // Lead byte left pending by the previous call (0 when none).
                // Counting and converting each keep their own copy so that a
                // GetCharCount call does not disturb a GetChars sequence.
                int last_byte_count, last_byte_conv;

                // Map a lead/trail byte pair to its character.
                // Returns '\0' when the pair is invalid or has no mapping.
                private char Lookup (int lead, int trail)
                {
                        // Every lead byte owns a row of 191 entries covering
                        // trail bytes 0x40..0xFE; anything else would alias
                        // into a neighbouring row, so reject it here.
                        if (trail < 0x40 || trail > 0xFE)
                                return '\0';

                        int ord = ((lead - 0xA1) * 191 + trail - 0x40) * 2;
                        // Two bytes are read, so ord + 1 must be a valid index.
                        if (ord < 0 || ord + 1 >= convert.n2u.Length)
                                return '\0';

                        return (char)(convert.n2u[ord] + convert.n2u[ord + 1] * 256);
                }

                // Count the characters produced by a byte range, keeping any
                // trailing lead byte pending for the next call.
                public override int GetCharCount (byte[] bytes, int index, int count)
                {
                        return GetCharCount (bytes, index, count, false);
                }

                // Count the characters produced by a byte range.
                //   refresh - when true a trailing lead byte is counted as one
                //             character; otherwise it is kept for the next call.
#if NET_2_0
                public override
#endif
                int GetCharCount (byte[] bytes, int index, int count, bool refresh)
                {
                        CheckRange (bytes, index, count);

                        int lastByte = last_byte_count;
                        last_byte_count = 0;
                        int length = 0;
                        while (count-- > 0) {
                                int b = bytes[index++];
                                if (lastByte != 0) {
                                        // FIXME: fallback
                                        // A completed pair yields exactly one
                                        // character whether or not it maps, so
                                        // no table lookup is needed to count it.
                                        length++;
                                        lastByte = 0;
                                        continue;
                                }
                                if (b <= 0x80 || b == 0xFF) { // single byte
                                        length++;
                                } else if (b < 0xA1 || b >= 0xFA) {
                                        // incorrect first byte.
                                        length++;
                                        count--; // cut one more byte.
                                } else {
                                        lastByte = b;
                                }
                        }

                        if (lastByte != 0) {
                                if (refresh)
                                        // FIXME: fallback
                                        length++;
                                else
                                        last_byte_count = lastByte;
                        }
                        return length;
                }

                // Decode a byte range, keeping any trailing lead byte pending
                // for the next call.
                public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                             char[] chars, int charIndex)
                {
                        return GetChars (bytes, byteIndex, byteCount, chars, charIndex, false);
                }

                // Decode a byte range into characters.
                //   refresh - when true a trailing lead byte is emitted as '?';
                //             otherwise it is kept for the next call.
                // Returns the number of characters written.
#if NET_2_0
                public override
#endif
                int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                             char[] chars, int charIndex, bool refresh)
                {
                        CheckRange (bytes, byteIndex, byteCount, chars, charIndex);

                        int origIndex = charIndex;
                        int lastByte = last_byte_conv;
                        last_byte_conv = 0;
                        while (byteCount-- > 0) {
                                int b = bytes[byteIndex++];
                                if (lastByte != 0) {
                                        // Second byte of a pair: look it up and
                                        // substitute '?' for invalid or unmapped pairs.
                                        char c1 = Lookup (lastByte, b);
                                        chars[charIndex++] = c1 == 0 ? '?' : c1;
                                        lastByte = 0;
                                        continue;
                                }
                                if (b <= 0x80 || b == 0xFF) { // single byte
                                        chars[charIndex++] = (char)b;
                                } else if (b < 0xA1 || b >= 0xFA) {
                                        // incorrect first byte.
                                        chars[charIndex++] = '?';
                                        byteCount--; // cut one more byte.
                                } else {
                                        lastByte = b;
                                }
                        }

                        if (lastByte != 0) {
                                if (refresh)
                                        chars [charIndex++] = '?';
                                else
                                        last_byte_conv = lastByte;
                        }
                        return charIndex - origIndex;
                }
        }
}
360         
// Big5 encoding published under the class name "ENCbig5".
// It adds nothing to CP950; presumably the name lets the encoding be
// located by its "big5" name - confirm against the I18N lookup code.
[Serializable]
internal class ENCbig5 : CP950
{
        // Constructor; all setup is done by CP950.
        public ENCbig5 () : base ()
        {
        }
}
366 }