Merge pull request #622 from killabytenow/master
[mono.git] / mcs / class / I18N / CJK / CP949.cs
1 //
2 // I18N.CJK.CP949
3 //
4 // Author:
5 //   Hye-Shik Chang (perky@FreeBSD.org)
6 //   Atsushi Enomoto  <atsushi@ximian.com>
7 //
8
9 using System;
10 using System.Text;
11 using I18N.Common;
12
13 namespace I18N.CJK
14 {
15     [Serializable]
16     internal class CP949 : KoreanEncoding
17     {
18         // Magic number used by Windows for the UHC code page.
19         private const int UHC_CODE_PAGE = 949;
20
21         // Constructor.
22         public CP949 () : base (UHC_CODE_PAGE, true)
23         {
24         }
25
26         // Get the mail body name for this encoding.
27         public override String BodyName
28         {
29             get { return "ks_c_5601-1987"; }
30         }
31
32         // Get the human-readable name for this encoding.
33         public override String EncodingName
34         {
35             get { return "Korean (UHC)"; }
36         }
37
38         // Get the mail agent header name for this encoding.
39         public override String HeaderName
40         {
41             get { return "ks_c_5601-1987"; }
42         }
43
44         // Get the IANA-preferred Web name for this encoding.
45         public override String WebName
46         {
47             get { return "ks_c_5601-1987"; }
48         }
49
50         /*
51         // Get the Windows code page represented by this object.
52         public override int WindowsCodePage
53         {
54             get { return UHC_PAGE; }
55         }
56         */
57     }
58
59     [Serializable]
60     internal class CP51949 : KoreanEncoding
61     {
62         // Magic number used by Windows for the euc-kr code page.
63         private const int EUCKR_CODE_PAGE = 51949;
64
65         // Constructor.
66         public CP51949 () : base (EUCKR_CODE_PAGE, false)
67         {
68         }
69
70         // Get the mail body name for this encoding.
71         public override String BodyName
72         {
73             get { return "euc-kr"; }
74         }
75
76         // Get the human-readable name for this encoding.
77         public override String EncodingName
78         {
79             get { return "Korean (EUC)"; }
80         }
81
82         // Get the mail agent header name for this encoding.
83         public override String HeaderName
84         {
85             get { return "euc-kr"; }
86         }
87
88         // Get the IANA-preferred Web name for this encoding.
89         public override String WebName
90         {
91             get { return "euc-kr"; }
92         }
93
94         /*
95         // Get the Windows code page represented by this object.
96         public override int WindowsCodePage
97         {
98             get { return UHC_PAGE; }
99         }
100         */
101
102     }
103
104     [Serializable]
105     internal class KoreanEncoding : DbcsEncoding
106     {
107         // Constructor.
108         public KoreanEncoding (int codepage, bool useUHC)
109             : base (codepage, 949) {
110             this.useUHC = useUHC;
111         }
112
113         internal override DbcsConvert GetConvert ()
114         {
115                 return DbcsConvert.KS;
116         }
117
118         bool useUHC;
119
120 #if !DISABLE_UNSAFE
121         // Get the bytes that result from encoding a character buffer.
122         public unsafe override int GetByteCountImpl (char* chars, int count)
123         {
124             int index = 0;
125             int length = 0;
126                         int end = count;
127             DbcsConvert convert = GetConvert ();
128
129             // 00 00 - FF FF
130             for (int i = 0; i < end; i++, charCount--) {
131                 char c = chars[i];
132                 if (c <= 0x80 || c == 0xFF) { // ASCII
133                     length++;
134                     continue;
135                 }
136                 byte b1 = convert.u2n[((int)c) * 2];
137                 byte b2 = convert.u2n[((int)c) * 2 + 1];
138                 if (b1 == 0 && b2 == 0) {
139 #if NET_2_0
140                     // FIXME: handle fallback for GetByteCountImpl().
141                     length++;
142 #else
143                     length++;
144 #endif
145                 }
146                 else
147                     length += 2;
148             }
149             return length;
150         }
151
152         // Get the bytes that result from encoding a character buffer.
153         public unsafe override int GetBytesImpl (char* chars, int charCount,
154                          byte* bytes, int byteCount)
155         {
156             int charIndex = 0;
157             int byteIndex = 0;
158                         int end = charCount;
159             DbcsConvert convert = GetConvert ();
160 #if NET_2_0
161             EncoderFallbackBuffer buffer = null;
162 #endif
163
164             // 00 00 - FF FF
165             int origIndex = byteIndex;
166             for (int = charIndex; i < end; i++, charCount--) {
167                 char c = chars[i];
168                 if (c <= 0x80 || c == 0xFF) { // ASCII
169                     bytes[byteIndex++] = (byte)c;
170                     continue;
171                 }
172                 byte b1 = convert.u2n[((int)c) * 2];
173                 byte b2 = convert.u2n[((int)c) * 2 + 1];
174                 if (b1 == 0 && b2 == 0) {
175 #if NET_2_0
176                     HandleFallback (ref buffer, chars, ref i, ref charCount,
177                         bytes, ref byteIndex, ref byteCount, null);
178 #else
179                     bytes[byteIndex++] = (byte)'?';
180 #endif
181                 } else {
182                     bytes[byteIndex++] = b1;
183                     bytes[byteIndex++] = b2;
184                 }
185             }
186             return byteIndex - origIndex;
187         }
188 #else
189                 // Get the bytes that result from encoding a character buffer.
190                 public override int GetByteCount(char[] chars, int index, int count)
191                 {
192                         int length = 0;
193                         DbcsConvert convert = GetConvert();
194
195                         // 00 00 - FF FF
196                         while (count-- > 0)
197                         {
198                                 char c = chars[index++];
199                                 if (c <= 0x80 || c == 0xFF)
200                                 { // ASCII
201                                         length++;
202                                         continue;
203                                 }
204                                 byte b1 = convert.u2n[((int)c) * 2];
205                                 byte b2 = convert.u2n[((int)c) * 2 + 1];
206                                 if (b1 == 0 && b2 == 0)
207                                 {
208 #if NET_2_0
209                                         // FIXME: handle fallback for GetByteCountImpl().
210                                         length++;
211 #else
212                     length++;
213 #endif
214                                 }
215                                 else
216                                         length += 2;
217                         }
218                         return length;
219                 }
220
221                 // Get the bytes that result from encoding a character buffer.
222                 public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
223                 {
224                         int byteCount = bytes.Length;
225                         int end = charIndex + charCount;
226
227                         DbcsConvert convert = GetConvert();
228 #if NET_2_0
229                         EncoderFallbackBuffer buffer = null;
230 #endif
231
232                         // 00 00 - FF FF
233                         int origIndex = byteIndex;
234                         for (int i = charIndex; i < end; i++, charCount--)
235                         {
236                                 char c = chars[i];
237                                 if (c <= 0x80 || c == 0xFF)
238                                 { // ASCII
239                                         bytes[byteIndex++] = (byte)c;
240                                         continue;
241                                 }
242                                 byte b1 = convert.u2n[((int)c) * 2];
243                                 byte b2 = convert.u2n[((int)c) * 2 + 1];
244                                 if (b1 == 0 && b2 == 0)
245                                 {
246 #if NET_2_0
247                                         HandleFallback (ref buffer, chars, ref i, ref charCount,
248                                                 bytes, ref byteIndex, ref byteCount, null);
249 #else
250                     bytes[byteIndex++] = (byte)'?';
251 #endif
252                                 }
253                                 else
254                                 {
255                                         bytes[byteIndex++] = b1;
256                                         bytes[byteIndex++] = b2;
257                                 }
258                         }
259                         return byteIndex - origIndex;
260                 }
261 #endif
262                 // Get the characters that result from decoding a byte buffer.
263         public override int GetCharCount (byte[] bytes, int index, int count)
264         {
265             return GetDecoder ().GetCharCount (bytes, index, count);
266         }
267
268         // Get the characters that result from decoding a byte buffer.
269         public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
270                          char[] chars, int charIndex)
271         {
272             return GetDecoder ().GetChars (bytes, byteIndex, byteCount, chars, charIndex);
273         }
274
275         // Get a decoder that handles a rolling UHC state.
276         public override Decoder GetDecoder()
277         {
278             return new KoreanDecoder (GetConvert (), useUHC);
279         }
280
281         // Decoder that handles a rolling UHC state.
282         private sealed class KoreanDecoder : DbcsDecoder
283         {
284             // Constructor.
285             public KoreanDecoder (DbcsConvert convert, bool useUHC)
286                 : base(convert)
287             {
288                 this.useUHC = useUHC;
289             }
290             bool useUHC;
291             int last_byte_count, last_byte_conv;
292
293             public override int GetCharCount (byte[] bytes, int index, int count)
294             {
295                 return GetCharCount (bytes, index, count, false);
296             }
297
298 #if NET_2_0
299             public override
300 #endif
301             int GetCharCount (byte [] bytes, int index, int count, bool refresh)
302             {
303                 CheckRange (bytes, index, count);
304
305                 int lastByte = last_byte_count;
306                 last_byte_count = 0;
307                 int length = 0;
308                 while (count-- > 0) {
309                     int b = bytes[index++];
310                     if (lastByte == 0) {
311                         if (b <= 0x80 || b == 0xFF) { // ASCII
312                             length++;
313                             continue;
314                         } else {
315                             lastByte = b;
316                             continue;
317                         }
318                     }
319
320                     char c1;
321                     if (useUHC && lastByte < 0xa1) { // UHC Level 1
322                         int ord = 8836 + (lastByte - 0x81) * 178;
323
324                         if (b >= 0x41 && b <= 0x5A)
325                             ord += b - 0x41;
326                         else if (b >= 0x61 && b <= 0x7A)
327                             ord += b - 0x61 + 26;
328                         else if (b >= 0x81 && b <= 0xFE)
329                             ord += b - 0x81 + 52;
330                         else
331                             ord = -1;
332
333                         if (ord >= 0 && ord * 2 <= convert.n2u.Length)
334                             c1 = (char)(convert.n2u[ord*2] +
335                                         convert.n2u[ord*2 + 1] * 256);
336                         else
337                             c1 = (char)0;
338                     } else if (useUHC && lastByte <= 0xC6 && b < 0xA1) { // UHC Level 2
339                         int ord = 14532 + (lastByte - 0xA1) * 84;
340
341                         if (b >= 0x41 && b <= 0x5A)
342                             ord += b - 0x41;
343                         else if (b >= 0x61 && b <= 0x7A)
344                             ord += b - 0x61 + 26;
345                         else if (b >= 0x81 && b <= 0xA0)
346                             ord += b - 0x81 + 52;
347                         else
348                             ord = -1;
349
350                         if (ord >= 0 && ord * 2 <= convert.n2u.Length)
351                             c1 = (char)(convert.n2u[ord*2] +
352                                         convert.n2u[ord*2 + 1] * 256);
353                         else
354                             c1 = (char)0;
355                     } else if (b >= 0xA1 && b <= 0xFE) { // KS X 1001
356                         int ord = ((lastByte - 0xA1) * 94 + b - 0xA1) * 2;
357
358                         c1 = ord < 0 || ord >= convert.n2u.Length ?
359                             '\0' : (char)(convert.n2u[ord] +
360                                     convert.n2u[ord + 1] * 256);
361                     } else
362                         c1 = (char)0;
363
364                     if (c1 == 0)
365                         // FIXME: fallback
366                         length++;
367                     else
368                         length++;
369                     lastByte = 0;
370                 }
371
372                 if (lastByte != 0) {
373                     if (refresh) {
374                         // FIXME: fallback
375                         length++;
376                         last_byte_count = 0;
377                     }
378                     else
379                         last_byte_count = lastByte;
380                 }
381                 return length;
382             }
383
384             public override int GetChars(byte[] bytes, int byteIndex,
385                                 int byteCount, char[] chars, int charIndex)
386             {
387                 return GetChars (bytes, byteIndex, byteCount, chars, charIndex, false);
388             }
389
390 #if NET_2_0
391             public override
392 #endif
393             int GetChars(byte[] bytes, int byteIndex,
394                                 int byteCount, char[] chars, int charIndex, bool refresh)
395             {
396                 CheckRange (bytes, byteIndex, byteCount, chars, charIndex);
397                 int origIndex = charIndex;
398                 int lastByte = last_byte_conv;
399                 last_byte_conv = 0;
400                 while (byteCount-- > 0) {
401                     int b = bytes[byteIndex++];
402                     if (lastByte == 0) {
403                         if (b <= 0x80 || b == 0xFF) { // ASCII
404                             chars[charIndex++] = (char)b;
405                             continue;
406                         } else {
407                             lastByte = b;
408                             continue;
409                         }
410                     }
411
412                     char c1;
413                     if (useUHC && lastByte < 0xa1) { // UHC Level 1
414                         int ord = 8836 + (lastByte - 0x81) * 178;
415
416                         if (b >= 0x41 && b <= 0x5A)
417                             ord += b - 0x41;
418                         else if (b >= 0x61 && b <= 0x7A)
419                             ord += b - 0x61 + 26;
420                         else if (b >= 0x81 && b <= 0xFE)
421                             ord += b - 0x81 + 52;
422                         else
423                             ord = -1;
424
425                         if (ord >= 0 && ord * 2 <= convert.n2u.Length)
426                             c1 = (char)(convert.n2u[ord*2] +
427                                         convert.n2u[ord*2 + 1] * 256);
428                         else
429                             c1 = (char)0;
430                     } else if (useUHC && lastByte <= 0xC6 && b < 0xA1) { // UHC Level 2
431                         int ord = 14532 + (lastByte - 0xA1) * 84;
432
433                         if (b >= 0x41 && b <= 0x5A)
434                             ord += b - 0x41;
435                         else if (b >= 0x61 && b <= 0x7A)
436                             ord += b - 0x61 + 26;
437                         else if (b >= 0x81 && b <= 0xA0)
438                             ord += b - 0x81 + 52;
439                         else
440                             ord = -1;
441
442                         if (ord >= 0 && ord * 2 <= convert.n2u.Length)
443                             c1 = (char)(convert.n2u[ord*2] +
444                                         convert.n2u[ord*2 + 1] * 256);
445                         else
446                             c1 = (char)0;
447                     } else if (b >= 0xA1 && b <= 0xFE) { // KS X 1001
448                         int ord = ((lastByte - 0xA1) * 94 + b - 0xA1) * 2;
449
450                         c1 = ord < 0 || ord >= convert.n2u.Length ?
451                             '\0' : (char)(convert.n2u[ord] +
452                                     convert.n2u[ord + 1] * 256);
453                     } else
454                         c1 = (char)0;
455
456                     if (c1 == 0)
457                         chars[charIndex++] = '?';
458                     else
459                         chars[charIndex++] = c1;
460                     lastByte = 0;
461                 }
462
463                 if (lastByte != 0) {
464                     if (refresh) {
465                         chars[charIndex++] = '?';
466                         last_byte_conv = 0;
467                     }
468                     else
469                         last_byte_conv = lastByte;
470                 }
471                 return charIndex - origIndex;
472             }
473         }
474     }
475
476     [Serializable]
477     internal class ENCuhc : CP949
478     {
479         public ENCuhc() {}
480     }
481
482     [Serializable]
483     internal class ENCeuc_kr: CP51949
484     {
485         public ENCeuc_kr() {}
486     }
487 }
488
489 // ex: ts=8 sts=4 et