Merge pull request #971
[mono.git] / mcs / class / I18N / CJK / CP932.cs
1 /*
2  * CP932.cs - Japanese (Shift-JIS) code page.
3  *
4  * Copyright (c) 2002  Southern Storm Software, Pty Ltd
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining
7  * a copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  */
24
25 //
26 // Copyright (C) 2005-2006 Novell, Inc.
27 //
28
29 namespace I18N.CJK
30 {
31
32         using System;
33         using System.Text;
34         using I18N.Common;
35
36 #if DISABLE_UNSAFE
37         using MonoEncoder = I18N.Common.MonoSafeEncoder;
38         using MonoEncoding = I18N.Common.MonoSafeEncoding;
39 #endif
40
41         [Serializable]
42         public class CP932 : MonoEncoding
43         {
44                 // Magic number used by Windows for the Shift-JIS code page.
45                 private const int SHIFTJIS_CODE_PAGE = 932;
46
47                 // Constructor.
// Creates the encoding by passing the Shift-JIS code page number
// (SHIFTJIS_CODE_PAGE, 932) to the MonoEncoding base constructor.
public CP932()
        : base(SHIFTJIS_CODE_PAGE)
{
}
51
52 #if !DISABLE_UNSAFE
53                 // Get the number of bytes needed to encode a character buffer.
// Counts the Shift-JIS (code page 932) bytes needed for the first
// "count" characters at "chars".  Every character contributes one
// byte; a second byte is added when the character falls in a range
// that is written as a double-byte sequence.
//
// chars - characters to measure.
// count - number of characters to examine; values <= 0 yield 0.
//
// Returns the total byte count.
public unsafe override int GetByteCountImpl (char* chars, int count)
{
#if __PNET__
        byte *cjkTable = JISConvert.Convert.cjkToJis;
        byte *extraTable = JISConvert.Convert.extraToJis;
#else
        byte[] cjkTable = JISConvert.Convert.cjkToJis;
        byte[] extraTable = JISConvert.Convert.extraToJis;
#endif
        int total = 0;

        for (int i = 0; i < count; i++)
        {
                int c = chars[i];
                bool twoBytes;

                if (c < 0x0100)
                {
                        // ASCII and Latin 1: only a handful of Latin 1
                        // characters have double-byte code points.
                        switch (c)
                        {
                        case 0x00A2:
                        case 0x00A3:
                        case 0x00A7:
                        case 0x00A8:
                        case 0x00AC:
                        case 0x00B0:
                        case 0x00B1:
                        case 0x00B4:
                        case 0x00B6:
                        case 0x00D7:
                        case 0x00F7:
                                twoBytes = true;
                                break;

                        default:
                                twoBytes = false;
                                break;
                        }
                }
                else if (c >= 0x0391 && c <= 0x0451)
                {
                        // The "Greek subset" range is always counted as
                        // two bytes; no table is consulted here.
                        twoBytes = true;
                }
                else if (c >= 0x2010 && c <= 0x9FA5)
                {
                        // Bulk of the CJK set: each table entry is a
                        // little-endian 16-bit value; entries of 0x0100
                        // and above need a second byte.
                        int slot = (c - 0x2010) * 2;
                        int jis = cjkTable[slot] | (cjkTable[slot + 1] << 8);
                        twoBytes = (jis >= 0x0100);
                }
                else if (c >= 0xE000 && c <= 0xE757)
                {
                        // Private use range: always two bytes.
                        twoBytes = true;
                }
                else if (c >= 0xFF01 && c <= 0xFFEF)
                {
                        // Extra characters, including half-width katakana;
                        // same table layout as the CJK range.
                        int slot = (c - 0xFF01) * 2;
                        int jis = extraTable[slot] | (extraTable[slot + 1] << 8);
                        twoBytes = (jis >= 0x0100);
                }
                else
                {
                        // Anything else is counted as a single byte.
                        twoBytes = false;
                }

                total += twoBytes ? 2 : 1;
        }

        return total;
}
126
127                 // Get the bytes that result from encoding a character buffer.
// Encodes "charCount" characters from "chars" as Shift-JIS (code page
// 932) bytes written to "bytes".
//
// chars     - source characters.
// charCount - number of characters to encode.
// bytes     - destination buffer.
// byteCount - capacity of the destination buffer, in bytes.
//
// Returns the number of bytes written.
//
// Throws ArgumentException ("Arg_InsufficientSpace", parameter "bytes")
// when the destination cannot hold the next encoded character.
//
// Characters without a mapping are routed through HandleFallback when
// NET_2_0 is defined, and are written as '?' otherwise.
public unsafe override int GetBytesImpl (
        char* chars, int charCount, byte* bytes, int byteCount)
{
#if NET_2_0
        EncoderFallbackBuffer buffer = null;
#endif

        // Output always starts at offset 0 of "bytes".
        int posn = 0;
        int end = charCount;
        int byteLength = byteCount;
        int ch, value;
#if __PNET__
        byte *cjkToJis = JISConvert.Convert.cjkToJis;
        byte *greekToJis = JISConvert.Convert.greekToJis;
        byte *extraToJis = JISConvert.Convert.extraToJis;
#else
        byte[] cjkToJis = JISConvert.Convert.cjkToJis;
        byte[] greekToJis = JISConvert.Convert.greekToJis;
        byte[] extraToJis = JISConvert.Convert.extraToJis;
#endif
        // charCount is decremented in step with i, so it always holds the
        // number of characters left from position i onwards.
        for (int i = 0; i < end; i++, charCount--)
        {
                ch = chars[i];
                if(posn >= byteLength)
                {
                        throw new ArgumentException
                                (Strings.GetString("Arg_InsufficientSpace"),
                                 "bytes");
                }
                if(ch < 0x0080)
                {
                        // Character maps to itself.
                        bytes[posn++] = (byte)ch;
                        continue;
                }
                else if(ch < 0x0100)
                {
                        // Special Latin 1 characters that map to double-byte
                        // code points.  All of them share the lead byte 0x81,
                        // so only the trail byte is looked up (0 = not special).
                        int trail;
                        switch(ch)
                        {
                        case 0x00A2: trail = 0x91; break;
                        case 0x00A3: trail = 0x92; break;
                        case 0x00A7: trail = 0x98; break;
                        case 0x00A8: trail = 0x4E; break;
                        case 0x00AC: trail = 0xCA; break;
                        case 0x00B0: trail = 0x8B; break;
                        case 0x00B1: trail = 0x7D; break;
                        case 0x00B4: trail = 0x4C; break;
                        case 0x00B6: trail = 0xF7; break;
                        case 0x00D7: trail = 0x7E; break;
                        case 0x00F7: trail = 0x80; break;
                        default:     trail = 0;    break;
                        }

                        if(trail != 0)
                        {
                                if((posn + 1) >= byteLength)
                                {
                                        throw new ArgumentException
                                                (Strings.GetString
                                                        ("Arg_InsufficientSpace"), "bytes");
                                }
                                bytes[posn++] = (byte)0x81;
                                bytes[posn++] = (byte)trail;
                        }
                        else if(ch == 0x00A5)
                        {
                                // Yen sign.
                                bytes[posn++] = (byte)0x5C;
                        }
                        else
                        {
#if NET_2_0
                                // Hand the fallback the loop index "i".  The
                                // previous code passed a separate variable that
                                // stayed at 0 for the whole loop, so the
                                // fallback was never pointed at the character
                                // being encoded.  The safe GetBytes variant in
                                // this file passes its loop index the same way.
                                // NOTE(review): byteCount is not reduced by the
                                // writes made in this loop; confirm that
                                // HandleFallback does not treat it as the
                                // remaining capacity.
                                HandleFallback (ref buffer,
                                        chars, ref i, ref charCount,
                                        bytes, ref posn, ref byteCount, null);
#else
                                // Invalid character.
                                bytes[posn++] = (byte)'?';
#endif
                        }
                        continue;
                }
                else if(ch >= 0x0391 && ch <= 0x0451)
                {
                        // Greek subset characters: little-endian 16-bit
                        // table entry.
                        value = (ch - 0x0391) * 2;
                        value = ((int)(greekToJis[value])) |
                                        (((int)(greekToJis[value + 1])) << 8);
                }
                else if(ch >= 0x2010 && ch <= 0x9FA5)
                {
                        // This range contains the bulk of the CJK set.
                        value = (ch - 0x2010) * 2;
                        value = ((int)(cjkToJis[value])) |
                                        (((int)(cjkToJis[value + 1])) << 8);
                }
                else if(ch >= 0xE000 && ch <= 0xE757)
                {
                        // PrivateUse: 0xBC code points per lead byte, starting
                        // at 0xF040, with trail bytes skipping 0x7F.
                        int diff = ch - 0xE000;
                        value = ((int) (diff / 0xBC) << 8)
                                + (diff % 0xBC)
                                + 0xF040;
                        if (value % 0x100 >= 0x7F)
                                value++;
                }
                else if(ch >= 0xFF01 && ch <= 0xFF60)
                {
                        value = (ch - 0xFF01) * 2;
                        value = ((int)(extraToJis[value])) |
                                        (((int)(extraToJis[value + 1])) << 8);
                }
                else if(ch >= 0xFF60 && ch <= 0xFFA0)
                {
                        // Single-byte form: 0xFF61 becomes 0xA1, and so on.
                        // (0xFF60 itself is taken by the branch above.)
                        // NOTE(review): the upper bound 0xFFA0 yields the
                        // single byte 0xE0; confirm that is intended.
                        value = ch - 0xFF60 + 0xA0;
                }
                else
                {
                        // Invalid character.
                        value = 0;
                }
                if(value == 0)
                {
#if NET_2_0
                        // Same fix as in the Latin 1 branch: pass the loop
                        // index so the fallback works on chars[i].
                        HandleFallback (ref buffer,
                                chars, ref i, ref charCount,
                                bytes, ref posn, ref byteCount, null);
#else
                        bytes[posn++] = (byte)'?';
#endif
                }
                else if(value < 0x0100)
                {
                        bytes[posn++] = (byte)value;
                }
                else if((posn + 1) >= byteLength)
                {
                        throw new ArgumentException
                                (Strings.GetString("Arg_InsufficientSpace"),
                                 "bytes");
                }
                else if(value < 0x8000)
                {
                        // JIS X 0208 character: split the table value into a
                        // row (lead byte) and a column (trail byte).
                        value -= 0x0100;
                        ch = (value / 0xBC);
                        value = (value % 0xBC) + 0x40;
                        if(value >= 0x7F)
                        {
                                // Trail bytes skip 0x7F.
                                ++value;
                        }
                        if(ch < (0x9F - 0x80))
                        {
                                // Lead bytes 0x81 up to 0x9F.
                                bytes[posn++] = (byte)(ch + 0x81);
                        }
                        else
                        {
                                // Remaining rows continue from lead byte 0xE0.
                                bytes[posn++] = (byte)(ch - (0x9F - 0x80) + 0xE0);
                        }
                        bytes[posn++] = (byte)value;
                }
                else if (value >= 0xF040 && value <= 0xF9FC)
                {
                        // PrivateUse: value already holds lead and trail bytes.
                        bytes[posn++] = (byte) (value / 0x100);
                        bytes[posn++] = (byte) (value % 0x100);
                }
                else
                {
                        // JIS X 0212 character, which Shift-JIS doesn't
                        // support, but we've already allocated two slots.
                        bytes[posn++] = (byte)'?';
                        bytes[posn++] = (byte)'?';
                }
        }

        // Return the final length to the caller.
        return posn;
}
354 #else
355                 // Get the number of bytes needed to encode a character buffer.
// Counts the Shift-JIS (code page 932) bytes needed to encode "count"
// characters of "chars" starting at "index".  Every character
// contributes one byte; a second byte is added when the character is
// written as a double-byte sequence.
//
// chars - array holding the characters to measure.
// index - position of the first character to examine.
// count - number of characters to examine; values <= 0 yield 0.
//
// Returns the total byte count.
public override int GetByteCount(char[] chars, int index, int count)
{
        byte[] cjkTable = JISConvert.Convert.cjkToJis;
        byte[] extraTable = JISConvert.Convert.extraToJis;
        int total = 0;

        for (int remaining = count; remaining > 0; remaining--)
        {
                int c = chars[index++];
                int width = 1;

                if (c < 0x0100)
                {
                        // ASCII and Latin 1: only a handful of Latin 1
                        // characters have double-byte code points.
                        switch (c)
                        {
                                case 0x00A2:
                                case 0x00A3:
                                case 0x00A7:
                                case 0x00A8:
                                case 0x00AC:
                                case 0x00B0:
                                case 0x00B1:
                                case 0x00B4:
                                case 0x00B6:
                                case 0x00D7:
                                case 0x00F7:
                                        width = 2;
                                        break;
                        }
                }
                else if (c >= 0x0391 && c <= 0x0451)
                {
                        // The "Greek subset" range is always counted as
                        // two bytes; no table is consulted here.
                        width = 2;
                }
                else if (c >= 0x2010 && c <= 0x9FA5)
                {
                        // Bulk of the CJK set: little-endian 16-bit table
                        // entry; 0x0100 and above needs a second byte.
                        int slot = (c - 0x2010) * 2;
                        int jis = cjkTable[slot] | (cjkTable[slot + 1] << 8);
                        if (jis >= 0x0100)
                        {
                                width = 2;
                        }
                }
                else if (c >= 0xE000 && c <= 0xE757)
                {
                        // Private use range: always two bytes.
                        width = 2;
                }
                else if (c >= 0xFF01 && c <= 0xFFEF)
                {
                        // Extra characters, including half-width katakana;
                        // same table layout as the CJK range.
                        int slot = (c - 0xFF01) * 2;
                        int jis = extraTable[slot] | (extraTable[slot + 1] << 8);
                        if (jis >= 0x0100)
                        {
                                width = 2;
                        }
                }

                total += width;
        }

        return total;
}
422
423                 // Get the bytes that result from encoding a character buffer.
424                 public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
425                 {
426                         int byteCount = bytes.Length;
427 #if NET_2_0
428                         EncoderFallbackBuffer buffer = null;
429 #endif
430
431                         // Convert the characters into their byte form.
432                         int posn = byteIndex;
433                         int end = charIndex + charCount;
434                         int byteLength = byteCount;
435                         int /*ch,*/ value;
436                         byte[] cjkToJis = JISConvert.Convert.cjkToJis;
437                         byte[] greekToJis = JISConvert.Convert.greekToJis;
438                         byte[] extraToJis = JISConvert.Convert.extraToJis;
439
440                         for (int i = charIndex; i < end; i++, charCount--)
441                         {
442                                 int ch = chars[i];
443
444                                 if (posn >= byteLength)
445                                 {
446                                         throw new ArgumentException
447                                                 (Strings.GetString("Arg_InsufficientSpace"),
448                                                  "bytes");
449                                 }
450                                 if (ch < 0x0080)
451                                 {
452                                         // Character maps to itself.
453                                         bytes[posn++] = (byte)ch;
454                                         continue;
455                                 }
456                                 else if (ch < 0x0100)
457                                 {
458                                         // Check for special Latin 1 characters that
459                                         // can be mapped to double-byte code points.
460                                         if (ch == 0x00A2 || ch == 0x00A3 || ch == 0x00A7 ||
461                                            ch == 0x00A8 || ch == 0x00AC || ch == 0x00B0 ||
462                                            ch == 0x00B1 || ch == 0x00B4 || ch == 0x00B6 ||
463                                            ch == 0x00D7 || ch == 0x00F7)
464                                         {
465                                                 if ((posn + 1) >= byteLength)
466                                                 {
467                                                         throw new ArgumentException
468                                                                 (Strings.GetString
469                                                                         ("Arg_InsufficientSpace"), "bytes");
470                                                 }
471                                                 switch (ch)
472                                                 {
473                                                         case 0x00A2:
474                                                                 bytes[posn++] = (byte)0x81;
475                                                                 bytes[posn++] = (byte)0x91;
476                                                                 break;
477
478                                                         case 0x00A3:
479                                                                 bytes[posn++] = (byte)0x81;
480                                                                 bytes[posn++] = (byte)0x92;
481                                                                 break;
482
483                                                         case 0x00A7:
484                                                                 bytes[posn++] = (byte)0x81;
485                                                                 bytes[posn++] = (byte)0x98;
486                                                                 break;
487
488                                                         case 0x00A8:
489                                                                 bytes[posn++] = (byte)0x81;
490                                                                 bytes[posn++] = (byte)0x4E;
491                                                                 break;
492
493                                                         case 0x00AC:
494                                                                 bytes[posn++] = (byte)0x81;
495                                                                 bytes[posn++] = (byte)0xCA;
496                                                                 break;
497
498                                                         case 0x00B0:
499                                                                 bytes[posn++] = (byte)0x81;
500                                                                 bytes[posn++] = (byte)0x8B;
501                                                                 break;
502
503                                                         case 0x00B1:
504                                                                 bytes[posn++] = (byte)0x81;
505                                                                 bytes[posn++] = (byte)0x7D;
506                                                                 break;
507
508                                                         case 0x00B4:
509                                                                 bytes[posn++] = (byte)0x81;
510                                                                 bytes[posn++] = (byte)0x4C;
511                                                                 break;
512
513                                                         case 0x00B6:
514                                                                 bytes[posn++] = (byte)0x81;
515                                                                 bytes[posn++] = (byte)0xF7;
516                                                                 break;
517
518                                                         case 0x00D7:
519                                                                 bytes[posn++] = (byte)0x81;
520                                                                 bytes[posn++] = (byte)0x7E;
521                                                                 break;
522
523                                                         case 0x00F7:
524                                                                 bytes[posn++] = (byte)0x81;
525                                                                 bytes[posn++] = (byte)0x80;
526                                                                 break;
527                                                 }
528                                         }
529                                         else if (ch == 0x00A5)
530                                         {
531                                                 // Yen sign.
532                                                 bytes[posn++] = (byte)0x5C;
533                                         }
534                                         else
535                                         {
536 #if NET_2_0
537                                                 HandleFallback (ref buffer, chars, ref i, ref charCount, bytes, 
538                                                         ref byteIndex, ref byteCount, null);
539 #else
540                                                 // Invalid character.
541                                                 bytes[posn++] = (byte)'?';
542 #endif
543                                         }
544                                         continue;
545                                 }
546                                 else if (ch >= 0x0391 && ch <= 0x0451)
547                                 {
548                                         // Greek subset characters.
549                                         value = (ch - 0x0391) * 2;
550                                         value = ((int)(greekToJis[value])) |
551                                                         (((int)(greekToJis[value + 1])) << 8);
552                                 }
553                                 else if (ch >= 0x2010 && ch <= 0x9FA5)
554                                 {
555                                         // This range contains the bulk of the CJK set.
556                                         value = (ch - 0x2010) * 2;
557                                         value = ((int)(cjkToJis[value])) |
558                                                         (((int)(cjkToJis[value + 1])) << 8);
559                                 }
560                                 else if (ch >= 0xE000 && ch <= 0xE757)
561                                 {
562                                         // PrivateUse
563                                         int diff = ch - 0xE000;
564                                         value = ((int)(diff / 0xBC) << 8)
565                                                 + (diff % 0xBC)
566                                                 + 0xF040;
567                                         if (value % 0x100 >= 0x7F)
568                                                 value++;
569                                 }
570                                 else if (ch >= 0xFF01 && ch <= 0xFF60)
571                                 {
572                                         value = (ch - 0xFF01) * 2;
573                                         value = ((int)(extraToJis[value])) |
574                                                         (((int)(extraToJis[value + 1])) << 8);
575                                 }
576                                 else if (ch >= 0xFF60 && ch <= 0xFFA0)
577                                 {
578                                         value = ch - 0xFF60 + 0xA0;
579                                 }
580                                 else
581                                 {
582                                         // Invalid character.
583                                         value = 0;
584                                 }
585                                 if (value == 0)
586                                 {
587 #if NET_2_0
588                                         HandleFallback (ref buffer, chars, ref charIndex, ref charCount,
589                                                 bytes, ref posn, ref byteCount, null);
590 #else
591                                         bytes[posn++] = (byte)'?';
592 #endif
593                                 }
594                                 else if (value < 0x0100)
595                                 {
596                                         bytes[posn++] = (byte)value;
597                                 }
598                                 else if ((posn + 1) >= byteLength)
599                                 {
600                                         throw new ArgumentException
601                                                 (Strings.GetString("Arg_InsufficientSpace"),
602                                                  "bytes");
603                                 }
604                                 else if (value < 0x8000)
605                                 {
606                                         // JIS X 0208 character.
607                                         value -= 0x0100;
608                                         ch = (value / 0xBC);
609                                         value = (value % 0xBC) + 0x40;
610                                         if (value >= 0x7F)
611                                         {
612                                                 ++value;
613                                         }
614                                         if (ch < (0x9F - 0x80))
615                                         {
616                                                 bytes[posn++] = (byte)(ch + 0x81);
617                                         }
618                                         else
619                                         {
620                                                 bytes[posn++] = (byte)(ch - (0x9F - 0x80) + 0xE0);
621                                         }
622                                         bytes[posn++] = (byte)value;
623                                 }
624                                 else if (value >= 0xF040 && value <= 0xF9FC)
625                                 {
626                                         // PrivateUse
627                                         bytes[posn++] = (byte)(value / 0x100);
628                                         bytes[posn++] = (byte)(value % 0x100);
629                                 }
630                                 else
631                                 {
632                                         // JIS X 0212 character, which Shift-JIS doesn't
633                                         // support, but we've already allocated two slots.
634                                         bytes[posn++] = (byte)'?';
635                                         bytes[posn++] = (byte)'?';
636                                 }
637                         }
638
639                         // Return the final length to the caller.
640                         return posn - byteIndex;
641                 }
642 #endif
643
// Count the characters obtained by decoding "count" bytes of "bytes"
// starting at "index".  A throw-away CP932Decoder is created for every
// call and invoked with refresh = true, so no pending lead byte is
// carried over from one call to the next.
public override int GetCharCount (byte [] bytes, int index, int count)
{
        CP932Decoder decoder = new CP932Decoder (JISConvert.Convert);
        int total = decoder.GetCharCount (bytes, index, count, true);
        return total;
}
649
// Decode "byteCount" bytes of "bytes" starting at "byteIndex" into
// "chars" starting at "charIndex", and return the number of characters
// written.  A throw-away CP932Decoder is created for every call and
// invoked with refresh = true, so no pending lead byte is carried over
// from one call to the next.
public override int GetChars (
        byte [] bytes, int byteIndex, int byteCount,
        char [] chars, int charIndex)
{
        CP932Decoder decoder = new CP932Decoder (JISConvert.Convert);
        int written = decoder.GetChars (
                bytes, byteIndex, byteCount,
                chars, charIndex, true);
        return written;
}
658
// Upper bound on the number of bytes produced when encoding
// "charCount" characters: two bytes are reserved per character.
// A negative "charCount" raises ArgumentOutOfRangeException.
public override int GetMaxByteCount(int charCount)
{
        if(charCount >= 0)
        {
                return charCount * 2;
        }
        throw new ArgumentOutOfRangeException
                ("charCount",
                 Strings.GetString("ArgRange_NonNegative"));
}
671
// Upper bound on the number of characters produced when decoding
// "byteCount" bytes: one character is reserved per byte.
// A negative "byteCount" raises ArgumentOutOfRangeException.
public override int GetMaxCharCount(int byteCount)
{
        if(byteCount >= 0)
        {
                return byteCount;
        }
        throw new ArgumentOutOfRangeException
                ("byteCount",
                 Strings.GetString("ArgRange_NonNegative"));
}
684
// Create a new CP932Decoder.  Unlike the one-shot GetChars and
// GetCharCount methods of this class, the returned object can remember
// a pending lead byte between successive calls.
public override Decoder GetDecoder()
{
        Decoder decoder = new CP932Decoder(JISConvert.Convert);
        return decoder;
}
690
691 #if !ECMA_COMPAT
692
// Name reported for this encoding in mail message bodies.
public override String BodyName
{
        get
        {
                return "iso-2022-jp";
        }
}
697
// Human-readable description of this encoding.
public override String EncodingName
{
        get
        {
                return "Japanese (Shift-JIS)";
        }
}
702
// Name reported for this encoding in mail agent headers.
public override String HeaderName
{
        get
        {
                return "iso-2022-jp";
        }
}
707
// Whether a Web browser can display this encoding: always true.
public override bool IsBrowserDisplay
{
        get
        {
                return true;
        }
}
712
// Whether a Web browser can save content in this encoding: always true.
public override bool IsBrowserSave
{
        get
        {
                return true;
        }
}
717
// Whether a mail/news agent can display this encoding: always true.
public override bool IsMailNewsDisplay
{
        get
        {
                return true;
        }
}
722
// Whether a mail/news agent can save content in this encoding:
// always true.
public override bool IsMailNewsSave
{
        get
        {
                return true;
        }
}
727
// IANA-preferred Web name of this encoding.
public override String WebName
{
        get
        {
                return "shift_jis";
        }
}
732
// Windows code page number of this encoding (SHIFTJIS_CODE_PAGE).
public override int WindowsCodePage
{
        get
        {
                return SHIFTJIS_CODE_PAGE;
        }
}
737
738         }; // class CP932
739
740 #endif // !ECMA_COMPAT
741
// Decoder that handles a rolling Shift-JIS state.
//
// Bytes are consumed one at a time.  A lead byte (0x81-0x9F or 0xE0-0xF9)
// produces no output by itself: it is remembered and combined with the
// byte that follows it, even when that byte only arrives in a later call.
// Every other byte, and every completed or flushed pair, produces exactly
// one character.  GetCharCount and GetChars each keep their own pending
// lead byte, so a caller that invokes both for every chunk gets counts
// that match the number of characters actually written.
sealed class CP932Decoder : DbcsEncoding.DbcsDecoder
{
        // Conversion tables; only jisx0208ToUnicode is read here.  Declared
        // "new" so that it hides the base-class member of the same name.
        private new JISConvert convert;

        // Lead byte left pending by the last GetCharCount call (0 = none).
        private int last_byte_count;

        // Lead byte left pending by the last GetChars call (0 = none).
        private int last_byte_chars;

        // Constructor.  The base class receives no table (null); all
        // lookups go through the JISConvert instance stored here.
        public CP932Decoder(JISConvert convert)
                : base (null)
        {
                this.convert = convert;
        }

        // Determine whether "byteval" starts a double-byte sequence.
        // 0x81-0x9F and 0xE0-0xEF are looked up in the JIS table; 0xF0-0xF9
        // is the PrivateUse area, the inverse of the 0xF040-0xF9FC range
        // that the CP932 encoder emits for U+E000-U+E757.
        private static bool IsLeadByte (int byteval)
        {
                return (byteval >= 0x81 && byteval <= 0x9F) ||
                       (byteval >= 0xE0 && byteval <= 0xF9);
        }

        // Override inherited methods.

        // Count characters without flushing: a trailing lead byte is kept
        // pending for the next call.
        public override int GetCharCount (
                byte [] bytes, int index, int count)
        {
                return GetCharCount (bytes, index, count, false);
        }

        // Count the characters that GetChars writes for the same bytes and
        // the same "refresh" value.
        //
        //   bytes, index, count - the byte range to examine (validated by
        //                         CheckRange).
        //   refresh             - true to count a pending lead byte as one
        //                         character and clear it; false to keep it
        //                         for the next call.
        //
        // A character is counted at the moment GetChars would store it (on
        // a single byte, on the second byte of a pair, or on flush), never
        // on the lead byte itself.  Counting on the lead byte would count a
        // dangling lead byte twice and would make per-call counts disagree
        // with GetChars when a pair is split across two calls.
        public
#if NET_2_0
        override
#endif
        int GetCharCount (byte [] bytes, int index, int count, bool refresh)
        {
                CheckRange (bytes, index, count);

                // Determine the total length of the converted string.
                int length = 0;
                int last = last_byte_count;
                while(count > 0)
                {
                        int byteval = bytes[index++];
                        --count;
                        if(last == 0 && IsLeadByte (byteval))
                        {
                                // First byte in a double-byte sequence:
                                // nothing is produced until it is completed.
                                last = byteval;
                        }
                        else
                        {
                                // Single byte, or second byte of a pair.
                                last = 0;
                                ++length;
                        }
                }
                if (refresh)
                {
                        // A dangling lead byte is flushed as one character.
                        if (last != 0)
                        {
                                ++length;
                        }
                        last_byte_count = 0;
                }
                else
                {
                        last_byte_count = last;
                }

                // Return the total length.
                return length;
        }

        // Decode without flushing: a trailing lead byte is kept pending
        // for the next call.
        public override int GetChars (
                byte [] bytes, int byteIndex, int byteCount,
                char [] chars, int charIndex)
        {
                return GetChars (bytes, byteIndex, byteCount,
                                 chars, charIndex, false);
        }

        // Decode bytes into characters and return how many were written.
        //
        //   bytes, byteIndex, byteCount - the byte range to decode.
        //   chars, charIndex            - destination buffer and the first
        //                                 index to write.
        //   refresh                     - true to emit U+30FB for a pending
        //                                 lead byte and clear it; false to
        //                                 keep it for the next call.
        //
        // Throws ArgumentException ("chars") when a character has to be
        // stored and the buffer is already full.  The check is made before
        // every store, including the second byte of a pair whose lead byte
        // came from an earlier call and the flush at the end; those paths
        // would otherwise index past the end of "chars".
        public
#if NET_2_0
        override
#endif
        int GetChars (
                byte [] bytes, int byteIndex, int byteCount,
                char [] chars, int charIndex, bool refresh)
        {
                CheckRange (bytes, byteIndex, byteCount,
                        chars, charIndex);

                // Decode the bytes in the buffer.
                int posn = charIndex;
                int charLength = chars.Length;
                int byteval, value;
                int last = last_byte_chars;
#if __PNET__
                byte *table = convert.jisx0208ToUnicode;
#else
                byte[] table = convert.jisx0208ToUnicode;
#endif
                while(byteCount > 0)
                {
                        byteval = bytes[byteIndex++];
                        --byteCount;

                        char decoded;
                        if(last == 0)
                        {
                                if(IsLeadByte (byteval))
                                {
                                        // First byte in a double-byte sequence.
                                        last = byteval;
                                        continue;
                                }
                                if(byteval < 0x80)
                                {
                                        // Ordinary ASCII/Latin1 character.
                                        decoded = (char)byteval;
                                }
                                else if(byteval >= 0xA1 && byteval <= 0xDF)
                                {
                                        // Half-width katakana character.
                                        decoded = (char)(byteval - 0xA1 + 0xFF61);
                                }
                                else
                                {
                                        // Invalid first byte.
                                        decoded = '?';
                                }
                        }
                        else
                        {
                                // Second byte in a double-byte sequence.
                                int lead = last;
                                last = 0;

                                // Position of the second byte within its row
                                // of 0xBC cells; 0x7F is skipped, so bytes
                                // from 0x80 up sit one cell lower.
                                int trail;
                                if(byteval >= 0x40 && byteval <= 0x7E)
                                {
                                        trail = byteval - 0x40;
                                }
                                else if(byteval >= 0x80 && byteval <= 0xFC)
                                {
                                        trail = byteval - 0x80 + 0x3F;
                                }
                                else
                                {
                                        trail = -1;
                                }

                                if(trail < 0)
                                {
                                        // Invalid second byte.
                                        decoded = '?';
                                }
                                else if(lead >= 0xF0)
                                {
                                        // PrivateUse: computed directly, no
                                        // table lookup.
                                        decoded = (char)(0xE000 +
                                                (lead - 0xF0) * 0xBC + trail);
                                }
                                else
                                {
                                        if(lead <= 0x9F)
                                        {
                                                value = (lead - 0x81) * 0xBC;
                                        }
                                        else
                                        {
                                                value = (lead - 0xE0 + (0xA0 - 0x81)) * 0xBC;
                                        }

                                        // Two table bytes per cell, low byte
                                        // first; a zero entry means unmapped.
                                        value = (value + trail) * 2;
                                        value = ((int)(table[value])) |
                                                        (((int)(table[value + 1])) << 8);
                                        if(value != 0)
                                        {
                                                decoded = (char)value;
                                        }
                                        else
                                        {
                                                decoded = '?';
                                        }
                                }
                        }

                        if(posn >= charLength)
                        {
                                throw new ArgumentException
                                        (Strings.GetString
                                                ("Arg_InsufficientSpace"), "chars");
                        }
                        chars[posn++] = decoded;
                }
                if (refresh)
                {
                        if (last != 0)
                        {
                                // Dangling lead byte: emit U+30FB in its place.
                                if(posn >= charLength)
                                {
                                        throw new ArgumentException
                                                (Strings.GetString
                                                        ("Arg_InsufficientSpace"), "chars");
                                }
                                chars[posn++] = '\u30FB';
                        }
                        last_byte_chars = 0;
                }
                else
                {
                        last_byte_chars = last;
                }

                // Return the final length to the caller.
                return posn - charIndex;
        }

} // class CP932Decoder
930
// CP932 under the class name ENCshift_jis.  It adds no members of its
// own beyond a parameterless constructor.
[Serializable]
public class ENCshift_jis : CP932
{
        // Constructor: defers entirely to the CP932 constructor.
        public ENCshift_jis()
                : base()
        {
        }

}; // class ENCshift_jis
937
938 }; // namespace I18N.CJK