2 * CP932.cs - Japanese (Shift-JIS) code page.
4 * Copyright (c) 2002 Southern Storm Software, Pty Ltd
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
32 public unsafe class CP932 : Encoding
34 // Magic number used by Windows for the Shift-JIS code page.
// It is passed to the base Encoding constructor and is the value
// returned by the WindowsCodePage property.
35 private const int SHIFTJIS_CODE_PAGE = 932;
// Conversion tables used by the encode and decode methods of this class;
// assigned from JISConvert.Convert in the constructor.
38 private JISConvert convert;
// Construct the encoding, handing code page 932 to the base Encoding
// constructor.
41 public CP932() : base(SHIFTJIS_CODE_PAGE)
43 // Load the JIS conversion tables.
// NOTE(review): JISConvert is declared outside this file, so whether
// JISConvert.Convert builds the tables on each access or hands back a
// shared instance cannot be seen from here - confirm.
44 convert = JISConvert.Convert;
47 // Get the number of bytes needed to encode a character buffer.
//
// chars - the characters to measure.
// index - position in "chars" of the first character to measure.
// count - number of characters to measure, starting at "index".
//
// Throws ArgumentNullException naming "chars", and
// ArgumentOutOfRangeException naming "index" or "count" when the two
// values do not describe a slice that lies inside "chars".
//
// NOTE(review): the original line numbers embedded in this listing skip
// (e.g. 50 -> 53 and 74 -> 83), so the null test, the loop header and the
// statements that accumulate the length are not visible here.  The notes
// below describe only the lines that are visible.
48 public override int GetByteCount(char[] chars, int index, int count)
50 // Validate the parameters.
53 throw new ArgumentNullException("chars");
// index == chars.Length passes this test (an empty slice at the end).
55 if(index < 0 || index > chars.Length)
57 throw new ArgumentOutOfRangeException
58 ("index", Strings.GetString("ArgRange_Array"));
// count is compared with the space left after index; the sum
// index + count is never formed.
60 if(count < 0 || count > (chars.Length - index))
62 throw new ArgumentOutOfRangeException
63 ("count", Strings.GetString("ArgRange_Array"));
66 // Determine the length of the final output.
// The tables are declared twice: once as raw pointers and once as
// managed arrays.  NOTE(review): presumably a preprocessor conditional,
// whose directives are not visible in this listing, selects one form.
// Unlike GetBytes, no greekToJis table is fetched in this method.
70 byte *cjkToJis = convert.cjkToJis;
71 byte *extraToJis = convert.extraToJis;
73 byte[] cjkToJis = convert.cjkToJis;
74 byte[] extraToJis = convert.extraToJis;
83 // Character maps to itself.
88 // Check for special Latin 1 characters that
89 // can be mapped to double-byte code points.
// The eleven code points are the cent, pound, section, diaeresis, not,
// degree, plus-minus, acute, pilcrow, multiplication and division signs.
90 if(ch == 0x00A2 || ch == 0x00A3 || ch == 0x00A7 ||
91 ch == 0x00A8 || ch == 0x00AC || ch == 0x00B0 ||
92 ch == 0x00B1 || ch == 0x00B4 || ch == 0x00B6 ||
93 ch == 0x00D7 || ch == 0x00F7)
98 else if(ch >= 0x0391 && ch <= 0x0451)
100 // Greek subset characters.
// No table lookup is visible for this range.
103 else if(ch >= 0x2010 && ch <= 0x9FA5)
105 // This range contains the bulk of the CJK set.
// Each character owns two consecutive table bytes, low byte first;
// they are combined into one 16-bit value.  What is then done with
// that value is on lines not visible here.
106 value = (ch - 0x2010) * 2;
107 value = ((int)(cjkToJis[value])) |
108 (((int)(cjkToJis[value + 1])) << 8);
114 else if(ch >= 0xFF01 && ch <= 0xFFEF)
116 // This range contains extra characters,
117 // including half-width katakana.
// Same two-bytes-per-entry lookup as above, in the extraToJis table.
// NOTE(review): GetBytes handles 0xFF01-0xFFA0 arithmetically rather
// than through this table and also has a 0xE000-0xE757 branch that has
// no visible counterpart here - confirm that the count returned by this
// method always matches what GetBytes writes.
118 value = (ch - 0xFF01) * 2;
119 value = ((int)(extraToJis[value])) |
120 (((int)(extraToJis[value + 1])) << 8);
128 // Return the length to the caller.
132 // Get the bytes that result from encoding a character buffer.
//
// chars     - source characters.
// charIndex - position in "chars" of the first character to encode.
// charCount - number of characters to encode.
// bytes     - destination buffer.
// byteIndex - position in "bytes" at which writing starts.
//
// Returns the number of bytes written (posn - byteIndex).
//
// Throws ArgumentNullException naming "chars" or "bytes",
// ArgumentOutOfRangeException naming "charIndex", "charCount" or
// "byteIndex", and ArgumentException (Arg_InsufficientSpace) when the
// destination runs out of room.
//
// NOTE(review): the original line numbers embedded in this listing skip,
// so null tests, the loop header, case labels and several assignments of
// this method are not visible; notes below cover the visible lines only.
133 public override int GetBytes(char[] chars, int charIndex, int charCount,
134 byte[] bytes, int byteIndex)
136 // Validate the parameters.
139 throw new ArgumentNullException("chars");
143 throw new ArgumentNullException("bytes");
145 if(charIndex < 0 || charIndex > chars.Length)
147 throw new ArgumentOutOfRangeException
148 ("charIndex", Strings.GetString("ArgRange_Array"));
150 if(charCount < 0 || charCount > (chars.Length - charIndex))
152 throw new ArgumentOutOfRangeException
153 ("charCount", Strings.GetString("ArgRange_Array"));
// Only the start position is range-checked here; running out of room
// is detected per character while writing.
155 if(byteIndex < 0 || byteIndex > bytes.Length)
157 throw new ArgumentOutOfRangeException
158 ("byteIndex", Strings.GetString("ArgRange_Array"));
161 // Convert the characters into their byte form.
162 int posn = byteIndex;
163 int byteLength = bytes.Length;
// The tables are declared twice (raw pointers / managed arrays).
// NOTE(review): presumably selected by a preprocessor conditional whose
// directives are not visible.  No visible line in this method indexes
// extraToJis - it may be unused here; confirm.
166 byte *cjkToJis = convert.cjkToJis;
167 byte *greekToJis = convert.greekToJis;
168 byte *extraToJis = convert.extraToJis;
170 byte[] cjkToJis = convert.cjkToJis;
171 byte[] greekToJis = convert.greekToJis;
172 byte[] extraToJis = convert.extraToJis;
176 ch = chars[charIndex++];
// Room for one output byte is checked before any branch writes.
178 if(posn >= byteLength)
180 throw new ArgumentException
181 (Strings.GetString("Arg_InsufficientSpace"),
186 // Character maps to itself.
187 bytes[posn++] = (byte)ch;
192 // Check for special Latin 1 characters that
193 // can be mapped to double-byte code points.
194 if(ch == 0x00A2 || ch == 0x00A3 || ch == 0x00A7 ||
195 ch == 0x00A8 || ch == 0x00AC || ch == 0x00B0 ||
196 ch == 0x00B1 || ch == 0x00B4 || ch == 0x00B6 ||
197 ch == 0x00D7 || ch == 0x00F7)
// These characters need two bytes, so posn + 1 must also be in range.
199 if((posn + 1) >= byteLength)
201 throw new ArgumentException
203 ("Arg_InsufficientSpace"), "bytes");
// Eleven two-byte sequences follow, each with lead byte 0x81.
// NOTE(review): the case labels choosing between them are not visible;
// presumably one pair per code point in the list above, in the same
// order - confirm against the full source.
208 bytes[posn++] = (byte)0x81;
209 bytes[posn++] = (byte)0x91;
213 bytes[posn++] = (byte)0x81;
214 bytes[posn++] = (byte)0x92;
218 bytes[posn++] = (byte)0x81;
219 bytes[posn++] = (byte)0x98;
223 bytes[posn++] = (byte)0x81;
224 bytes[posn++] = (byte)0x4E;
228 bytes[posn++] = (byte)0x81;
229 bytes[posn++] = (byte)0xCA;
233 bytes[posn++] = (byte)0x81;
234 bytes[posn++] = (byte)0x8B;
238 bytes[posn++] = (byte)0x81;
239 bytes[posn++] = (byte)0x7D;
243 bytes[posn++] = (byte)0x81;
244 bytes[posn++] = (byte)0x4C;
248 bytes[posn++] = (byte)0x81;
249 bytes[posn++] = (byte)0xF7;
253 bytes[posn++] = (byte)0x81;
254 bytes[posn++] = (byte)0x7E;
258 bytes[posn++] = (byte)0x81;
259 bytes[posn++] = (byte)0x80;
// U+00A5 (yen sign) becomes the single byte 0x5C; the decoders in this
// file map 0x5C back to U+00A5.
263 else if(ch == 0x00A5)
266 bytes[posn++] = (byte)0x5C;
270 // Invalid character.
271 bytes[posn++] = (byte)'?';
275 else if(ch >= 0x0391 && ch <= 0x0451)
277 // Greek subset characters.
// Two table bytes per character, low byte first.
278 value = (ch - 0x0391) * 2;
279 value = ((int)(greekToJis[value])) |
280 (((int)(greekToJis[value + 1])) << 8);
282 else if(ch >= 0x2010 && ch <= 0x9FA5)
284 // This range contains the bulk of the CJK set.
285 value = (ch - 0x2010) * 2;
286 value = ((int)(cjkToJis[value])) |
287 (((int)(cjkToJis[value + 1])) << 8);
// Private-use code points U+E000..U+E757.  The quotient diff / 0xBC is
// placed in the high byte of value; the rest of that expression and the
// consequence of the ">= 0x7F" test on the low byte are on lines not
// visible here.  NOTE(review): presumably this produces the 0xF040-0xF9FC
// values written out near the end of the method - confirm.
289 else if(ch >= 0xE000 && ch <= 0xE757)
292 int diff = ch - 0xE000;
293 value = ((int) (diff / 0xBC) << 8)
296 if (value % 0x100 >= 0x7F)
// 0xFF01..0xFF60 is shifted down to single-byte values 0x21..0x80.
299 else if(ch >= 0xFF01 && ch <= 0xFF60)
301 value = ch - 0xFF00 + 0x20;
// 0xFF60 is already taken by the branch above, so this branch
// effectively starts at 0xFF61 and yields 0xA1..0xE0.
303 else if(ch >= 0xFF60 && ch <= 0xFFA0)
305 value = ch - 0xFF60 + 0xA0;
309 // Invalid character.
314 bytes[posn++] = (byte)'?';
// A value below 0x100 is emitted as one byte.
316 else if(value < 0x0100)
318 bytes[posn++] = (byte)value;
// Everything below writes two bytes, so check the second slot first.
320 else if((posn + 1) >= byteLength)
322 throw new ArgumentException
323 (Strings.GetString("Arg_InsufficientSpace"),
326 else if(value < 0x8000)
328 // JIS X 0208 character.
// value is reduced to the trail byte (cell within a 0xBC-wide row,
// based at 0x40).  NOTE(review): "ch" is reused below as the row index;
// the statement that assigns it is not visible (presumably
// value / 0xBC) - confirm.
331 value = (value % 0xBC) + 0x40;
// The first 0x1F rows use lead bytes 0x81..0x9F; later rows continue
// from lead byte 0xE0.
336 if(ch < (0x9F - 0x80))
338 bytes[posn++] = (byte)(ch + 0x81);
342 bytes[posn++] = (byte)(ch - (0x9F - 0x80) + 0xE0);
344 bytes[posn++] = (byte)value;
// Values in this range are already a lead/trail pair: written high
// byte first, then low byte.
346 else if (value >= 0xF040 && value <= 0xF9FC)
349 bytes[posn++] = (byte) (value / 0x100);
350 bytes[posn++] = (byte) (value % 0x100);
354 // JIS X 0212 character, which Shift-JIS doesn't
355 // support, but we've already allocated two slots.
356 bytes[posn++] = (byte)'?';
357 bytes[posn++] = (byte)'?';
361 // Return the final length to the caller.
362 return posn - byteIndex;
365 // Get the number of characters needed to decode a byte buffer.
//
// bytes - the encoded input.
// index - position in "bytes" of the first byte to examine.
// count - number of bytes to examine, starting at "index".
//
// Throws ArgumentNullException naming "bytes", and
// ArgumentOutOfRangeException naming "index" or "count" when the two
// values do not describe a slice that lies inside "bytes".
//
// NOTE(review): the original line numbers embedded in this listing skip,
// so the null test, the loop header, the length accumulation and the
// return statement are not visible here.
366 public override int GetCharCount(byte[] bytes, int index, int count)
368 // Validate the parameters.
371 throw new ArgumentNullException("bytes");
373 if(index < 0 || index > bytes.Length)
375 throw new ArgumentOutOfRangeException
376 ("index", Strings.GetString("ArgRange_Array"));
378 if(count < 0 || count > (bytes.Length - index))
380 throw new ArgumentOutOfRangeException
381 ("count", Strings.GetString("ArgRange_Array"));
384 // Determine the total length of the converted string.
389 byteval = bytes[index++];
394 // Ordinary ASCII/Latin1 character, or the
395 // single-byte Yen or overline signs.
398 else if(byteval >= 0xA1 && byteval <= 0xDF)
400 // Half-width katakana.
// Bytes that cannot start a two-byte sequence.  The final alternative
// of this condition is on a line not visible here.
403 else if(byteval < 0x81 ||
404 (byteval > 0x9F && byteval < 0xE0) ||
407 // Invalid first byte.
412 // Missing second byte.
419 // Return the total length.
423 // Get the characters that result from decoding a byte buffer.
//
// bytes     - the encoded input.
// byteIndex - position in "bytes" of the first byte to decode.
// byteCount - number of bytes to decode.
// chars     - destination buffer.
// charIndex - position in "chars" at which writing starts.
//
// Returns the number of characters written (posn - charIndex).
//
// Throws ArgumentNullException naming "bytes" or "chars",
// ArgumentOutOfRangeException naming "byteIndex", "byteCount" or
// "charIndex", and ArgumentException (Arg_InsufficientSpace) when the
// destination runs out of room.
//
// NOTE(review): the original line numbers embedded in this listing skip,
// so null tests, the loop header and the handling of invalid or missing
// bytes are not visible; notes below cover the visible lines only.
424 public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
425 char[] chars, int charIndex)
427 // Validate the parameters.
430 throw new ArgumentNullException("bytes");
434 throw new ArgumentNullException("chars");
436 if(byteIndex < 0 || byteIndex > bytes.Length)
438 throw new ArgumentOutOfRangeException
439 ("byteIndex", Strings.GetString("ArgRange_Array"));
441 if(byteCount < 0 || byteCount > (bytes.Length - byteIndex))
443 throw new ArgumentOutOfRangeException
444 ("byteCount", Strings.GetString("ArgRange_Array"));
446 if(charIndex < 0 || charIndex > chars.Length)
448 throw new ArgumentOutOfRangeException
449 ("charIndex", Strings.GetString("ArgRange_Array"));
452 // Convert the bytes into their character form.
453 int charLength = chars.Length;
454 int posn = charIndex;
// The table is declared twice (raw pointer / managed array).
// NOTE(review): presumably selected by a preprocessor conditional whose
// directives are not visible in this listing.
458 byte *table = convert.jisx0208ToUnicode;
460 byte[] table = convert.jisx0208ToUnicode;
464 byteval = bytes[byteIndex++];
// Every path below writes exactly one character, so one free slot is
// checked up front.
467 if(posn >= charLength)
469 throw new ArgumentException
470 (Strings.GetString("Arg_InsufficientSpace"),
// NOTE(review): the condition guarding this yen-sign write is not
// visible; CP932Decoder.GetChars uses byteval == 0x5C for the same
// write, so presumably the same test applies here.
476 chars[posn++] = '\u00A5';
// 0x7E decodes to U+203E (overline) rather than to itself.
479 else if(byteval == 0x7E)
482 chars[posn++] = '\u203E';
485 else if(byteval < 0x80)
487 // Ordinary ASCII/Latin1 character.
488 chars[posn++] = (char)byteval;
491 else if(byteval >= 0xA1 && byteval <= 0xDF)
493 // Half-width katakana.
// Byte 0xA1 corresponds to U+FF61; later bytes follow in order.
494 chars[posn++] = (char)(byteval - 0xA1 + 0xFF61);
// Lead byte: each one selects a row of 0xBC cells.
497 else if(byteval >= 0x81 && byteval <= 0x9F)
499 value = (byteval - 0x81) * 0xBC;
// Lead bytes 0xE0..0xEF continue the row numbering after the 0x1F
// rows covered by 0x81..0x9F.
// NOTE(review): GetBytes can emit lead bytes 0xF0-0xF9 (values
// 0xF040-0xF9FC), but no visible branch here accepts a first byte above
// 0xEF - confirm whether that asymmetry is intended.
501 else if(byteval >= 0xE0 && byteval <= 0xEF)
503 value = (byteval - 0xE0 + (0xA0 - 0x81)) * 0xBC;
507 // Invalid first byte.
513 // Missing second byte.
517 byteval = bytes[byteIndex++];
// Trail byte: 0x40..0x7E gives cells 0x00..0x3E and 0x80..0xFC gives
// cells 0x3F..0xBB; 0x7F is not a valid trail byte.
519 if(byteval >= 0x40 && byteval <= 0x7E)
521 value += (byteval - 0x40);
523 else if(byteval >= 0x80 && byteval <= 0xFC)
525 value += (byteval - 0x80 + 0x3F);
529 // Invalid second byte.
// Two table bytes, low byte first, form the decoded code unit.
// NOTE(review): any scaling of the cell number to a byte offset before
// this lookup is on a line not visible here - confirm.
534 value = ((int)(table[value])) |
535 (((int)(table[value + 1])) << 8);
538 chars[posn++] = (char)value;
546 // Return the total length.
547 return posn - charIndex;
550 // Get the maximum number of bytes needed to encode a
551 // specified number of characters.
// Throws ArgumentOutOfRangeException with the ArgRange_NonNegative
// message.  NOTE(review): the guarding condition is not visible in this
// listing; presumably charCount < 0.
552 public override int GetMaxByteCount(int charCount)
556 throw new ArgumentOutOfRangeException
558 Strings.GetString("ArgRange_NonNegative"));
// GetBytes writes at most two bytes for any one character, hence the
// factor of two.  The multiplication is not guarded against overflow.
560 return charCount * 2;
563 // Get the maximum number of characters needed to decode a
564 // specified number of bytes.
// Throws ArgumentOutOfRangeException with the ArgRange_NonNegative
// message.  NOTE(review): neither the guarding condition nor the return
// statement is visible in this listing, so the bound that is returned
// cannot be confirmed from here.
565 public override int GetMaxCharCount(int byteCount)
569 throw new ArgumentOutOfRangeException
571 Strings.GetString("ArgRange_NonNegative"));
576 // Get a decoder that handles a rolling Shift-JIS state.
// A new CP932Decoder is created on every call; it is given this
// encoding's conversion tables.
577 public override Decoder GetDecoder()
579 return new CP932Decoder(convert);
584 // Get the mail body name for this encoding.
// NOTE(review): the name returned is that of ISO-2022-JP, not a
// Shift-JIS name - confirm this is the intended value for code page 932.
585 public override String BodyName
589 return "iso-2022-jp";
593 // Get the human-readable name for this encoding.
// The string is a fixed English literal.
594 public override String EncodingName
598 return "Japanese (Shift-JIS)";
602 // Get the mail agent header name for this encoding.
// Returns the same literal as BodyName.
603 public override String HeaderName
607 return "iso-2022-jp";
611 // Determine if this encoding can be displayed in a Web browser.
// NOTE(review): the getter body is not visible in this listing, so the
// value returned cannot be confirmed from here.
612 public override bool IsBrowserDisplay
620 // Determine if this encoding can be saved from a Web browser.
// NOTE(review): the getter body is not visible in this listing, so the
// value returned cannot be confirmed from here.
621 public override bool IsBrowserSave
629 // Determine if this encoding can be displayed in a mail/news agent.
// NOTE(review): the getter body is not visible in this listing, so the
// value returned cannot be confirmed from here.
630 public override bool IsMailNewsDisplay
638 // Determine if this encoding can be saved from a mail/news agent.
// NOTE(review): the getter body is not visible in this listing, so the
// value returned cannot be confirmed from here.
639 public override bool IsMailNewsSave
647 // Get the IANA-preferred Web name for this encoding.
// NOTE(review): the getter body is not visible in this listing, so the
// name returned cannot be confirmed from here.
648 public override String WebName
656 // Get the Windows code page represented by this object.
// Always 932, the same constant handed to the base constructor.
657 public override int WindowsCodePage
661 return SHIFTJIS_CODE_PAGE;
665 #endif // !ECMA_COMPAT
667 // Decoder that handles a rolling Shift-JIS state.
//
// NOTE(review): the original line numbers embedded in this listing skip,
// so braces, null tests, loop headers, the declaration and updates of
// the local "last", and the return of GetCharCount are not visible.  The
// notes below describe only the lines that are visible.
668 private sealed class CP932Decoder : Decoder
// Conversion tables supplied by the owning encoding.
670 private JISConvert convert;
// NOTE(review): presumably holds a lead byte left pending between calls;
// no visible line reads or writes this field - confirm.
671 private int lastByte;
// Store the conversion tables used by GetChars.
674 public CP932Decoder(JISConvert convert)
676 this.convert = convert;
680 // Override inherited methods.
// Count the characters that decoding bytes[index .. index + count)
// would produce.  Throws ArgumentNullException naming "bytes" and
// ArgumentOutOfRangeException naming "index" or "count".
681 public override int GetCharCount(byte[] bytes, int index, int count)
683 // Validate the parameters.
686 throw new ArgumentNullException("bytes");
688 if(index < 0 || index > bytes.Length)
690 throw new ArgumentOutOfRangeException
691 ("index", Strings.GetString("ArgRange_Array"));
693 if(count < 0 || count > (bytes.Length - index))
695 throw new ArgumentOutOfRangeException
696 ("count", Strings.GetString("ArgRange_Array"));
699 // Determine the total length of the converted string.
705 byteval = bytes[index++];
// Only 0x81..0x9F and 0xE0..0xEF are treated as lead bytes here.
709 if((byteval >= 0x81 && byteval <= 0x9F) ||
710 (byteval >= 0xE0 && byteval <= 0xEF))
712 // First byte in a double-byte sequence.
719 // Second byte in a double-byte sequence.
724 // Return the total length.
// Decode bytes into chars starting at charIndex and return the number
// of characters written (posn - charIndex).  Throws
// ArgumentNullException naming "bytes" or "chars",
// ArgumentOutOfRangeException naming "byteIndex", "byteCount" or
// "charIndex", and ArgumentException (Arg_InsufficientSpace) when
// "chars" runs out of room.
727 public override int GetChars(byte[] bytes, int byteIndex,
728 int byteCount, char[] chars,
731 // Validate the parameters.
734 throw new ArgumentNullException("bytes");
738 throw new ArgumentNullException("chars");
740 if(byteIndex < 0 || byteIndex > bytes.Length)
742 throw new ArgumentOutOfRangeException
743 ("byteIndex", Strings.GetString("ArgRange_Array"));
745 if(byteCount < 0 || byteCount > (bytes.Length - byteIndex))
747 throw new ArgumentOutOfRangeException
748 ("byteCount", Strings.GetString("ArgRange_Array"));
750 if(charIndex < 0 || charIndex > chars.Length)
752 throw new ArgumentOutOfRangeException
753 ("charIndex", Strings.GetString("ArgRange_Array"));
756 // Decode the bytes in the buffer.
757 int posn = charIndex;
758 int charLength = chars.Length;
// The table is declared twice (raw pointer / managed array).
// NOTE(review): presumably selected by a preprocessor conditional whose
// directives are not visible in this listing.
762 byte *table = convert.jisx0208ToUnicode;
764 byte[] table = convert.jisx0208ToUnicode;
768 byteval = bytes[byteIndex++];
772 if(posn >= charLength)
774 throw new ArgumentException
776 ("Arg_InsufficientSpace"), "chars");
778 if((byteval >= 0x81 && byteval <= 0x9F) ||
779 (byteval >= 0xE0 && byteval <= 0xEF))
781 // First byte in a double-byte sequence.
// 0x5C decodes to U+00A5 (yen sign) rather than to itself.
784 else if(byteval == 0x5C)
787 chars[posn++] ='\u00A5';
// 0x7E decodes to U+203E (overline) rather than to itself.
789 else if(byteval == 0x7E)
792 chars[posn++] ='\u203E';
794 else if(byteval < 0x80)
796 // Ordinary ASCII/Latin1 character.
797 chars[posn++] = (char)byteval;
799 else if(byteval >= 0xA1 && byteval <= 0xDF)
801 // Half-width katakana character.
// Byte 0xA1 corresponds to U+FF61; later bytes follow in order.
802 chars[posn++] = (char)(byteval - 0xA1 + 0xFF61);
806 // Invalid first byte.
812 // Second byte in a double-byte sequence.
// "last" is the pending lead byte; each lead byte selects a row of
// 0xBC cells.
813 if(last >= 0x81 && last <= 0x9F)
815 value = (last - 0x81) * 0xBC;
// Lead bytes 0xF0..0xFC are mapped into the private-use area starting
// at U+E000.  NOTE(review): the visible first-byte test above accepts
// nothing higher than 0xEF, so this branch looks unreachable unless
// "last" is set elsewhere - confirm against the full source.
817 else if (last >= 0xF0 && last <= 0xFC && byteval <= 0xFC)
820 value = 0xE000 + (last - 0xF0) * 0xBC + byteval;
// Remaining lead bytes (0xE0..0xEF) continue the row numbering after
// the 0x1F rows covered by 0x81..0x9F.
826 value = (last - 0xE0 + (0xA0 - 0x81)) * 0xBC;
// Trail byte: 0x40..0x7E gives cells 0x00..0x3E and 0x80..0xFC gives
// cells 0x3F..0xBB; 0x7F is not a valid trail byte.
829 if(byteval >= 0x40 && byteval <= 0x7E)
831 value += (byteval - 0x40);
833 else if(byteval >= 0x80 && byteval <= 0xFC)
835 value += (byteval - 0x80 + 0x3F);
839 // Invalid second byte.
// Two table bytes, low byte first, form the decoded code unit.
// NOTE(review): any scaling of the cell number to a byte offset before
// this lookup is on a line not visible here - confirm.
844 value = ((int)(table[value])) |
845 (((int)(table[value + 1])) << 8);
848 chars[posn++] = (char)value;
858 // Return the final length to the caller.
859 return posn - charIndex;
862 } // class CP932Decoder
// Subclass of CP932 whose type name carries the "shift_jis" spelling.
// NOTE(review): presumably this lets the encoding be located by name;
// the lookup mechanism is outside this file - confirm.
866 public class ENCshift_jis : CP932
// Adds no behaviour; chains to the CP932 parameterless constructor.
868 public ENCshift_jis() : base() {}
870 }; // class ENCshift_jis
872 }; // namespace I18N.CJK