* OTHER DEALINGS IN THE SOFTWARE.
*/
+//
+// Copyright (C) 2005-2006 Novell, Inc.
+//
+
namespace I18N.CJK
{
-using System;
-using System.Text;
-using I18N.Common;
-
-public unsafe class CP932 : Encoding
-{
- // Magic number used by Windows for the Shift-JIS code page.
- private const int SHIFTJIS_CODE_PAGE = 932;
+ using System;
+ using System.Text;
+ using I18N.Common;
- // Internal state.
- private JISConvert convert;
+#if DISABLE_UNSAFE
+ using MonoEncoder = I18N.Common.MonoSafeEncoder;
+ using MonoEncoding = I18N.Common.MonoSafeEncoding;
+#endif
- // Constructor.
- public CP932() : base(SHIFTJIS_CODE_PAGE)
- {
- // Load the JIS conversion tables.
- convert = JISConvert.Convert;
- }
+ [Serializable]
+ public class CP932 : MonoEncoding
+ {
+ // Magic number used by Windows for the Shift-JIS code page.
+ private const int SHIFTJIS_CODE_PAGE = 932;
- // Get the number of bytes needed to encode a character buffer.
- public override int GetByteCount(char[] chars, int index, int count)
+ // Constructor: hands the Shift-JIS code page number (932) to the base encoding; nothing else is initialized here.
+ public CP932() : base(SHIFTJIS_CODE_PAGE)
+ {
+ }
+
+#if !DISABLE_UNSAFE
+ // Count the bytes needed to encode `count` UTF-16 code units starting at `chars`; every unit costs at least one byte.
+ public unsafe override int GetByteCountImpl (char* chars, int count)
+ {
+ int index = 0;
+
+ // Walk the input once, accumulating the encoded length.
+ int length = 0;
+ int ch, value;
+#if __PNET__
+ byte *cjkToJis = JISConvert.Convert.cjkToJis;
+ byte *extraToJis = JISConvert.Convert.extraToJis;
+#else
+ byte[] cjkToJis = JISConvert.Convert.cjkToJis;
+ byte[] extraToJis = JISConvert.Convert.extraToJis;
+#endif
+ while(count > 0)
{
- // Validate the parameters.
- if(chars == null)
+ ch = chars[index++];
+ --count;
+ ++length;
+ if(ch < 0x0080)
{
- throw new ArgumentNullException("chars");
+ // Below 0x80: single byte, already counted above.
+ continue;
}
- if(index < 0 || index > chars.Length)
+ else if(ch < 0x0100)
{
- throw new ArgumentOutOfRangeException
- ("index", Strings.GetString("ArgRange_Array"));
+ // Eleven Latin 1 code points have double-byte encodings;
+ // every other Latin 1 code point keeps the single byte counted above.
+ if(ch == 0x00A2 || ch == 0x00A3 || ch == 0x00A7 ||
+ ch == 0x00A8 || ch == 0x00AC || ch == 0x00B0 ||
+ ch == 0x00B1 || ch == 0x00B4 || ch == 0x00B6 ||
+ ch == 0x00D7 || ch == 0x00F7)
+ {
+ ++length;
+ }
}
- if(count < 0 || count > (chars.Length - index))
+ else if(ch >= 0x0391 && ch <= 0x0451)
{
- throw new ArgumentOutOfRangeException
- ("count", Strings.GetString("ArgRange_Array"));
+ // 0x0391..0x0451 is always counted as two bytes, without a table lookup.
+ ++length;
}
-
- // Determine the length of the final output.
- int length = 0;
- int ch, value;
-#if __PNET__
- byte *cjkToJis = convert.cjkToJis;
- byte *extraToJis = convert.extraToJis;
-#else
- byte[] cjkToJis = convert.cjkToJis;
- byte[] extraToJis = convert.extraToJis;
-#endif
- while(count > 0)
+ else if(ch >= 0x2010 && ch <= 0x9FA5)
{
- ch = chars[index++];
- --count;
- ++length;
- if(ch < 0x0080)
+ // Bulk CJK range: a table value of 0x0100 or more means a double-byte code.
+ value = (ch - 0x2010) * 2;
+ value = ((int)(cjkToJis[value])) |
+ (((int)(cjkToJis[value + 1])) << 8);
+ if(value >= 0x0100)
{
- // Character maps to itself.
- continue;
- }
- else if(ch < 0x0100)
- {
- // Check for special Latin 1 characters that
- // can be mapped to double-byte code points.
- if(ch == 0x00A2 || ch == 0x00A3 || ch == 0x00A7 ||
- ch == 0x00A8 || ch == 0x00AC || ch == 0x00B0 ||
- ch == 0x00B1 || ch == 0x00B4 || ch == 0x00B6 ||
- ch == 0x00D7 || ch == 0x00F7)
- {
- ++length;
- }
- }
- else if(ch >= 0x0391 && ch <= 0x0451)
- {
- // Greek subset characters.
++length;
}
- else if(ch >= 0x2010 && ch <= 0x9FA5)
- {
- // This range contains the bulk of the CJK set.
- value = (ch - 0x2010) * 2;
- value = ((int)(cjkToJis[value])) |
- (((int)(cjkToJis[value + 1])) << 8);
- if(value >= 0x0100)
- {
- ++length;
- }
- }
- else if(ch >= 0xFF01 && ch <= 0xFFEF)
+ }
+ else if(ch >= 0xE000 && ch <= 0xE757)
+ // Private-use range 0xE000..0xE757: always two bytes.
+ ++length;
+ else if(ch >= 0xFF01 && ch <= 0xFFEF)
+ {
+ // Extra characters, including half-width katakana;
+ // two bytes only when the table value is 0x0100 or more.
+ value = (ch - 0xFF01) * 2;
+ value = ((int)(extraToJis[value])) |
+ (((int)(extraToJis[value + 1])) << 8);
+ if(value >= 0x0100)
{
- // This range contains extra characters,
- // including half-width katakana.
- value = (ch - 0xFF01) * 2;
- value = ((int)(extraToJis[value])) |
- (((int)(extraToJis[value + 1])) << 8);
- if(value >= 0x0100)
- {
- ++length;
- }
+ ++length;
}
}
-
- // Return the length to the caller.
- return length;
}
- // Get the bytes that result from encoding a character buffer.
- public override int GetBytes(char[] chars, int charIndex, int charCount,
- byte[] bytes, int byteIndex)
+ // Return the accumulated byte count to the caller.
+ return length;
+ }
+
+ // Get the bytes that result from encoding a character buffer.
+ // Encodes charCount UTF-16 code units from chars into bytes (capacity
+ // byteCount) and returns the number of bytes written. Code units with no
+ // mapping are handed to HandleFallback together with the index of the
+ // offending unit and the current write position. Throws ArgumentException
+ // when the output buffer cannot hold the next encoded character.
+ public unsafe override int GetBytesImpl (
+ char* chars, int charCount, byte* bytes, int byteCount)
+ {
+ int charIndex = 0;
+ int byteIndex = 0;
+ EncoderFallbackBuffer buffer = null;
+
+ // Convert the characters into their byte form.
+ int posn = byteIndex;
+ int end = charCount;
+ int byteLength = byteCount;
+ int ch, value;
+#if __PNET__
+ byte *cjkToJis = JISConvert.Convert.cjkToJis;
+ byte *greekToJis = JISConvert.Convert.greekToJis;
+ byte *extraToJis = JISConvert.Convert.extraToJis;
+#else
+ byte[] cjkToJis = JISConvert.Convert.cjkToJis;
+ byte[] greekToJis = JISConvert.Convert.greekToJis;
+ byte[] extraToJis = JISConvert.Convert.extraToJis;
+#endif
+ for (int i = charIndex; i < end; i++, charCount--)
{
- // Validate the parameters.
- if(chars == null)
- {
- throw new ArgumentNullException("chars");
- }
- if(bytes == null)
- {
- throw new ArgumentNullException("bytes");
- }
- if(charIndex < 0 || charIndex > chars.Length)
+ ch = chars[i];
+ if(posn >= byteLength)
{
- throw new ArgumentOutOfRangeException
- ("charIndex", Strings.GetString("ArgRange_Array"));
+ throw new ArgumentException
+ (Strings.GetString("Arg_InsufficientSpace"),
+ "bytes");
}
- if(charCount < 0 || charCount > (chars.Length - charIndex))
+ if(ch < 0x0080)
{
- throw new ArgumentOutOfRangeException
- ("charCount", Strings.GetString("ArgRange_Array"));
+ // Character maps to itself.
+ bytes[posn++] = (byte)ch;
+ continue;
}
- if(byteIndex < 0 || byteIndex > bytes.Length)
- {
- throw new ArgumentOutOfRangeException
- ("byteIndex", Strings.GetString("ArgRange_Array"));
- }
-
- // Convert the characters into their byte form.
- int posn = byteIndex;
- int byteLength = bytes.Length;
- int ch, value;
-#if __PNET__
- byte *cjkToJis = convert.cjkToJis;
- byte *greekToJis = convert.greekToJis;
- byte *extraToJis = convert.extraToJis;
-#else
- byte[] cjkToJis = convert.cjkToJis;
- byte[] greekToJis = convert.greekToJis;
- byte[] extraToJis = convert.extraToJis;
-#endif
- while(charCount > 0)
+ else if(ch < 0x0100)
{
- ch = chars[charIndex++];
- --charCount;
- if(posn >= byteLength)
- {
- throw new ArgumentException
- (Strings.GetString("Arg_InsufficientSpace"),
- "bytes");
- }
- if(ch < 0x0080)
- {
- // Character maps to itself.
- bytes[posn++] = (byte)ch;
- continue;
- }
- else if(ch < 0x0100)
+ // Check for special Latin 1 characters that
+ // can be mapped to double-byte code points.
+ if(ch == 0x00A2 || ch == 0x00A3 || ch == 0x00A7 ||
+ ch == 0x00A8 || ch == 0x00AC || ch == 0x00B0 ||
+ ch == 0x00B1 || ch == 0x00B4 || ch == 0x00B6 ||
+ ch == 0x00D7 || ch == 0x00F7)
{
- // Check for special Latin 1 characters that
- // can be mapped to double-byte code points.
- if(ch == 0x00A2 || ch == 0x00A3 || ch == 0x00A7 ||
- ch == 0x00A8 || ch == 0x00AC || ch == 0x00B0 ||
- ch == 0x00B1 || ch == 0x00B4 || ch == 0x00B6 ||
- ch == 0x00D7 || ch == 0x00F7)
+ if((posn + 1) >= byteLength)
{
- if((posn + 1) >= byteLength)
- {
- throw new ArgumentException
- (Strings.GetString
- ("Arg_InsufficientSpace"), "bytes");
- }
- switch(ch)
- {
- case 0x00A2:
- bytes[posn++] = (byte)0x81;
- bytes[posn++] = (byte)0x91;
- break;
-
- case 0x00A3:
- bytes[posn++] = (byte)0x81;
- bytes[posn++] = (byte)0x92;
- break;
-
- case 0x00A7:
- bytes[posn++] = (byte)0x81;
- bytes[posn++] = (byte)0x98;
- break;
-
- case 0x00A8:
- bytes[posn++] = (byte)0x81;
- bytes[posn++] = (byte)0x4E;
- break;
-
- case 0x00AC:
- bytes[posn++] = (byte)0x81;
- bytes[posn++] = (byte)0xCA;
- break;
-
- case 0x00B0:
- bytes[posn++] = (byte)0x81;
- bytes[posn++] = (byte)0x8B;
- break;
-
- case 0x00B1:
- bytes[posn++] = (byte)0x81;
- bytes[posn++] = (byte)0x7D;
- break;
-
- case 0x00B4:
- bytes[posn++] = (byte)0x81;
- bytes[posn++] = (byte)0x4C;
- break;
-
- case 0x00B6:
- bytes[posn++] = (byte)0x81;
- bytes[posn++] = (byte)0xF7;
- break;
-
- case 0x00D7:
- bytes[posn++] = (byte)0x81;
- bytes[posn++] = (byte)0x7E;
- break;
-
- case 0x00F7:
- bytes[posn++] = (byte)0x81;
- bytes[posn++] = (byte)0x80;
- break;
- }
+ throw new ArgumentException
+ (Strings.GetString
+ ("Arg_InsufficientSpace"), "bytes");
}
- else if(ch == 0x00A5)
+ switch(ch)
{
- // Yen sign.
- bytes[posn++] = (byte)0x5C;
+ case 0x00A2:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0x91;
+ break;
+
+ case 0x00A3:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0x92;
+ break;
+
+ case 0x00A7:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0x98;
+ break;
+
+ case 0x00A8:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0x4E;
+ break;
+
+ case 0x00AC:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0xCA;
+ break;
+
+ case 0x00B0:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0x8B;
+ break;
+
+ case 0x00B1:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0x7D;
+ break;
+
+ case 0x00B4:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0x4C;
+ break;
+
+ case 0x00B6:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0xF7;
+ break;
+
+ case 0x00D7:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0x7E;
+ break;
+
+ case 0x00F7:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0x80;
+ break;
}
- else
- {
- // Invalid character.
- bytes[posn++] = (byte)'?';
- }
- continue;
}
- else if(ch >= 0x0391 && ch <= 0x0451)
+ else if(ch == 0x00A5)
{
- // Greek subset characters.
- value = (ch - 0x0391) * 2;
- value = ((int)(greekToJis[value])) |
- (((int)(greekToJis[value + 1])) << 8);
+ // Yen sign.
+ bytes[posn++] = (byte)0x5C;
}
- else if(ch >= 0x2010 && ch <= 0x9FA5)
+ else
{
- // This range contains the bulk of the CJK set.
- value = (ch - 0x2010) * 2;
- value = ((int)(cjkToJis[value])) |
- (((int)(cjkToJis[value + 1])) << 8);
+ // Unmappable Latin 1 character: pass the loop index i (not the
+ // never-advanced charIndex) so the fallback sees chars[i].
+ HandleFallback (ref buffer,
+ chars, ref i, ref charCount,
+ bytes, ref posn, ref byteCount, null);
}
- else if(ch >= 0xE000 && ch <= 0xE757)
- {
- // PrivateUse
- int diff = ch - 0xE000;
- value = ((int) (diff / 0xBC) << 8)
- + (diff % 0xBC)
- + 0xF040;
- if (value % 0x100 >= 0x7F)
- value++;
- }
- else if(ch >= 0xFF01 && ch <= 0xFF60)
+ continue;
+ }
+ else if(ch >= 0x0391 && ch <= 0x0451)
+ {
+ // Greek subset characters.
+ value = (ch - 0x0391) * 2;
+ value = ((int)(greekToJis[value])) |
+ (((int)(greekToJis[value + 1])) << 8);
+ }
+ else if(ch >= 0x2010 && ch <= 0x9FA5)
+ {
+ // This range contains the bulk of the CJK set.
+ value = (ch - 0x2010) * 2;
+ value = ((int)(cjkToJis[value])) |
+ (((int)(cjkToJis[value + 1])) << 8);
+ }
+ else if(ch >= 0xE000 && ch <= 0xE757)
+ {
+ // PrivateUse
+ int diff = ch - 0xE000;
+ value = ((int) (diff / 0xBC) << 8)
+ + (diff % 0xBC)
+ + 0xF040;
+ if (value % 0x100 >= 0x7F)
+ value++;
+ }
+ else if(ch >= 0xFF01 && ch <= 0xFF60)
+ {
+ value = (ch - 0xFF01) * 2;
+ value = ((int)(extraToJis[value])) |
+ (((int)(extraToJis[value + 1])) << 8);
+ }
+ else if(ch >= 0xFF60 && ch <= 0xFFA0)
+ {
+ // NOTE(review): 0xFF60 is already taken by the previous branch,
+ // so this branch is only reached for 0xFF61..0xFFA0.
+ value = ch - 0xFF60 + 0xA0;
+ }
+ else
+ {
+ // Invalid character.
+ value = 0;
+ }
+ if(value == 0)
+ {
+ // No mapping: fall back using the index of the current character.
+ HandleFallback (ref buffer,
+ chars, ref i, ref charCount,
+ bytes, ref posn, ref byteCount, null);
+ }
+ else if(value < 0x0100)
+ {
+ bytes[posn++] = (byte)value;
+ }
+ else if((posn + 1) >= byteLength)
+ {
+ throw new ArgumentException
+ (Strings.GetString("Arg_InsufficientSpace"),
+ "bytes");
+ }
+ else if(value < 0x8000)
+ {
+ // JIS X 0208 character.
+ value -= 0x0100;
+ ch = (value / 0xBC);
+ value = (value % 0xBC) + 0x40;
+ if(value >= 0x7F)
{
- value = ch - 0xFF00 + 0x20;
+ ++value;
}
- else if(ch >= 0xFF60 && ch <= 0xFFA0)
+ if(ch < (0x9F - 0x80))
{
- value = ch - 0xFF60 + 0xA0;
+ bytes[posn++] = (byte)(ch + 0x81);
}
else
{
- // Invalid character.
- value = 0;
+ bytes[posn++] = (byte)(ch - (0x9F - 0x80) + 0xE0);
}
- if(value == 0)
+ bytes[posn++] = (byte)value;
+ }
+ else if (value >= 0xF040 && value <= 0xF9FC)
+ {
+ // PrivateUse
+ bytes[posn++] = (byte) (value / 0x100);
+ bytes[posn++] = (byte) (value % 0x100);
+ }
+ else
+ {
+ // JIS X 0212 character, which Shift-JIS doesn't
+ // support, but we've already allocated two slots.
+ bytes[posn++] = (byte)'?';
+ bytes[posn++] = (byte)'?';
+ }
+ }
+
+ // Return the final length to the caller.
+ return posn - byteIndex;
+ }
+#else
+ // Count the bytes needed to encode `count` characters of `chars` starting at `index`; every character costs at least one byte.
+ public override int GetByteCount(char[] chars, int index, int count)
+ {
+ // Walk the input once, accumulating the encoded length.
+ int length = 0;
+ int ch, value;
+ byte[] cjkToJis = JISConvert.Convert.cjkToJis;
+ byte[] extraToJis = JISConvert.Convert.extraToJis;
+
+ while (count > 0)
+ {
+ ch = chars[index++];
+ --count;
+ ++length;
+ if (ch < 0x0080)
+ {
+ // Below 0x80: single byte, already counted above.
+ continue;
+ }
+ else if (ch < 0x0100)
+ {
+ // Eleven Latin 1 code points have double-byte encodings;
+ // every other Latin 1 code point keeps the single byte counted above.
+ if (ch == 0x00A2 || ch == 0x00A3 || ch == 0x00A7 ||
+ ch == 0x00A8 || ch == 0x00AC || ch == 0x00B0 ||
+ ch == 0x00B1 || ch == 0x00B4 || ch == 0x00B6 ||
+ ch == 0x00D7 || ch == 0x00F7)
{
- bytes[posn++] = (byte)'?';
+ ++length;
}
- else if(value < 0x0100)
+ }
+ else if (ch >= 0x0391 && ch <= 0x0451)
+ {
+ // 0x0391..0x0451 is always counted as two bytes, without a table lookup.
+ ++length;
+ }
+ else if (ch >= 0x2010 && ch <= 0x9FA5)
+ {
+ // Bulk CJK range: a table value of 0x0100 or more means a double-byte code.
+ value = (ch - 0x2010) * 2;
+ value = ((int)(cjkToJis[value])) |
+ (((int)(cjkToJis[value + 1])) << 8);
+ if (value >= 0x0100)
{
- bytes[posn++] = (byte)value;
+ ++length;
}
- else if((posn + 1) >= byteLength)
+ }
+ else if (ch >= 0xE000 && ch <= 0xE757)
+ // Private-use range 0xE000..0xE757: always two bytes.
+ ++length;
+ else if (ch >= 0xFF01 && ch <= 0xFFEF)
+ {
+ // Extra characters, including half-width katakana;
+ // two bytes only when the table value is 0x0100 or more.
+ value = (ch - 0xFF01) * 2;
+ value = ((int)(extraToJis[value])) |
+ (((int)(extraToJis[value + 1])) << 8);
+ if (value >= 0x0100)
{
- throw new ArgumentException
- (Strings.GetString("Arg_InsufficientSpace"),
- "bytes");
+ ++length;
}
- else if(value < 0x8000)
+ }
+ }
+
+ // Return the accumulated byte count to the caller.
+ return length;
+ }
+
+ // Get the bytes that result from encoding a character buffer.
+ // Encodes charCount characters of chars, starting at charIndex, into bytes
+ // starting at byteIndex, and returns the number of bytes written. Characters
+ // with no mapping are handed to HandleFallback together with the index of
+ // the offending character and the current write position. Throws
+ // ArgumentException when bytes cannot hold the next encoded character.
+ public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
+ {
+ int byteCount = bytes.Length;
+ EncoderFallbackBuffer buffer = null;
+
+ // Convert the characters into their byte form.
+ int posn = byteIndex;
+ int end = charIndex + charCount;
+ int byteLength = byteCount;
+ int /*ch,*/ value;
+ byte[] cjkToJis = JISConvert.Convert.cjkToJis;
+ byte[] greekToJis = JISConvert.Convert.greekToJis;
+ byte[] extraToJis = JISConvert.Convert.extraToJis;
+
+ for (int i = charIndex; i < end; i++, charCount--)
+ {
+ int ch = chars[i];
+
+ if (posn >= byteLength)
+ {
+ throw new ArgumentException
+ (Strings.GetString("Arg_InsufficientSpace"),
+ "bytes");
+ }
+ if (ch < 0x0080)
+ {
+ // Character maps to itself.
+ bytes[posn++] = (byte)ch;
+ continue;
+ }
+ else if (ch < 0x0100)
+ {
+ // Check for special Latin 1 characters that
+ // can be mapped to double-byte code points.
+ if (ch == 0x00A2 || ch == 0x00A3 || ch == 0x00A7 ||
+ ch == 0x00A8 || ch == 0x00AC || ch == 0x00B0 ||
+ ch == 0x00B1 || ch == 0x00B4 || ch == 0x00B6 ||
+ ch == 0x00D7 || ch == 0x00F7)
{
- // JIS X 0208 character.
- value -= 0x0100;
- ch = (value / 0xBC);
- value = (value % 0xBC) + 0x40;
- if(value >= 0x7F)
+ if ((posn + 1) >= byteLength)
{
- ++value;
+ throw new ArgumentException
+ (Strings.GetString
+ ("Arg_InsufficientSpace"), "bytes");
}
- if(ch < (0x9F - 0x80))
+ switch (ch)
{
- bytes[posn++] = (byte)(ch + 0x81);
+ case 0x00A2:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0x91;
+ break;
+
+ case 0x00A3:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0x92;
+ break;
+
+ case 0x00A7:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0x98;
+ break;
+
+ case 0x00A8:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0x4E;
+ break;
+
+ case 0x00AC:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0xCA;
+ break;
+
+ case 0x00B0:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0x8B;
+ break;
+
+ case 0x00B1:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0x7D;
+ break;
+
+ case 0x00B4:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0x4C;
+ break;
+
+ case 0x00B6:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0xF7;
+ break;
+
+ case 0x00D7:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0x7E;
+ break;
+
+ case 0x00F7:
+ bytes[posn++] = (byte)0x81;
+ bytes[posn++] = (byte)0x80;
+ break;
}
- else
- {
- bytes[posn++] = (byte)(ch - (0x9F - 0x80) + 0xE0);
- }
- bytes[posn++] = (byte)value;
}
- else if (value >= 0xF040 && value <= 0xF9FC)
+ else if (ch == 0x00A5)
{
- // PrivateUse
- bytes[posn++] = (byte) (value / 0x100);
- bytes[posn++] = (byte) (value % 0x100);
+ // Yen sign.
+ bytes[posn++] = (byte)0x5C;
}
else
{
- // JIS X 0212 character, which Shift-JIS doesn't
- // support, but we've already allocated two slots.
- bytes[posn++] = (byte)'?';
- bytes[posn++] = (byte)'?';
+ // Unmappable Latin 1 character: the fallback must write at the
+ // running cursor posn; passing byteIndex would leave posn behind
+ // and skew the "posn - byteIndex" result returned below.
+ HandleFallback (ref buffer, chars, ref i, ref charCount, bytes,
+ ref posn, ref byteCount, null);
}
+ continue;
}
-
- // Return the final length to the caller.
- return posn - byteIndex;
- }
-
- // Get the number of characters needed to decode a byte buffer.
- public override int GetCharCount(byte[] bytes, int index, int count)
- {
- // Validate the parameters.
- if(bytes == null)
+ else if (ch >= 0x0391 && ch <= 0x0451)
{
- throw new ArgumentNullException("bytes");
+ // Greek subset characters.
+ value = (ch - 0x0391) * 2;
+ value = ((int)(greekToJis[value])) |
+ (((int)(greekToJis[value + 1])) << 8);
}
- if(index < 0 || index > bytes.Length)
+ else if (ch >= 0x2010 && ch <= 0x9FA5)
{
- throw new ArgumentOutOfRangeException
- ("index", Strings.GetString("ArgRange_Array"));
+ // This range contains the bulk of the CJK set.
+ value = (ch - 0x2010) * 2;
+ value = ((int)(cjkToJis[value])) |
+ (((int)(cjkToJis[value + 1])) << 8);
}
- if(count < 0 || count > (bytes.Length - index))
+ else if (ch >= 0xE000 && ch <= 0xE757)
{
- throw new ArgumentOutOfRangeException
- ("count", Strings.GetString("ArgRange_Array"));
+ // PrivateUse
+ int diff = ch - 0xE000;
+ value = ((int)(diff / 0xBC) << 8)
+ + (diff % 0xBC)
+ + 0xF040;
+ if (value % 0x100 >= 0x7F)
+ value++;
}
-
- // Determine the total length of the converted string.
- int length = 0;
- int byteval;
- while(count > 0)
+ else if (ch >= 0xFF01 && ch <= 0xFF60)
{
- byteval = bytes[index++];
- --count;
- ++length;
- if(byteval < 0x80)
- {
- // Ordinary ASCII/Latin1 character, or the
- // single-byte Yen or overline signs.
- continue;
- }
- else if(byteval >= 0xA1 && byteval <= 0xDF)
+ value = (ch - 0xFF01) * 2;
+ value = ((int)(extraToJis[value])) |
+ (((int)(extraToJis[value + 1])) << 8);
+ }
+ else if (ch >= 0xFF60 && ch <= 0xFFA0)
+ {
+ // NOTE(review): 0xFF60 is already taken by the previous branch,
+ // so this branch is only reached for 0xFF61..0xFFA0.
+ value = ch - 0xFF60 + 0xA0;
+ }
+ else
+ {
+ // Invalid character.
+ value = 0;
+ }
+ if (value == 0)
+ {
+ // No mapping: fall back using the loop index i; charIndex is
+ // never advanced and would always name the first character.
+ HandleFallback (ref buffer, chars, ref i, ref charCount,
+ bytes, ref posn, ref byteCount, null);
+ }
+ else if (value < 0x0100)
+ {
+ bytes[posn++] = (byte)value;
+ }
+ else if ((posn + 1) >= byteLength)
+ {
+ throw new ArgumentException
+ (Strings.GetString("Arg_InsufficientSpace"),
+ "bytes");
+ }
+ else if (value < 0x8000)
+ {
+ // JIS X 0208 character.
+ value -= 0x0100;
+ ch = (value / 0xBC);
+ value = (value % 0xBC) + 0x40;
+ if (value >= 0x7F)
{
- // Half-width katakana.
- continue;
+ ++value;
}
- else if(byteval < 0x81 ||
- (byteval > 0x9F && byteval < 0xE0) ||
- byteval > 0xEF)
+ if (ch < (0x9F - 0x80))
{
- // Invalid first byte.
- continue;
+ bytes[posn++] = (byte)(ch + 0x81);
}
- if(count == 0)
+ else
{
- // Missing second byte.
- continue;
+ bytes[posn++] = (byte)(ch - (0x9F - 0x80) + 0xE0);
}
- ++index;
- --count;
+ bytes[posn++] = (byte)value;
}
-
- // Return the total length.
- return length;
- }
-
- // Get the characters that result from decoding a byte buffer.
- public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
- char[] chars, int charIndex)
- {
- // Validate the parameters.
- if(bytes == null)
- {
- throw new ArgumentNullException("bytes");
- }
- if(chars == null)
+ else if (value >= 0xF040 && value <= 0xF9FC)
{
- throw new ArgumentNullException("chars");
+ // PrivateUse
+ bytes[posn++] = (byte)(value / 0x100);
+ bytes[posn++] = (byte)(value % 0x100);
}
- if(byteIndex < 0 || byteIndex > bytes.Length)
+ else
{
- throw new ArgumentOutOfRangeException
- ("byteIndex", Strings.GetString("ArgRange_Array"));
+ // JIS X 0212 character, which Shift-JIS doesn't
+ // support, but we've already allocated two slots.
+ bytes[posn++] = (byte)'?';
+ bytes[posn++] = (byte)'?';
}
- if(byteCount < 0 || byteCount > (bytes.Length - byteIndex))
+ }
+
+ // Return the final length to the caller.
+ return posn - byteIndex;
+ }
+#endif
+
+ // One-shot character count: a throwaway decoder is used with refresh = true,
+ // so no decoding state survives the call.
+ public override int GetCharCount (byte [] bytes, int index, int count)
+ {
+     CP932Decoder decoder = new CP932Decoder (JISConvert.Convert);
+     return decoder.GetCharCount (bytes, index, count, true);
+ }
+
+ // One-shot decode: a throwaway decoder is used with refresh = true,
+ // so no decoding state survives the call. Returns the decoder's result.
+ public override int GetChars (
+     byte [] bytes, int byteIndex, int byteCount,
+     char [] chars, int charIndex)
+ {
+     CP932Decoder decoder = new CP932Decoder (JISConvert.Convert);
+     return decoder.GetChars (
+         bytes, byteIndex, byteCount, chars, charIndex, true);
+ }
+
+ // Upper bound on the encoded size of charCount characters:
+ // two bytes per character. Negative counts are rejected.
+ public override int GetMaxByteCount(int charCount)
+ {
+     if(charCount >= 0)
+     {
+         return charCount * 2;
+     }
+     throw new ArgumentOutOfRangeException
+         ("charCount",
+          Strings.GetString("ArgRange_NonNegative"));
+ }
+
+ // Upper bound on the decoded size of byteCount bytes:
+ // one character per byte. Negative counts are rejected.
+ public override int GetMaxCharCount(int byteCount)
+ {
+     if(byteCount >= 0)
+     {
+         return byteCount;
+     }
+     throw new ArgumentOutOfRangeException
+         ("byteCount",
+          Strings.GetString("ArgRange_NonNegative"));
+ }
+
+ // Create a new CP932Decoder bound to the shared JIS conversion tables.
+ public override Decoder GetDecoder()
+ {
+     Decoder decoder = new CP932Decoder(JISConvert.Convert);
+     return decoder;
+ }
+
+#if !ECMA_COMPAT
+
+ // Name reported for mail bodies.
+ public override String BodyName
+ {
+     get
+     {
+         return "iso-2022-jp";
+     }
+ }
+
+ // Human-readable description of the encoding.
+ public override String EncodingName
+ {
+     get
+     {
+         return "Japanese (Shift-JIS)";
+     }
+ }
+
+ // Name reported for mail agent headers.
+ public override String HeaderName
+ {
+     get
+     {
+         return "iso-2022-jp";
+     }
+ }
+
+ // Whether a Web browser can display this encoding.
+ public override bool IsBrowserDisplay
+ {
+     get
+     {
+         return true;
+     }
+ }
+
+ // Whether a Web browser can save in this encoding.
+ public override bool IsBrowserSave
+ {
+     get
+     {
+         return true;
+     }
+ }
+
+ // Whether a mail/news agent can display this encoding.
+ public override bool IsMailNewsDisplay
+ {
+     get
+     {
+         return true;
+     }
+ }
+
+ // Whether a mail/news agent can save in this encoding.
+ public override bool IsMailNewsSave
+ {
+     get
+     {
+         return true;
+     }
+ }
+
+ // IANA-preferred Web name.
+ public override String WebName
+ {
+     get
+     {
+         return "shift_jis";
+     }
+ }
+
+ // Windows code page number for this encoding (932).
+ public override int WindowsCodePage
+ {
+     get
+     {
+         return SHIFTJIS_CODE_PAGE;
+     }
+ }
+
+ // FIXME: This doesn't make sense, but without declaring this override
+ // System.XML regresses at Encoder.Convert() in
+ // MonoTests.System.Xml.XmlWriterSettingsTests.EncodingTest.
+ // Returns a MonoEncodingDefaultEncoder wrapping this encoding.
+ public override Encoder GetEncoder ()
+ {
+ return new MonoEncodingDefaultEncoder (this);
+ }
+
+#endif // !ECMA_COMPAT
+
+ // The class terminator stays outside the conditional region so that the
+ // braces balance whether or not ECMA_COMPAT is defined.
+ }; // class CP932
+
+ // Decoder that handles a rolling Shift-JIS state.
+ sealed class CP932Decoder : DbcsEncoding.DbcsDecoder
+ {
+ private new JISConvert convert;
+ private int last_byte_count;
+ private int last_byte_chars;
+
+ // Constructor: passes null to the base decoder and stores the supplied tables in this class's own convert field.
+ public CP932Decoder(JISConvert convert)
+ : base (null)
+ {
+ this.convert = convert;
+ }
+
+ // Override inherited methods.
+
+ // Streaming count: delegates with refresh = false, so a pending
+ // lead byte is remembered for the next call.
+ public override int GetCharCount (byte [] bytes, int index, int count)
+ {
+     const bool refresh = false;
+     return GetCharCount (bytes, index, count, refresh);
+ }
+
+ // Count the characters that decoding bytes[index .. index+count) produces,
+ // continuing from any lead byte left pending by the previous call.
+ // A character is counted at the point where GetChars emits it: single-byte
+ // characters immediately, double-byte characters when their second byte
+ // arrives, and a dangling lead byte only when refresh forces it out.
+ // When refresh is true the pending state is cleared afterwards; otherwise
+ // it is saved for the next call.
+ public
+ override
+ int GetCharCount (byte [] bytes, int index, int count, bool refresh)
+ {
+ CheckRange (bytes, index, count);
+
+ // Determine the total length of the converted string.
+ int length = 0;
+ int byteval;
+ int last = last_byte_count;
+ while(count > 0)
+ {
+ byteval = bytes[index++];
+ --count;
+ if(last == 0)
{
- throw new ArgumentOutOfRangeException
- ("byteCount", Strings.GetString("ArgRange_Array"));
+ if((byteval >= 0x81 && byteval <= 0x9F) ||
+ (byteval >= 0xE0 && byteval <= 0xEF))
+ {
+ // First byte in a double-byte sequence: nothing is
+ // produced until the second byte (or a flush) arrives.
+ last = byteval;
+ }
+ else
+ {
+ // Single-byte character (or invalid first byte).
+ ++length;
+ }
}
- if(charIndex < 0 || charIndex > chars.Length)
+ else
{
- throw new ArgumentOutOfRangeException
- ("charIndex", Strings.GetString("ArgRange_Array"));
+ // Second byte in a double-byte sequence: the pair
+ // yields exactly one character.
+ last = 0;
+ ++length;
}
-
- // Determine the total length of the converted string.
- int charLength = chars.Length;
- int posn = charIndex;
- int length = 0;
- int byteval, value;
+ }
+ if (refresh) {
+ // A dangling lead byte is flushed as one replacement character.
+ if (last != 0)
+ length++;
+ last_byte_count = 0;
+ }
+ else
+ last_byte_count = last;
+
+ // Return the total length.
+ return length;
+ }
+
+ // Streaming decode: delegates with refresh = false, so a pending
+ // lead byte is remembered for the next call.
+ public override int GetChars (
+     byte [] bytes, int byteIndex, int byteCount,
+     char [] chars, int charIndex)
+ {
+     const bool refresh = false;
+     return GetChars (
+         bytes, byteIndex, byteCount, chars, charIndex, refresh);
+ }
+
+ public
+ override
+ int GetChars (
+ byte [] bytes, int byteIndex, int byteCount,
+ char [] chars, int charIndex, bool refresh)
+ {
+ CheckRange (bytes, byteIndex, byteCount,
+ chars, charIndex);
+
+ // Decode the bytes in the buffer.
+ int posn = charIndex;
+ int charLength = chars.Length;
+ int byteval, value;
+ int last = last_byte_chars;
#if __PNET__
- byte *table = convert.jisx0208ToUnicode;
+ byte *table = convert.jisx0208ToUnicode;
#else
- byte[] table = convert.jisx0208ToUnicode;
+ byte[] table = convert.jisx0208ToUnicode;
#endif
- while(byteCount > 0)
+ while(byteCount > 0)
+ {
+ byteval = bytes[byteIndex++];
+ --byteCount;
+ if(last == 0)
{
- byteval = bytes[byteIndex++];
- --byteCount;
- ++length;
if(posn >= charLength)
{
throw new ArgumentException
- (Strings.GetString("Arg_InsufficientSpace"),
- "chars");
+ (Strings.GetString
+ ("Arg_InsufficientSpace"), "chars");
+ }
+ if((byteval >= 0x81 && byteval <= 0x9F) ||
+ (byteval >= 0xE0 && byteval <= 0xEF))
+ {
+ // First byte in a double-byte sequence.
+ last = byteval;
}
else if(byteval < 0x80)
{
// Ordinary ASCII/Latin1 character.
chars[posn++] = (char)byteval;
- continue;
}
else if(byteval >= 0xA1 && byteval <= 0xDF)
{
- // Half-width katakana.
+ // Half-width katakana character.
chars[posn++] = (char)(byteval - 0xA1 + 0xFF61);
- continue;
}
- else if(byteval >= 0x81 && byteval <= 0x9F)
+ else
{
- value = (byteval - 0x81) * 0xBC;
+ // Invalid first byte.
+ chars[posn++] = '?';
}
- else if(byteval >= 0xE0 && byteval <= 0xEF)
+ }
+ else
+ {
+ // Second byte in a double-byte sequence.
+ if(last >= 0x81 && last <= 0x9F)
{
- value = (byteval - 0xE0 + (0xA0 - 0x81)) * 0xBC;
+ value = (last - 0x81) * 0xBC;
}
- else
+ else if (last >= 0xF0 && last <= 0xFC && byteval <= 0xFC)
{
- // Invalid first byte.
- chars[posn++] = '?';
- continue;
+ // PrivateUse
+ value = 0xE000 + (last - 0xF0) * 0xBC + byteval;
+ if (byteval > 0x7F)
+ value--;
}
- if(byteCount == 0)
+ else
{
- // Missing second byte.
- chars[posn++] = '?';
- continue;
+ value = (last - 0xE0 + (0xA0 - 0x81)) * 0xBC;
}
- byteval = bytes[byteIndex++];
- --byteCount;
+ last = 0;
if(byteval >= 0x40 && byteval <= 0x7E)
{
value += (byteval - 0x40);
chars[posn++] = '?';
}
}
-
- // Return the total length.
- return posn - charIndex;
- }
-
- // Get the maximum number of bytes needed to encode a
- // specified number of characters.
- public override int GetMaxByteCount(int charCount)
- {
- if(charCount < 0)
- {
- throw new ArgumentOutOfRangeException
- ("charCount",
- Strings.GetString("ArgRange_NonNegative"));
- }
- return charCount * 2;
- }
-
- // Get the maximum number of characters needed to decode a
- // specified number of bytes.
- public override int GetMaxCharCount(int byteCount)
- {
- if(byteCount < 0)
- {
- throw new ArgumentOutOfRangeException
- ("byteCount",
- Strings.GetString("ArgRange_NonNegative"));
- }
- return byteCount;
}
-
- // Get a decoder that handles a rolling Shift-JIS state.
- public override Decoder GetDecoder()
- {
- return new CP932Decoder(convert);
+ if (refresh) {
+ if (last != 0)
+ chars[posn++] = '\u30FB';
+ last_byte_chars = '\0';
}
+ else
+ last_byte_chars = last;
-#if !ECMA_COMPAT
-
- // Get the mail body name for this encoding.
- public override String BodyName
- {
- get
- {
- return "iso-2022-jp";
- }
- }
-
- // Get the human-readable name for this encoding.
- public override String EncodingName
- {
- get
- {
- return "Japanese (Shift-JIS)";
- }
- }
-
- // Get the mail agent header name for this encoding.
- public override String HeaderName
- {
- get
- {
- return "iso-2022-jp";
- }
- }
-
- // Determine if this encoding can be displayed in a Web browser.
- public override bool IsBrowserDisplay
- {
- get
- {
- return true;
- }
- }
-
- // Determine if this encoding can be saved from a Web browser.
- public override bool IsBrowserSave
- {
- get
- {
- return true;
- }
- }
-
- // Determine if this encoding can be displayed in a mail/news agent.
- public override bool IsMailNewsDisplay
- {
- get
- {
- return true;
- }
- }
-
- // Determine if this encoding can be saved from a mail/news agent.
- public override bool IsMailNewsSave
- {
- get
- {
- return true;
- }
- }
-
- // Get the IANA-preferred Web name for this encoding.
- public override String WebName
- {
- get
- {
- return "shift_jis";
- }
- }
-
- // Get the Windows code page represented by this object.
- public override int WindowsCodePage
- {
- get
- {
- return SHIFTJIS_CODE_PAGE;
- }
- }
-
-#endif // !ECMA_COMPAT
-
- // Decoder that handles a rolling Shift-JIS state.
- private sealed class CP932Decoder : Decoder
- {
- private JISConvert convert;
- private int lastByte;
-
- // Constructor.
- public CP932Decoder(JISConvert convert)
- {
- this.convert = convert;
- this.lastByte = 0;
- }
-
- // Override inherited methods.
- public override int GetCharCount(byte[] bytes, int index, int count)
- {
- // Validate the parameters.
- if(bytes == null)
- {
- throw new ArgumentNullException("bytes");
- }
- if(index < 0 || index > bytes.Length)
- {
- throw new ArgumentOutOfRangeException
- ("index", Strings.GetString("ArgRange_Array"));
- }
- if(count < 0 || count > (bytes.Length - index))
- {
- throw new ArgumentOutOfRangeException
- ("count", Strings.GetString("ArgRange_Array"));
- }
-
- // Determine the total length of the converted string.
- int length = 0;
- int byteval;
- int last = lastByte;
- while(count > 0)
- {
- byteval = bytes[index++];
- --count;
- if(last == 0)
- {
- if((byteval >= 0x81 && byteval <= 0x9F) ||
- (byteval >= 0xE0 && byteval <= 0xEF))
- {
- // First byte in a double-byte sequence.
- last = byteval;
- }
- ++length;
- }
- else
- {
- // Second byte in a double-byte sequence.
- last = 0;
- }
- }
-
- // Return the total length.
- return length;
- }
- public override int GetChars(byte[] bytes, int byteIndex,
- int byteCount, char[] chars,
- int charIndex)
- {
- // Validate the parameters.
- if(bytes == null)
- {
- throw new ArgumentNullException("bytes");
- }
- if(chars == null)
- {
- throw new ArgumentNullException("chars");
- }
- if(byteIndex < 0 || byteIndex > bytes.Length)
- {
- throw new ArgumentOutOfRangeException
- ("byteIndex", Strings.GetString("ArgRange_Array"));
- }
- if(byteCount < 0 || byteCount > (bytes.Length - byteIndex))
- {
- throw new ArgumentOutOfRangeException
- ("byteCount", Strings.GetString("ArgRange_Array"));
- }
- if(charIndex < 0 || charIndex > chars.Length)
- {
- throw new ArgumentOutOfRangeException
- ("charIndex", Strings.GetString("ArgRange_Array"));
- }
-
- // Decode the bytes in the buffer.
- int posn = charIndex;
- int charLength = chars.Length;
- int byteval, value;
- int last = lastByte;
-#if __PNET__
- byte *table = convert.jisx0208ToUnicode;
-#else
- byte[] table = convert.jisx0208ToUnicode;
-#endif
- while(byteCount > 0)
- {
- byteval = bytes[byteIndex++];
- --byteCount;
- if(last == 0)
- {
- if(posn >= charLength)
- {
- throw new ArgumentException
- (Strings.GetString
- ("Arg_InsufficientSpace"), "chars");
- }
- if((byteval >= 0x81 && byteval <= 0x9F) ||
- (byteval >= 0xE0 && byteval <= 0xEF))
- {
- // First byte in a double-byte sequence.
- last = byteval;
- }
- else if(byteval < 0x80)
- {
- // Ordinary ASCII/Latin1 character.
- chars[posn++] = (char)byteval;
- }
- else if(byteval >= 0xA1 && byteval <= 0xDF)
- {
- // Half-width katakana character.
- chars[posn++] = (char)(byteval - 0xA1 + 0xFF61);
- }
- else
- {
- // Invalid first byte.
- chars[posn++] = '?';
- }
- }
- else
- {
- // Second byte in a double-byte sequence.
- if(last >= 0x81 && last <= 0x9F)
- {
- value = (last - 0x81) * 0xBC;
- }
- else if (last >= 0xF0 && last <= 0xFC && byteval <= 0xFC)
- {
- // PrivateUse
- value = 0xE000 + (last - 0xF0) * 0xBC + byteval;
- if (byteval > 0x7F)
- value--;
- }
- else
- {
- value = (last - 0xE0 + (0xA0 - 0x81)) * 0xBC;
- }
- last = 0;
- if(byteval >= 0x40 && byteval <= 0x7E)
- {
- value += (byteval - 0x40);
- }
- else if(byteval >= 0x80 && byteval <= 0xFC)
- {
- value += (byteval - 0x80 + 0x3F);
- }
- else
- {
- // Invalid second byte.
- chars[posn++] = '?';
- continue;
- }
- value *= 2;
- value = ((int)(table[value])) |
- (((int)(table[value + 1])) << 8);
- if(value != 0)
- {
- chars[posn++] = (char)value;
- }
- else
- {
- chars[posn++] = '?';
- }
- }
- }
- lastByte = last;
-
- // Return the final length to the caller.
- return posn - charIndex;
- }
+ // Return the final length to the caller.
+ return posn - charIndex;
+ }
} // class CP932Decoder
-}; // class CP932
-
-public class ENCshift_jis : CP932
-{
- public ENCshift_jis() : base() {}
+ [Serializable]
+ public class ENCshift_jis : CP932 // adds no members of its own; presumably an alias resolved by encoding name - TODO confirm
+ {
+ public ENCshift_jis() : base() {}
-}; // class ENCshift_jis
+ }; // class ENCshift_jis
}; // namespace I18N.CJK