Fixes #683475: - Fixed ISO2022JPEncoder's GetBytesCount(..) to avoid it from returnin...
authorPablo Ruiz Garcia <pablo.ruiz@gmail.com>
Tue, 15 Nov 2011 17:50:43 +0000 (18:50 +0100)
committerRodrigo Kumpera <kumpera@gmail.com>
Wed, 23 Nov 2011 22:25:43 +0000 (20:25 -0200)
NOTE: These fixes were found by creating alternative safe-only implementations of the actual CJK encoders. Such implementations are disabled by default for performance reasons, but can be enabled if needed by defining the conditional compilation symbol DISABLE_UNSAFE.

18 files changed:
mcs/class/I18N/CJK/CP51932.cs
mcs/class/I18N/CJK/CP932.cs
mcs/class/I18N/CJK/CP936.cs
mcs/class/I18N/CJK/CP949.cs
mcs/class/I18N/CJK/CP950.cs
mcs/class/I18N/CJK/DbcsEncoding.cs
mcs/class/I18N/CJK/GB18030Encoding.cs
mcs/class/I18N/CJK/GB18030Source.cs
mcs/class/I18N/CJK/ISO2022JP.cs
mcs/class/I18N/CJK/Test/I18N.CJK.Test.cs
mcs/class/I18N/CJK/Test/texts/japanese3-50220.txt [new file with mode: 0644]
mcs/class/I18N/CJK/Test/texts/japanese3-50221.txt [new file with mode: 0644]
mcs/class/I18N/CJK/Test/texts/japanese3-utf8.txt [new file with mode: 0644]
mcs/class/I18N/Common/I18N-net_2_0.csproj
mcs/class/I18N/Common/I18N-net_4_0.csproj
mcs/class/I18N/Common/I18N.dll.sources
mcs/class/I18N/Common/MonoEncoding.cs
mcs/class/I18N/Common/MonoSafeEncoding.cs [new file with mode: 0644]

index bffb15ba77b8fabf9d62be16beb0e7759a468402..bb2696546540cce762200d625adf13a0c7369f9f 100644 (file)
@@ -58,6 +58,11 @@ using System;
 using System.Text;
 using I18N.Common;
 
+#if DISABLE_UNSAFE
+using MonoEncoder = I18N.Common.MonoSafeEncoder;
+using MonoEncoding = I18N.Common.MonoSafeEncoding;
+#endif
+
 [Serializable]
 public class CP51932 : MonoEncoding
 {
@@ -69,12 +74,7 @@ public class CP51932 : MonoEncoding
        {
        }
 
-
-       public override int GetByteCount (char [] chars, int index, int length)
-       {
-               return new CP51932Encoder (this).GetByteCount (chars, index, length, true);
-       }
-
+#if !DISABLE_UNSAFE
        public unsafe override int GetByteCountImpl (char* chars, int count)
        {
                return new CP51932Encoder (this).GetByteCountImpl (chars, count, true);
@@ -84,6 +84,17 @@ public class CP51932 : MonoEncoding
        {
                return new CP51932Encoder (this).GetBytesImpl (chars, charCount, bytes, byteCount, true);
        }
+#else
+       // Array-based GetByteCount compiled when DISABLE_UNSAFE is defined:
+       // delegates to a new CP51932Encoder, passing true as the flush argument.
+       public override int GetByteCount (char [] chars, int index, int length)
+       {
+               return new CP51932Encoder (this).GetByteCount (chars, index, length, true);
+       }
+
+       // Array-based GetBytes compiled when DISABLE_UNSAFE is defined:
+       // delegates to a new CP51932Encoder, passing true as the flush argument,
+       // and returns whatever byte count that encoder reports.
+       public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
+       {
+               return new CP51932Encoder (this).GetBytes (chars, charIndex, charCount, bytes, byteIndex, true);
+       }
+#endif
 
        public override int GetCharCount (byte [] bytes, int index, int count)
        {
@@ -196,6 +207,7 @@ public class CP51932Encoder : MonoEncoder
        {
        }
 
+#if !DISABLE_UNSAFE
        // Get the number of bytes needed to encode a character buffer.
        public unsafe override int GetByteCountImpl (
                char* chars, int count, bool refresh)
@@ -302,7 +314,7 @@ public class CP51932Encoder : MonoEncoder
 #if NET_2_0
                                HandleFallback (
                                        chars, ref charIndex, ref charCount,
-                                       bytes, ref posn, ref byteCount);
+                                       bytes, ref posn, ref byteCount, null);
 #else
                                bytes [posn++] = (byte) '?';
 #endif
@@ -329,6 +341,167 @@ public class CP51932Encoder : MonoEncoder
                // Return the final length to the caller.
                return posn - byteIndex;
        }
+#else
+       // Get the number of bytes needed to encode a character buffer.
+       //
+       // Walks "count" chars of "chars" starting at "index" and returns the
+       // total: one byte for every char, plus one more for each char this
+       // method classifies as double-byte. The "flush" argument is not read.
+       public override int GetByteCount(char[] chars, int index, int count, bool flush)
+       {
+               // Determine the length of the final output.
+               int length = 0;
+               int ch, value;
+               byte[] cjkToJis = JISConvert.Convert.cjkToJis;
+               byte[] extraToJis = JISConvert.Convert.extraToJis;
+
+               while (count > 0)
+               {
+                       ch = chars[index++];
+                       --count;
+                       // Every char contributes at least one byte; the branches
+                       // below only decide whether a second byte is added.
+                       ++length;
+                       if (ch < 0x0080)
+                       {
+                               // Character maps to itself.
+                               continue;
+                       }
+                       else if (ch < 0x0100)
+                       {
+                               // Check for special Latin 1 characters that
+                               // can be mapped to double-byte code points.
+                               if (ch == 0x00A2 || ch == 0x00A3 || ch == 0x00A7 ||
+                                  ch == 0x00A8 || ch == 0x00AC || ch == 0x00B0 ||
+                                  ch == 0x00B1 || ch == 0x00B4 || ch == 0x00B6 ||
+                                  ch == 0x00D7 || ch == 0x00F7)
+                               {
+                                       ++length;
+                               }
+                       }
+                       else if (ch >= 0x0391 && ch <= 0x0451)
+                       {
+                               // Greek subset characters.
+                               // Counted as two bytes without consulting a table.
+                               ++length;
+                       }
+                       else if (ch >= 0x2010 && ch <= 0x9FA5)
+                       {
+                               // This range contains the bulk of the CJK set.
+                               // Each table entry is two bytes, low byte first; a
+                               // combined value of 0x0100 or more needs a second byte.
+                               value = (ch - 0x2010) * 2;
+                               value = ((int)(cjkToJis[value])) | (((int)(cjkToJis[value + 1])) << 8);
+                               if (value >= 0x0100)
+                                       ++length;
+                       }
+                       else if (ch >= 0xFF01 && ch < 0xFF60)
+                       {
+                               // This range contains extra characters.
+                               // Same two-byte, low-byte-first table layout as above.
+                               value = (ch - 0xFF01) * 2;
+                               value = ((int)(extraToJis[value])) |
+                                               (((int)(extraToJis[value + 1])) << 8);
+                               if (value >= 0x0100)
+                                       ++length;
+                       }
+                       else if (ch >= 0xFF60 && ch <= 0xFFA0)
+                       {
+                               ++length; // half-width kana
+                       }
+               }
+
+               // Return the length to the caller.
+               return length;
+       }
+
+       // Get the bytes that result from encoding a character buffer.
+       //
+       // Encodes "charCount" chars of "chars", starting at "charIndex", into
+       // "bytes" starting at "byteIndex", and returns the number of bytes
+       // written (posn - byteIndex). Throws ArgumentException ("bytes") when
+       // the output array has no room left. The "flush" argument is not read.
+       //
+       // NOTE(review): there is no branch for 0x0080-0x00FF here, so those
+       // chars fall through to the "Invalid character" path even though
+       // GetByteCount counts some of them as double-byte - confirm intended.
+       public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex, bool flush)
+       {
+               // Convert the characters into their byte form.
+               int posn = byteIndex;
+               int byteLength = bytes.Length;
+               int byteCount = bytes.Length;
+               int ch, value;
+
+               byte[] cjkToJis = JISConvert.Convert.cjkToJis;
+               byte[] greekToJis = JISConvert.Convert.greekToJis;
+               byte[] extraToJis = JISConvert.Convert.extraToJis;
+
+               for (; charCount > 0; charIndex++, --charCount)
+               {
+                       ch = chars[charIndex];
+                       // At least one output byte is needed for every char.
+                       if (posn >= byteLength)
+                       {
+                               throw new ArgumentException(Strings.GetString("Arg_InsufficientSpace"), "bytes");
+                       }
+
+                       if (ch < 0x0080)
+                       {
+                               // Character maps to itself.
+                               bytes[posn++] = (byte)ch;
+                               continue;
+                       }
+                       else if (ch >= 0x0391 && ch <= 0x0451)
+                       {
+                               // Greek subset characters.
+                               // Table entries are two bytes each, low byte first.
+                               value = (ch - 0x0391) * 2;
+                               value = ((int)(greekToJis[value])) |
+                                               (((int)(greekToJis[value + 1])) << 8);
+                       }
+                       else if (ch >= 0x2010 && ch <= 0x9FA5)
+                       {
+                               // This range contains the bulk of the CJK set.
+                               value = (ch - 0x2010) * 2;
+                               value = ((int)(cjkToJis[value])) |
+                                               (((int)(cjkToJis[value + 1])) << 8);
+                       }
+                       else if (ch >= 0xFF01 && ch <= 0xFF60)
+                       {
+                               // This range contains extra characters,
+                               // including half-width katakana.
+                               value = (ch - 0xFF01) * 2;
+                               value = ((int)(extraToJis[value])) |
+                                               (((int)(extraToJis[value + 1])) << 8);
+                       }
+                       else if (ch >= 0xFF60 && ch <= 0xFFA0)
+                       {
+                               // 0xFF60 itself is already taken by the branch above,
+                               // so this branch is reached for 0xFF61-0xFFA0 only.
+                               value = ch - 0xFF60 + 0x8EA0;
+                       }
+                       else
+                       {
+                               // Invalid character.
+                               value = 0;
+                       }
+
+                       // value == 0 means "no mapping"; values below 0x0100 are
+                       // emitted as a single byte; anything else takes two bytes.
+                       if (value == 0)
+                       {
+#if NET_2_0
+                               HandleFallback (chars, ref charIndex, ref charCount,
+                                       bytes, ref posn, ref byteCount, null);
+#else
+                               bytes [posn++] = (byte) '?';
+#endif
+                       }
+                       else if (value < 0x0100)
+                       {
+                               bytes[posn++] = (byte)value;
+                       }
+                       else if ((posn + 1) >= byteLength)
+                       {
+                               throw new ArgumentException(Strings.GetString("Arg_InsufficientSpace"), "bytes");
+                       }
+                       else if (value < 0x8000)
+                       {
+                               // general 2byte glyph/kanji
+                               // After removing the 0x0100 bias, quotient and remainder
+                               // by 0x5E (94) give the two bytes, each offset by 0xA1.
+                               value -= 0x0100;
+                               bytes[posn++] = (byte)(value / 0x5E + 0xA1);
+                               bytes[posn++] = (byte)(value % 0x5E + 0xA1);
+                               //Console.WriteLine ("{0:X04}", ch);
+                               continue;
+                       }
+                       else
+                       {
+                               // half-width kana
+                               // Emitted as the 0x8E lead byte followed by the low
+                               // byte of the value computed above.
+                               bytes[posn++] = 0x8E;
+                               bytes[posn++] = (byte)(value - 0x8E00);
+                       }
+               }
+
+               // Return the final length to the caller.
+               return posn - byteIndex;
+       }
+#endif
 } // CP51932Encoder
 
 internal class CP51932Decoder : DbcsEncoding.DbcsDecoder
index 7d3f2ce1a02ea2a3f7fdd30d4296f7337ecc68ea..7a388af9232ceca57174421005ce2a1fb438c4da 100644 (file)
@@ -33,8 +33,13 @@ namespace I18N.CJK
        using System.Text;
        using I18N.Common;
 
+#if DISABLE_UNSAFE
+       using MonoEncoder = I18N.Common.MonoSafeEncoder;
+       using MonoEncoding = I18N.Common.MonoSafeEncoding;
+#endif
+
        [Serializable]
-       public unsafe class CP932 : MonoEncoding
+       public class CP932 : MonoEncoding
        {
                // Magic number used by Windows for the Shift-JIS code page.
                private const int SHIFTJIS_CODE_PAGE = 932;
@@ -44,6 +49,7 @@ namespace I18N.CJK
                {
                }
 
+#if !DISABLE_UNSAFE
                // Get the number of bytes needed to encode a character buffer.
                public unsafe override int GetByteCountImpl (char* chars, int count)
                {
@@ -130,6 +136,7 @@ namespace I18N.CJK
 
                        // Convert the characters into their byte form.
                        int posn = byteIndex;
+                       int end = charCount;
                        int byteLength = byteCount;
                        int ch, value;
 #if __PNET__
@@ -141,10 +148,9 @@ namespace I18N.CJK
                        byte[] greekToJis = JISConvert.Convert.greekToJis;
                        byte[] extraToJis = JISConvert.Convert.extraToJis;
 #endif
-                       while(charCount > 0)
+                       for (int i = charIndex; i < end; i++, charCount--)
                        {
-                               ch = chars[charIndex++];
-                               --charCount;
+                               ch = chars[i];
                                if(posn >= byteLength)
                                {
                                        throw new ArgumentException
@@ -240,7 +246,7 @@ namespace I18N.CJK
 #if NET_2_0
                                                HandleFallback (ref buffer,
                                                        chars, ref charIndex, ref charCount,
-                                                       bytes, ref posn, ref byteCount);
+                                                       bytes, ref posn, ref byteCount, null);
 #else
                                                // Invalid character.
                                                bytes[posn++] = (byte)'?';
@@ -292,7 +298,7 @@ namespace I18N.CJK
 #if NET_2_0
                                        HandleFallback (ref buffer,
                                                chars, ref charIndex, ref charCount,
-                                               bytes, ref posn, ref byteCount);
+                                               bytes, ref posn, ref byteCount, null);
 #else
                                        bytes[posn++] = (byte)'?';
 #endif
@@ -345,6 +351,298 @@ namespace I18N.CJK
                        // Return the final length to the caller.
                        return posn - byteIndex;
                }
+#else
+               // Get the number of bytes needed to encode a character buffer.
+               //
+               // Walks "count" chars of "chars" starting at "index" and returns the
+               // total: one byte for every char, plus one more for each char this
+               // method classifies as double-byte.
+               public override int GetByteCount(char[] chars, int index, int count)
+               {
+                       // Determine the length of the final output.
+                       int length = 0;
+                       int ch, value;
+                       byte[] cjkToJis = JISConvert.Convert.cjkToJis;
+                       byte[] extraToJis = JISConvert.Convert.extraToJis;
+
+                       while (count > 0)
+                       {
+                               ch = chars[index++];
+                               --count;
+                               // Every char contributes at least one byte; the branches
+                               // below only decide whether a second byte is added.
+                               ++length;
+                               if (ch < 0x0080)
+                               {
+                                       // Character maps to itself.
+                                       continue;
+                               }
+                               else if (ch < 0x0100)
+                               {
+                                       // Check for special Latin 1 characters that
+                                       // can be mapped to double-byte code points.
+                                       if (ch == 0x00A2 || ch == 0x00A3 || ch == 0x00A7 ||
+                                          ch == 0x00A8 || ch == 0x00AC || ch == 0x00B0 ||
+                                          ch == 0x00B1 || ch == 0x00B4 || ch == 0x00B6 ||
+                                          ch == 0x00D7 || ch == 0x00F7)
+                                       {
+                                               ++length;
+                                       }
+                               }
+                               else if (ch >= 0x0391 && ch <= 0x0451)
+                               {
+                                       // Greek subset characters.
+                                       // Counted as two bytes without consulting a table.
+                                       ++length;
+                               }
+                               else if (ch >= 0x2010 && ch <= 0x9FA5)
+                               {
+                                       // This range contains the bulk of the CJK set.
+                                       // Each table entry is two bytes, low byte first; a
+                                       // combined value of 0x0100 or more needs a second byte.
+                                       value = (ch - 0x2010) * 2;
+                                       value = ((int)(cjkToJis[value])) |
+                                                       (((int)(cjkToJis[value + 1])) << 8);
+                                       if (value >= 0x0100)
+                                       {
+                                               ++length;
+                                       }
+                               }
+                               else if (ch >= 0xE000 && ch <= 0xE757)
+                                       // PrivateUse
+                                       ++length;
+                               else if (ch >= 0xFF01 && ch <= 0xFFEF)
+                               {
+                                       // This range contains extra characters,
+                                       // including half-width katakana.
+                                       value = (ch - 0xFF01) * 2;
+                                       value = ((int)(extraToJis[value])) |
+                                                       (((int)(extraToJis[value + 1])) << 8);
+                                       if (value >= 0x0100)
+                                       {
+                                               ++length;
+                                       }
+                               }
+                       }
+
+                       // Return the length to the caller.
+                       return length;
+               }
+
+               // Get the bytes that result from encoding a character buffer.
+               //
+               // Encodes "charCount" chars of "chars", starting at "charIndex", into
+               // "bytes" starting at "byteIndex", and returns the number of bytes
+               // written (posn - byteIndex). Throws ArgumentException ("bytes") when
+               // the output array has no room left.
+               public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
+               {
+                       int byteCount = bytes.Length;
+#if NET_2_0
+                       EncoderFallbackBuffer buffer = null;
+#endif
+
+                       // Convert the characters into their byte form.
+                       // "i" is the read cursor and "posn" the write cursor; "charIndex"
+                       // and "byteIndex" keep their entry values for the whole loop.
+                       int posn = byteIndex;
+                       int end = charIndex + charCount;
+                       int byteLength = byteCount;
+                       int value;
+                       byte[] cjkToJis = JISConvert.Convert.cjkToJis;
+                       byte[] greekToJis = JISConvert.Convert.greekToJis;
+                       byte[] extraToJis = JISConvert.Convert.extraToJis;
+
+                       for (int i = charIndex; i < end; i++, charCount--)
+                       {
+                               int ch = chars[i];
+
+                               // At least one output byte is needed for every char.
+                               if (posn >= byteLength)
+                               {
+                                       throw new ArgumentException
+                                               (Strings.GetString("Arg_InsufficientSpace"),
+                                                "bytes");
+                               }
+                               if (ch < 0x0080)
+                               {
+                                       // Character maps to itself.
+                                       bytes[posn++] = (byte)ch;
+                                       continue;
+                               }
+                               else if (ch < 0x0100)
+                               {
+                                       // Check for special Latin 1 characters that
+                                       // can be mapped to double-byte code points.
+                                       if (ch == 0x00A2 || ch == 0x00A3 || ch == 0x00A7 ||
+                                          ch == 0x00A8 || ch == 0x00AC || ch == 0x00B0 ||
+                                          ch == 0x00B1 || ch == 0x00B4 || ch == 0x00B6 ||
+                                          ch == 0x00D7 || ch == 0x00F7)
+                                       {
+                                               if ((posn + 1) >= byteLength)
+                                               {
+                                                       throw new ArgumentException
+                                                               (Strings.GetString
+                                                                       ("Arg_InsufficientSpace"), "bytes");
+                                               }
+                                               switch (ch)
+                                               {
+                                                       case 0x00A2:
+                                                               bytes[posn++] = (byte)0x81;
+                                                               bytes[posn++] = (byte)0x91;
+                                                               break;
+
+                                                       case 0x00A3:
+                                                               bytes[posn++] = (byte)0x81;
+                                                               bytes[posn++] = (byte)0x92;
+                                                               break;
+
+                                                       case 0x00A7:
+                                                               bytes[posn++] = (byte)0x81;
+                                                               bytes[posn++] = (byte)0x98;
+                                                               break;
+
+                                                       case 0x00A8:
+                                                               bytes[posn++] = (byte)0x81;
+                                                               bytes[posn++] = (byte)0x4E;
+                                                               break;
+
+                                                       case 0x00AC:
+                                                               bytes[posn++] = (byte)0x81;
+                                                               bytes[posn++] = (byte)0xCA;
+                                                               break;
+
+                                                       case 0x00B0:
+                                                               bytes[posn++] = (byte)0x81;
+                                                               bytes[posn++] = (byte)0x8B;
+                                                               break;
+
+                                                       case 0x00B1:
+                                                               bytes[posn++] = (byte)0x81;
+                                                               bytes[posn++] = (byte)0x7D;
+                                                               break;
+
+                                                       case 0x00B4:
+                                                               bytes[posn++] = (byte)0x81;
+                                                               bytes[posn++] = (byte)0x4C;
+                                                               break;
+
+                                                       case 0x00B6:
+                                                               bytes[posn++] = (byte)0x81;
+                                                               bytes[posn++] = (byte)0xF7;
+                                                               break;
+
+                                                       case 0x00D7:
+                                                               bytes[posn++] = (byte)0x81;
+                                                               bytes[posn++] = (byte)0x7E;
+                                                               break;
+
+                                                       case 0x00F7:
+                                                               bytes[posn++] = (byte)0x81;
+                                                               bytes[posn++] = (byte)0x80;
+                                                               break;
+                                               }
+                                       }
+                                       else if (ch == 0x00A5)
+                                       {
+                                               // Yen sign.
+                                               bytes[posn++] = (byte)0x5C;
+                                       }
+                                       else
+                                       {
+#if NET_2_0
+                                               // Pass the write cursor (posn), not the start
+                                               // offset (byteIndex): otherwise the fallback
+                                               // output lands at the start of the buffer and
+                                               // the "posn - byteIndex" result is corrupted.
+                                               // Assumes HandleFallback writes at, and advances,
+                                               // the byte position it is given.
+                                               HandleFallback (ref buffer, chars, ref i, ref charCount, bytes,
+                                                       ref posn, ref byteCount, null);
+#else
+                                               // Invalid character.
+                                               bytes[posn++] = (byte)'?';
+#endif
+                                       }
+                                       continue;
+                               }
+                               else if (ch >= 0x0391 && ch <= 0x0451)
+                               {
+                                       // Greek subset characters.
+                                       // Table entries are two bytes each, low byte first.
+                                       value = (ch - 0x0391) * 2;
+                                       value = ((int)(greekToJis[value])) |
+                                                       (((int)(greekToJis[value + 1])) << 8);
+                               }
+                               else if (ch >= 0x2010 && ch <= 0x9FA5)
+                               {
+                                       // This range contains the bulk of the CJK set.
+                                       value = (ch - 0x2010) * 2;
+                                       value = ((int)(cjkToJis[value])) |
+                                                       (((int)(cjkToJis[value + 1])) << 8);
+                               }
+                               else if (ch >= 0xE000 && ch <= 0xE757)
+                               {
+                                       // PrivateUse
+                                       // 0xBC (188) code points per lead byte, starting at
+                                       // 0xF040; trail byte 0x7F is skipped.
+                                       int diff = ch - 0xE000;
+                                       value = ((int)(diff / 0xBC) << 8)
+                                               + (diff % 0xBC)
+                                               + 0xF040;
+                                       if (value % 0x100 >= 0x7F)
+                                               value++;
+                               }
+                               else if (ch >= 0xFF01 && ch <= 0xFF60)
+                               {
+                                       value = (ch - 0xFF01) * 2;
+                                       value = ((int)(extraToJis[value])) |
+                                                       (((int)(extraToJis[value + 1])) << 8);
+                               }
+                               else if (ch >= 0xFF60 && ch <= 0xFFA0)
+                               {
+                                       // 0xFF60 itself is already taken by the branch above,
+                                       // so this branch is reached for 0xFF61-0xFFA0 only.
+                                       value = ch - 0xFF60 + 0xA0;
+                               }
+                               else
+                               {
+                                       // Invalid character.
+                                       value = 0;
+                               }
+                               // value == 0 means "no mapping"; values below 0x0100 are
+                               // emitted as a single byte; anything else takes two bytes.
+                               if (value == 0)
+                               {
+#if NET_2_0
+                                       // The loop reads through "i"; "charIndex" is never
+                                       // advanced, so passing it would hand the fallback the
+                                       // wrong character. Use "i", matching the Latin-1 call.
+                                       HandleFallback (ref buffer, chars, ref i, ref charCount,
+                                               bytes, ref posn, ref byteCount, null);
+#else
+                                       bytes[posn++] = (byte)'?';
+#endif
+                               }
+                               else if (value < 0x0100)
+                               {
+                                       bytes[posn++] = (byte)value;
+                               }
+                               else if ((posn + 1) >= byteLength)
+                               {
+                                       throw new ArgumentException
+                                               (Strings.GetString("Arg_InsufficientSpace"),
+                                                "bytes");
+                               }
+                               else if (value < 0x8000)
+                               {
+                                       // JIS X 0208 character.
+                                       // "ch" is reused as the lead-byte index from here on.
+                                       value -= 0x0100;
+                                       ch = (value / 0xBC);
+                                       value = (value % 0xBC) + 0x40;
+                                       if (value >= 0x7F)
+                                       {
+                                               ++value;
+                                       }
+                                       if (ch < (0x9F - 0x80))
+                                       {
+                                               bytes[posn++] = (byte)(ch + 0x81);
+                                       }
+                                       else
+                                       {
+                                               bytes[posn++] = (byte)(ch - (0x9F - 0x80) + 0xE0);
+                                       }
+                                       bytes[posn++] = (byte)value;
+                               }
+                               else if (value >= 0xF040 && value <= 0xF9FC)
+                               {
+                                       // PrivateUse
+                                       bytes[posn++] = (byte)(value / 0x100);
+                                       bytes[posn++] = (byte)(value % 0x100);
+                               }
+                               else
+                               {
+                                       // JIS X 0212 character, which Shift-JIS doesn't
+                                       // support, but we've already allocated two slots.
+                                       bytes[posn++] = (byte)'?';
+                                       bytes[posn++] = (byte)'?';
+                               }
+                       }
+
+                       // Return the final length to the caller.
+                       return posn - byteIndex;
+               }
+#endif
 
                public override int GetCharCount (byte [] bytes, int index, int count)
                {
index 7b83448939c0ad9f93fb1f77339d243c53be0d81..574366c72cd32e1e319fd643b4634725bb88f81e 100644 (file)
@@ -28,6 +28,7 @@ namespace I18N.CJK
                        return DbcsConvert.Gb2312;
                }
 
+#if !DISABLE_UNSAFE
                // Get the bytes that result from encoding a character buffer.
                public unsafe override int GetByteCountImpl (
                        char* chars, int count)
@@ -80,7 +81,7 @@ namespace I18N.CJK
 #if NET_2_0
                                        HandleFallback (ref buffer, chars,
                                                ref charIndex, ref charCount,
-                                               bytes, ref byteIndex, ref byteCount);
+                                               bytes, ref byteIndex, ref byteCount, null);
 #else
                                        bytes[byteIndex++] = (byte)'?';
 #endif
@@ -91,7 +92,75 @@ namespace I18N.CJK
                        }
                        return byteIndex - origIndex;
                }
-               
+#else
+               // Get the number of bytes needed to encode a range of a character array.
+               public override int GetByteCount(char[] chars, int index, int count)
+               {
+                       DbcsConvert gb2312 = GetConvert();
+                       int length = 0;
+                       while (count-- > 0)
+                       {
+                               char c = chars[index++];
+                               if (c <= 0x80 || c == 0xFF)
+                               { // GetBytes stores these values as a single byte
+                                       length++;
+                                       continue;
+                               }
+                               byte b1 = gb2312.u2n[((int)c) * 2 + 1];
+                               byte b2 = gb2312.u2n[((int)c) * 2];
+                               if (b1 == 0 && b2 == 0)
+                               {
+                                       // No table entry. Count one byte in every build, matching the
+                                       // '?' that GetBytes writes when no fallback is compiled in.
+                                       // FIXME: under NET_2_0 the encoder fallback decides the real
+                                       // size; it is not consulted here, so this is an estimate.
+                                       length++;
+                               }
+                               else
+                                       length += 2;
+                       }
+                       return length;
+               }
+
+               // Encode charCount chars from chars[charIndex] into bytes from byteIndex on; returns the number of bytes written.
+               public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
+               {
+                       int byteCount = bytes.Length; // full array length (byteIndex is not subtracted); only handed to HandleFallback
+
+                       DbcsConvert gb2312 = GetConvert();
+#if NET_2_0
+                       EncoderFallbackBuffer buffer = null;
+#endif
+
+                       int origIndex = byteIndex;
+                       while (charCount-- > 0)
+                       {
+                               char c = chars[charIndex++];
+                               if (c <= 0x80 || c == 0xFF)
+                               { // values up to 0x80, and 0xFF, are stored unchanged as one byte
+                                       bytes[byteIndex++] = (byte)c;
+                                       continue;
+                               }
+                               byte b1 = gb2312.u2n[((int)c) * 2 + 1]; // the odd table slot is the byte written first
+                               byte b2 = gb2312.u2n[((int)c) * 2];
+                               if (b1 == 0 && b2 == 0) // both zero: treated as having no mapping
+                               {
+#if NET_2_0
+                                       HandleFallback (ref buffer, chars, ref charIndex, ref charCount,
+                                               bytes, ref byteIndex, ref byteCount, null);
+#else
+                                       bytes[byteIndex++] = (byte)'?';
+#endif
+                               }
+                               else
+                               {
+                                       bytes[byteIndex++] = b1;
+                                       bytes[byteIndex++] = b2;
+                               }
+                       }
+                       return byteIndex - origIndex; // bytes written by this call
+               }
+#endif
                // Get the characters that result from decoding a byte buffer.
                public override int GetCharCount (byte [] bytes, int index, int count)
                {
index bda38c9a7fe0c1aa7e1047aa1f5312b428d1ee47..35f85dc7ccb760f88cdae7dd77080584d92c22a5 100644 (file)
@@ -117,6 +117,7 @@ namespace I18N.CJK
 
         bool useUHC;
 
+#if !DISABLE_UNSAFE
         // Get the bytes that result from encoding a character buffer.
         public unsafe override int GetByteCountImpl (char* chars, int count)
         {
@@ -171,7 +172,7 @@ namespace I18N.CJK
                 if (b1 == 0 && b2 == 0) {
 #if NET_2_0
                     HandleFallback (ref buffer, chars, ref charIndex, ref charCount,
-                        bytes, ref byteIndex, ref byteCount);
+                        bytes, ref byteIndex, ref byteCount, null);
 #else
                     bytes[byteIndex++] = (byte)'?';
 #endif
@@ -182,8 +183,80 @@ namespace I18N.CJK
             }
             return byteIndex - origIndex;
         }
+#else
+               // Count the bytes needed to encode part of a character array.
+               public override int GetByteCount(char[] chars, int index, int count)
+               {
+                       int total = 0;
+                       DbcsConvert table = GetConvert();
+
+                       // 00 00 - FF FF: u2n is indexed by the char value, two bytes per entry.
+                       for (int remaining = count; remaining > 0; remaining--)
+                       {
+                               char current = chars[index++];
+                               bool passThrough = current <= 0x80 || current == 0xFF;
+                               if (passThrough)
+                               {
+                                       // ASCII
+                                       total += 1;
+                                       continue;
+                               }
+
+                               int slot = ((int)current) * 2;
+                               byte first = table.u2n[slot];
+                               byte second = table.u2n[slot + 1];
+
+                               // An all-zero entry is counted as one byte, with or without
+                               // NET_2_0 (FIXME: handle fallback for GetByteCountImpl()).
+                               bool unmapped = first == 0 && second == 0;
+                               total += unmapped ? 1 : 2;
+                       }
+                       return total;
+               }
+
+               // Encode charCount chars from chars[charIndex] into bytes from byteIndex on; returns the number of bytes written.
+               public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
+               {
+                       int byteCount = bytes.Length; // full array length (byteIndex is not subtracted); only handed to HandleFallback
+
+                       DbcsConvert convert = GetConvert();
+#if NET_2_0
+                       EncoderFallbackBuffer buffer = null;
+#endif
 
-        // Get the characters that result from decoding a byte buffer.
+                       // 00 00 - FF FF: u2n is indexed by the char value, two bytes per entry
+                       int origIndex = byteIndex;
+                       while (charCount-- > 0)
+                       {
+                               char c = chars[charIndex++];
+                               if (c <= 0x80 || c == 0xFF)
+                               { // values up to 0x80, and 0xFF, are stored unchanged as one byte
+                                       bytes[byteIndex++] = (byte)c;
+                                       continue;
+                               }
+                               byte b1 = convert.u2n[((int)c) * 2]; // here the even table slot is the byte written first
+                               byte b2 = convert.u2n[((int)c) * 2 + 1];
+                               if (b1 == 0 && b2 == 0) // both zero: treated as having no mapping
+                               {
+#if NET_2_0
+                                       HandleFallback (ref buffer, chars, ref charIndex, ref charCount,
+                                               bytes, ref byteIndex, ref byteCount, null);
+#else
+                    bytes[byteIndex++] = (byte)'?';
+#endif
+                               }
+                               else
+                               {
+                                       bytes[byteIndex++] = b1;
+                                       bytes[byteIndex++] = b2;
+                               }
+                       }
+                       return byteIndex - origIndex; // bytes written by this call
+               }
+#endif
+               // Get the characters that result from decoding a byte buffer.
         public override int GetCharCount (byte[] bytes, int index, int count)
         {
             return GetDecoder ().GetCharCount (bytes, index, count);
index acb1aeefa12dbb77bd1437cf6512b5b1e1606d08..3bbf211c2616325ad307c2727b4076c8eb0d5df4 100644 (file)
@@ -27,6 +27,7 @@ namespace I18N.CJK
                        return DbcsConvert.Big5;
                }
 
+#if !DISABLE_UNSAFE
                // Get the bytes that result from encoding a character buffer.
                public unsafe override int GetByteCountImpl (char* chars, int count)
                {
@@ -80,7 +81,7 @@ namespace I18N.CJK
 #if NET_2_0
                                        HandleFallback (ref buffer, chars,
                                                ref charIndex, ref charCount,
-                                               bytes, ref byteIndex, ref byteCount);
+                                               bytes, ref byteIndex, ref byteCount, null);
 #else
                                        bytes[byteIndex++] = (byte)'?';
 #endif
@@ -91,7 +92,77 @@ namespace I18N.CJK
                        }
                        return byteIndex - origIndex;
                }
-               
+#else
+               // Report how many bytes encoding part of a character array will take.
+               public override int GetByteCount(char[] chars, int index, int count)
+               {
+                       DbcsConvert table = GetConvert();
+                       int needed = 0;
+
+                       for (int left = count; left > 0; left--)
+                       {
+                               char current = chars[index++];
+                               if (current > 0x80 && current != 0xFF)
+                               {
+                                       int slot = ((int)current) * 2;
+                                       byte b1 = table.u2n[slot + 1];
+                                       byte b2 = table.u2n[slot];
+
+                                       // An all-zero entry costs one byte whether or not NET_2_0
+                                       // is defined (FIXME: handle fallback for GetByteCountImpl());
+                                       // every other entry is a two-byte sequence.
+                                       needed += (b1 == 0 && b2 == 0) ? 1 : 2;
+                               }
+                               else
+                               {
+                                       // ASCII
+                                       needed++;
+                               }
+                       }
+                       return needed;
+               }
+
+               // Encode charCount chars from chars[charIndex] into bytes from byteIndex on; returns the number of bytes written.
+               public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
+               {
+                       int byteCount = bytes.Length; // full array length (byteIndex is not subtracted); only handed to HandleFallback
+
+                       DbcsConvert convert = GetConvert();
+#if NET_2_0
+                       EncoderFallbackBuffer buffer = null;
+#endif
+
+                       int origIndex = byteIndex;
+                       while (charCount-- > 0)
+                       {
+                               char c = chars[charIndex++];
+                               if (c <= 0x80 || c == 0xFF)
+                               { // values up to 0x80, and 0xFF, are stored unchanged as one byte
+                                       bytes[byteIndex++] = (byte)c;
+                                       continue;
+                               }
+                               byte b1 = convert.u2n[((int)c) * 2 + 1]; // the odd table slot is the byte written first
+                               byte b2 = convert.u2n[((int)c) * 2];
+                               if (b1 == 0 && b2 == 0) // both zero: treated as having no mapping
+                               {
+#if NET_2_0
+                                       HandleFallback (ref buffer, chars, ref charIndex, ref charCount,
+                                               bytes, ref byteIndex, ref byteCount, null);
+#else
+                                       bytes[byteIndex++] = (byte)'?';
+#endif
+                               }
+                               else
+                               {
+                                       bytes[byteIndex++] = b1;
+                                       bytes[byteIndex++] = b2;
+                               }
+                       }
+                       return byteIndex - origIndex; // bytes written by this call
+               }
+#endif
                // Get the characters that result from decoding a byte buffer.
                public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                             char[] chars, int charIndex)
index 5768ee43171e67f3e4b8343982476a3344ec5d2e..9c644cddf694878404cefe33d332dfbf5ed7c4c1 100644 (file)
@@ -9,6 +9,11 @@ using System;
 using System.Text;
 using I18N.Common;
 
+#if DISABLE_UNSAFE
+using MonoEncoder = I18N.Common.MonoSafeEncoder;
+using MonoEncoding = I18N.Common.MonoSafeEncoding;
+#endif
+
 namespace I18N.CJK
 {
        [Serializable]
index 2cb1ff121521b24c5c1e4163248f4aa60f1c740f..957b72c63e22c5d27867c0eeae93495bd5af72a5 100644 (file)
@@ -9,6 +9,11 @@ using System.Reflection;
 using System.Text;
 using I18N.Common;
 
+#if DISABLE_UNSAFE
+using MonoEncoder = I18N.Common.MonoSafeEncoder;
+using MonoEncoding = I18N.Common.MonoSafeEncoding;
+#endif
+
 namespace I18N.CJK
 {
        [Serializable]
@@ -72,11 +77,7 @@ namespace I18N.CJK
                        return len;
                }
 
-               public override int GetByteCount (char [] chars, int index, int length)
-               {
-                       return new GB18030Encoder (this).GetByteCount (chars, index, length, true);
-               }
-
+#if !DISABLE_UNSAFE
                public unsafe override int GetByteCountImpl (char* chars, int count)
                {
                        return new GB18030Encoder (this).GetByteCountImpl (chars, count, true);
@@ -86,6 +87,17 @@ namespace I18N.CJK
                {
                        return new GB18030Encoder (this).GetBytesImpl (chars, charCount, bytes, byteCount, true);
                }
+#else
+               // Delegates to a freshly created GB18030Encoder, passing true as the
+               // encoder's final (refresh) argument.
+               public override int GetByteCount (char [] chars, int index, int length)
+               {
+                       GB18030Encoder encoder = new GB18030Encoder (this);
+                       int needed = encoder.GetByteCount (chars, index, length, true);
+                       return needed;
+               }
+
+               // Delegates to a freshly created GB18030Encoder, passing true as the
+               // encoder's final (refresh) argument.
+               public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
+               {
+                       GB18030Encoder encoder = new GB18030Encoder (this);
+                       int written = encoder.GetBytes (chars, charIndex, charCount, bytes, byteIndex, true);
+                       return written;
+               }
+#endif
 
                public override int GetCharCount (byte [] bytes, int start, int len)
                {
@@ -286,6 +298,7 @@ namespace I18N.CJK
                char incomplete_byte_count;
                char incomplete_bytes;
 
+#if !DISABLE_UNSAFE
                public unsafe override int GetByteCountImpl (char* chars, int count, bool refresh)
                {
                        int start = 0;
@@ -374,7 +387,7 @@ namespace I18N.CJK
 #if NET_2_0
                                                HandleFallback (
                                                        chars, ref charIndex, ref charCount,
-                                                       bytes, ref byteIndex, ref byteCount);
+                                                       bytes, ref byteIndex, ref byteCount, null);
 #else
                                                bytes [byteIndex++] = (byte) '?';
 #endif
@@ -419,5 +432,158 @@ namespace I18N.CJK
 
                        return byteIndex - byteStart;
                }
+#else
+
+               // Counts the bytes needed for 'count' characters starting at chars[index].
+               // A surrogate that is the last char of the range is remembered in
+               // incomplete_byte_count; when 'refresh' is true it is charged one byte
+               // and the field is cleared.
+               public override int GetByteCount(char[] chars, int index, int count, bool refresh)
+               {
+                       // Scan the requested window; starting from 0 would ignore 'index'.
+                       int start = index;
+                       int end = index + count;
+                       int ret = 0;
+                       while (start < end)
+                       {
+                               char ch = chars[start];
+                               if (ch < 0x80)
+                               {
+                                       // ASCII
+                                       ret++;
+                                       start++;
+                                       continue;
+                               }
+                               else if (Char.IsSurrogate(ch))
+                               {
+                                       // Surrogate
+                                       if (start + 1 == end)
+                                       {
+                                               // The range ends on this surrogate: keep it pending.
+                                               incomplete_byte_count = ch;
+                                               start++;
+                                       }
+                                       else
+                                       {
+                                               // Two chars are consumed and counted as four bytes.
+                                               ret += 4;
+                                               start += 2;
+                                       }
+                                       continue;
+                               }
+
+                               if (ch == 0x80 || ch == 0xFF)
+                               {
+                                       // GetBytes copies 0x80 and 0xFF through as one byte each
+                                       // (its test is ch <= 0x80 || ch == 0xFF), so count the same.
+                                       ret++;
+                                       start++;
+                                       continue;
+                               }
+
+                               byte b1 = gb2312.u2n[((int)ch) * 2 + 1];
+                               byte b2 = gb2312.u2n[((int)ch) * 2];
+                               if (b1 != 0 && b2 != 0)
+                               {
+                                       // GB2312
+                                       ret += 2;
+                                       start++;
+                                       continue;
+                               }
+
+                               // non-GB2312
+                               long value = GB18030Source.FromUCS(ch);
+                               if (value < 0)
+                                       ret++; // invalid(?): GetBytes writes a single 0x3F here
+                               else
+                                       ret += 4;
+                               start++;
+                       }
+
+                       if (refresh)
+                       {
+                               if (incomplete_byte_count != char.MinValue)
+                                       ret++;
+                               incomplete_byte_count = char.MinValue;
+                       }
+                       return ret;
+               }
+
+               public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex, bool refresh)
+               { // Encodes charCount chars from chars[charIndex] into bytes starting at byteIndex; returns the number of bytes written.
+                       int byteCount = bytes.Length; // full array length (byteIndex is not subtracted); only handed to HandleFallback
+                       int charEnd = charIndex + charCount;
+                       int byteStart = byteIndex;
+                       char ch = incomplete_bytes; // pending surrogate stored by an earlier pass, or char.MinValue when there is none
+
+                       while (charIndex < charEnd)
+                       {
+                               if (incomplete_bytes == char.MinValue)
+                                       ch = chars[charIndex++];
+                               else
+                                       incomplete_bytes = char.MinValue; // use the pending char as ch instead of reading a new one
+
+                               if (ch < 0x80)
+                               {
+                                       // ASCII
+                                       bytes[byteIndex++] = (byte)ch;
+                                       continue;
+                               }
+                               else if (Char.IsSurrogate(ch))
+                               {
+                                       // Surrogate
+                                       if (charIndex == charEnd)
+                                       {
+                                               incomplete_bytes = ch;
+                                               break; // incomplete: no second half in this range, keep ch pending
+                                       }
+                                       char ch2 = chars[charIndex++];
+                                       if (!Char.IsSurrogate(ch2))
+                                       {
+                                               // invalid surrogate
+#if NET_2_0
+                                               HandleFallback (chars, ref charIndex, ref charCount,
+                                                       bytes, ref byteIndex, ref byteCount, null);
+#else
+                                               bytes [byteIndex++] = (byte) '?';
+#endif
+                                               continue;
+                                       }
+                                       int cp = (ch - 0xD800) * 0x400 + ch2 - 0xDC00; // pair offset above U+10000, assuming ch is the high and ch2 the low half
+                                       GB18030Source.Unlinear(bytes,  byteIndex, GB18030Source.FromUCSSurrogate(cp));
+                                       byteIndex += 4; // the pair occupies four output bytes
+                                       continue;
+                               }
+
+
+                               if (ch <= 0x80 || ch == 0xFF)
+                               {
+                                       // Character maps to itself (only 0x80 and 0xFF get here; lower values were handled above)
+                                       bytes[byteIndex++] = (byte)ch;
+                                       continue;
+                               }
+
+                               byte b1 = gb2312.u2n[((int)ch) * 2 + 1];
+                               byte b2 = gb2312.u2n[((int)ch) * 2];
+                               if (b1 != 0 && b2 != 0) // both bytes non-zero: emit the two-byte table entry
+                               {
+                                       bytes[byteIndex++] = b1;
+                                       bytes[byteIndex++] = b2;
+                                       continue;
+                               }
+
+                               long value = GB18030Source.FromUCS(ch);
+                               if (value < 0)
+                                       bytes[byteIndex++] = 0x3F; // invalid(?): negative result from FromUCS, write '?' (0x3F)
+                               else
+                               {
+                                       // non-GB2312: four-byte form
+                                       GB18030Source.Unlinear(bytes, byteIndex, value);
+                                       byteIndex += 4;
+                               }
+                       }
+
+                       if (refresh)
+                       {
+                               if (incomplete_bytes != char.MinValue)
+                                       bytes[byteIndex++] = 0x3F; // incomplete: a surrogate is still pending, write '?' for it
+                               incomplete_bytes = char.MinValue;
+                       }
+
+                       return byteIndex - byteStart;
+               }
+#endif
        }
 }
index d5dc313e88c8c7ec2399b85ac3bee340f168ebca..f9814f3336554d683a3898c125435ef28c11b1ef 100644 (file)
@@ -49,11 +49,30 @@ namespace I18N.CJK
                        MethodInfo mi = typeof (Assembly).GetMethod (
                                "GetManifestResourceInternal",
                                BindingFlags.NonPublic | BindingFlags.Instance);
+
                        int size = 0;
                        Module mod = null;
-                       IntPtr ret = (IntPtr) mi.Invoke (
-                               Assembly.GetExecutingAssembly (),
-                               new object [] {"gb18030.table", size, mod});
+                       IntPtr ret = IntPtr.Zero;
+
+                       if (mi != null)
+                       {
+                               ret = (IntPtr)mi.Invoke(
+                                Assembly.GetExecutingAssembly(),
+                                new object[] { "gb18030.table", size, mod });
+                       }
+                       else
+                       {
+                               // DotNet's way ;)
+                               // ret is turned into the gbx2uni table pointer below, so the
+                               // buffer must stay at a fixed address after this block. A
+                               // pointer taken inside 'fixed' is only valid within that
+                               // statement; once it ends the GC may move or reclaim the array.
+                               using (var ms = Assembly.GetExecutingAssembly()
+                                       .GetManifestResourceStream("gb18030.table"))
+                               {
+                                       // A missing resource leaves ret at IntPtr.Zero.
+                                       if (ms != null)
+                                       {
+                                               var data = new byte[ms.Length];
+
+                                               // Stream.Read may return fewer bytes than requested.
+                                               int filled = 0;
+                                               while (filled < data.Length)
+                                               {
+                                                       int got = ms.Read(data, filled, data.Length - filled);
+                                                       if (got <= 0)
+                                                               break;
+                                                       filled += got;
+                                               }
+
+                                               // Deliberately never freed: the pinned handle keeps the
+                                               // array alive and in place for as long as it is used.
+                                               ret = System.Runtime.InteropServices.GCHandle.Alloc(
+                                                       data, System.Runtime.InteropServices.GCHandleType.Pinned)
+                                                       .AddrOfPinnedObject();
+                                       }
+                               }
+                       }
+
                        if (ret != IntPtr.Zero) {
                                gbx2uni = (byte*) ((void*) ret);
                                gbx2uniSize =
index 35505664aa4d01aba1df9cb465bdff1781cd0ec2..675cd2a333fd3a3782c429145b71b743030a9977 100644 (file)
@@ -8,6 +8,11 @@ using System;
 using System.Text;
 using I18N.Common;
 
+#if DISABLE_UNSAFE
+using MonoEncoder = I18N.Common.MonoSafeEncoder;
+using MonoEncoding = I18N.Common.MonoSafeEncoding;
+#endif
+
 namespace I18N.CJK
 {
        [Serializable]
@@ -85,9 +90,13 @@ namespace I18N.CJK
                        return byteCount;
                }
 
-               public override int GetByteCount (char [] chars, int charIndex, int charCount)
+#if !DISABLE_UNSAFE
+               protected override unsafe int GetBytesInternal(char* chars, int charCount, byte* bytes, int byteCount, bool flush, object state)
                {
-                       return new ISO2022JPEncoder (this, allow_1byte_kana, allow_shift_io).GetByteCount (chars, charIndex, charCount, true);
+                       if (state != null) // a non-null state is cast to ISO2022JPEncoder and used instead of a new encoder
+                               return ((ISO2022JPEncoder)state).GetBytesImpl (chars, charCount, bytes, byteCount, true);
+
+                       return new ISO2022JPEncoder (this, allow_1byte_kana, allow_shift_io).GetBytesImpl (chars, charCount, bytes, byteCount, true); // NOTE(review): 'flush' is not forwarded on either path, true is always passed - confirm this is intended
                }
 
                public unsafe override int GetByteCountImpl (char* chars, int count)
@@ -99,6 +108,25 @@ namespace I18N.CJK
                {
                        return new ISO2022JPEncoder (this, allow_1byte_kana, allow_shift_io).GetBytesImpl (chars, charCount, bytes, byteCount, true);
                }
+#else
+               // Encodes through the ISO2022JPEncoder supplied in 'state', or through a
+               // new one when 'state' is null. The encoder is always called with true as
+               // its last argument; the 'flush' parameter is not forwarded.
+               protected override int GetBytesInternal(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex, bool flush, object state)
+               {
+                       ISO2022JPEncoder encoder = state != null
+                               ? (ISO2022JPEncoder)state
+                               : new ISO2022JPEncoder(this, allow_1byte_kana, allow_shift_io);
+
+                       return encoder.GetBytesInternal(chars, charIndex, charCount, bytes, byteIndex, true);
+               }
+
+               // Counts with a new ISO2022JPEncoder built from this encoding's kana and
+               // shift settings, passing true as the encoder's last argument.
+               public override int GetByteCount(char[] chars, int charIndex, int charCount)
+               {
+                       ISO2022JPEncoder encoder = new ISO2022JPEncoder(this, allow_1byte_kana, allow_shift_io);
+                       int needed = encoder.GetByteCount(chars, charIndex, charCount, true);
+                       return needed;
+               }
+
+               // Encodes with a new ISO2022JPEncoder built from this encoding's kana and
+               // shift settings, passing true as the encoder's last argument.
+               public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
+               {
+                       ISO2022JPEncoder encoder = new ISO2022JPEncoder (this, allow_1byte_kana, allow_shift_io);
+                       int written = encoder.GetBytes(chars, charIndex, charCount, bytes, byteIndex, true);
+                       return written;
+               }
+#endif
 
                public override int GetCharCount (byte [] bytes, int index, int count)
                {
@@ -126,96 +154,37 @@ namespace I18N.CJK
                ISO2022JPMode m = ISO2022JPMode.ASCII;
                bool shifted_in_count, shifted_in_conv;
 
-               public ISO2022JPEncoder (MonoEncoding owner, bool allow1ByteKana, bool allowShiftIO)
+               public ISO2022JPEncoder(MonoEncoding owner, bool allow1ByteKana, bool allowShiftIO)
                        : base (owner)
                {
                        this.allow_1byte_kana = allow1ByteKana;
                        this.allow_shift_io = allowShiftIO;
                }
 
+#if !DISABLE_UNSAFE
                public unsafe override int GetByteCountImpl (char* chars, int charCount, bool flush)
                {
-                       int charIndex = 0;
-                       int end = charCount;
-                       int value;
-                       int byteCount = 0;
-
-                       for (int i = charIndex; i < end; i++) {
-                               char ch = chars [i];
-                               // When half-kana is not allowed and it is
-                               // actually in the input, convert to full width
-                               // kana.
-                               if (!allow_1byte_kana &&
-                                       ch >= 0xFF60 && ch <= 0xFFA0)
-                                       ch = full_width_map [ch - 0xFF60];
-
-                               if (ch >= 0x2010 && ch <= 0x9FA5)
-                               {
-                                       if (shifted_in_count) {
-                                               shifted_in_count = false;
-                                               byteCount++; // shift_out
-                                       }
-                                       if (m != ISO2022JPMode.JISX0208)
-                                               byteCount += 3;
-                                       m = ISO2022JPMode.JISX0208;
-                                       // This range contains the bulk of the CJK set.
-                                       value = (ch - 0x2010) * 2;
-                                       value = ((int)(convert.cjkToJis[value])) |
-                                                       (((int)(convert.cjkToJis[value + 1])) << 8);
-                               } else if (ch >= 0xFF01 && ch <= 0xFF60) {
-                                       if (shifted_in_count) {
-                                               shifted_in_count = false;
-                                               byteCount++;
-                                       }
-                                       if (m != ISO2022JPMode.JISX0208)
-                                               byteCount += 3;
-                                       m = ISO2022JPMode.JISX0208;
+                       return GetBytesImpl(chars, charCount, null, 0, flush); // counting is delegated to GetBytesImpl with a null buffer (cf. the bytes == null checks in IsShifted/SetShifted/SwitchMode)
+               }
+#else
+               // Counts the bytes for charCount chars starting at chars[charIndex] by
+               // running GetBytesInternal with a null output array (the bytes == null
+               // checks in IsShifted/SetShifted/SwitchMode treat that as a counting pass).
+               // 'flush' is forwarded, as the unsafe GetByteCountImpl does, instead of
+               // always counting as if the encoder were being flushed.
+               public override int GetByteCount(char[] chars, int charIndex, int charCount, bool flush)
+               {
+                       return GetBytesInternal (chars, charIndex, charCount, null, 0, flush);
+               }
+#endif
 
-                                       // This range contains extra characters,
-                                       value = (ch - 0xFF01) * 2;
-                                       value = ((int)(convert.extraToJis[value])) |
-                                                       (((int)(convert.extraToJis[value + 1])) << 8);
-                               } else if(ch >= 0xFF60 && ch <= 0xFFA0) {
-                                       if (allow_shift_io) {
-                                               if (!shifted_in_count) {
-                                                       byteCount++;
-                                                       shifted_in_count = true;
-                                               }
-                                       }
-                                       else if (m != ISO2022JPMode.JISX0201) {
-                                               byteCount += 3;
-                                               m = ISO2022JPMode.JISX0201;
-                                       }
-                                       value = ch - 0xFF60 + 0xA0;
-                               } else if (ch < 128) {
-                                       if (shifted_in_count) {
-                                               shifted_in_count = false;
-                                               byteCount++;
-                                       }
-                                       if (m != ISO2022JPMode.ASCII)
-                                               byteCount += 3;
-                                       m = ISO2022JPMode.ASCII;
-                                       value = (int) ch;
-                               } else
-                                       // skip non-convertible character
-                                       continue;
+#if !DISABLE_UNSAFE
+               // Reads the shift flag for the current pass: shifted_in_count when no
+               // output buffer is given, shifted_in_conv otherwise.
+               private unsafe bool IsShifted(byte *bytes)
+               {
+                       if (bytes != null)
+                               return shifted_in_conv;
+
+                       return shifted_in_count;
+               }
 
-                               if (value > 0x100)
-                                       byteCount += 2;
-                               else
-                                       byteCount++;
-                       }
-                       // must end in ASCII mode
-                       if (flush) {
-                               if (shifted_in_count) {
-                                       shifted_in_count = false;
-                                       byteCount++;
-                               }
-                               if (m != ISO2022JPMode.ASCII)
-                                       byteCount += 3;
-                               m = ISO2022JPMode.ASCII;
-                       }
-                       return byteCount;
+               // Stores the shift flag for the current pass: shifted_in_count when no
+               // output buffer is given, shifted_in_conv otherwise.
+               private unsafe void SetShifted(byte *bytes, bool state)
+               {
+                       if (bytes != null) {
+                               shifted_in_conv = state;
+                               return;
+                       }
+
+                       shifted_in_count = state;
                }
 
                // returns false if it failed to add required ESC.
@@ -225,8 +194,16 @@ namespace I18N.CJK
                        if (cur == next)
                                return;
 
+                       // If bytes == null we are only counting output bytes, not writing them.
+                       if (bytes == null) {
+                               byteIndex += 3;
+                               cur = next;
+                               return;
+                       }
+
                        if (byteCount <= 3)
                                throw new ArgumentOutOfRangeException ("Insufficient byte buffer.");
+
                        bytes [byteIndex++] = 0x1B;
                        switch (next) {
                        case ISO2022JPMode.JISX0201:
@@ -244,6 +221,57 @@ namespace I18N.CJK
                        }
                        cur = next;
                }
+#else
+               private bool IsShifted(byte[] bytes)
+               {
+                       return bytes == null ? shifted_in_count : shifted_in_conv;
+               }
+
+               private void SetShifted(byte[] bytes, bool state)
+               {
+                       if (bytes == null)
+                               shifted_in_count = state;
+                       else
+                               shifted_in_conv = state;
+               }
+
+               private void SwitchMode(byte[] bytes, ref int byteIndex,
+                       ref int byteCount, ref ISO2022JPMode cur, ISO2022JPMode next)
+               {
+                       if (cur == next)
+                               return;
+
+                       // If bytes == null we are only counting output bytes, not writing them.
+                       if (bytes == null)
+                       {
+                               byteIndex += 3;
+                               cur = next;
+                               return;
+                       }
+
+                       if (byteCount <= 3)
+                               throw new ArgumentOutOfRangeException("Insufficient byte buffer.");
+
+                       bytes[byteIndex++] = 0x1B;
+                       switch (next)
+                       {
+                               case ISO2022JPMode.JISX0201:
+                                       bytes[byteIndex++] = 0x28;
+                                       bytes[byteIndex++] = 0x49;
+                                       break;
+                               case ISO2022JPMode.JISX0208:
+                                       bytes[byteIndex++] = 0x24;
+                                       bytes[byteIndex++] = 0x42;
+                                       break;
+                               default:
+                                       bytes[byteIndex++] = 0x28;
+                                       bytes[byteIndex++] = 0x42;
+                                       break;
+                       }
+
+                       cur = next;
+               }
+#endif
 
                static readonly char [] full_width_map = new char [] {
                        '\0', '\u3002', '\u300C', '\u300D', '\u3001', '\u30FB', // to nakaguro
@@ -252,13 +280,14 @@ namespace I18N.CJK
                        '\u30AB', '\u30AD', '\u30AF', '\u30B1', '\u30B3',
                        '\u30B5', '\u30B7', '\u30B9', '\u30BB', '\u30BD',
                        '\u30BF', '\u30C1', '\u30C4', '\u30C6', '\u30C8',
-                       '\u30C9', '\u30CA', '\u30CB', '\u30CC', '\u30CD',
+                       '\u30CA', '\u30CB', '\u30CC', '\u30CD', '\u30CE',
                        '\u30CF', '\u30D2', '\u30D5', '\u30D8', '\u30DB',
                        '\u30DE', '\u30DF', '\u30E0', '\u30E1', '\u30E2',
                        '\u30E4', '\u30E6', '\u30E8', // Ya-Yo
                        '\u30E9', '\u30EA', '\u30EB', '\u30EC', '\u30ED',
-                       '\u30EF', '\u30F1', '\u30F3', '\u309B', '\u309C'};
+                       '\u30EF', '\u30F3', '\u309B', '\u309C' };
 
+#if !DISABLE_UNSAFE
                public unsafe override int GetBytesImpl (
                        char* chars, int charCount,
                        byte* bytes, int byteCount, bool flush)
@@ -282,9 +311,10 @@ namespace I18N.CJK
 
                                if (ch >= 0x2010 && ch <= 0x9FA5)
                                {
-                                       if (shifted_in_conv) {
-                                               bytes [byteIndex++] = 0x0F;
-                                               shifted_in_conv = false;
+                                       if (IsShifted(bytes)) {
+                                               var offset = byteIndex++;
+                                               if (bytes != null) bytes [offset] = 0x0F;
+                                               SetShifted(bytes, false);
                                                byteCount--;
                                        }
                                        switch (m) {
@@ -299,9 +329,10 @@ namespace I18N.CJK
                                        value = ((int)(convert.cjkToJis[value])) |
                                                        (((int)(convert.cjkToJis[value + 1])) << 8);
                                } else if (ch >= 0xFF01 && ch <= 0xFF60) {
-                                       if (shifted_in_conv) {
-                                               bytes [byteIndex++] = 0x0F;
-                                               shifted_in_conv = false;
+                                       if (IsShifted(bytes)) {
+                                               var offset = byteIndex++;
+                                               if (bytes != null) bytes [offset] = 0x0F;
+                                               SetShifted(bytes, false);
                                                byteCount--;
                                        }
                                        switch (m) {
@@ -322,9 +353,10 @@ namespace I18N.CJK
                                        // so here we don't have to consider it.
 
                                        if (allow_shift_io) {
-                                               if (!shifted_in_conv) {
-                                                       bytes [byteIndex++] = 0x0E;
-                                                       shifted_in_conv = true;
+                                               if (!IsShifted(bytes)) {
+                                                       var offset = byteIndex++;
+                                                       if (bytes != null) bytes [offset] = 0x0E;
+                                                       SetShifted(bytes, true);
                                                        byteCount--;
                                                }
                                        } else {
@@ -338,9 +370,10 @@ namespace I18N.CJK
                                        }
                                        value = ch - 0xFF40;
                                } else if (ch < 128) {
-                                       if (shifted_in_conv) {
-                                               bytes [byteIndex++] = 0x0F;
-                                               shifted_in_conv = false;
+                                       if (IsShifted(bytes)) {
+                                               var offset = byteIndex++;
+                                               if (bytes != null) bytes [offset] = 0x0F;
+                                               SetShifted(bytes, false);
                                                byteCount--;
                                        }
                                        SwitchMode (bytes, ref byteIndex, ref byteCount, ref m, ISO2022JPMode.ASCII);
@@ -349,29 +382,35 @@ namespace I18N.CJK
 #if NET_2_0
                                        HandleFallback (
                                                chars, ref i, ref charCount,
-                                               bytes, ref byteIndex, ref byteCount);
+                                               bytes, ref byteIndex, ref byteCount, this);
 #endif
                                        // skip non-convertible character
                                        continue;
                                }
 
 //Console.WriteLine ("{0:X04} : {1:x02} {2:x02}", v, (int) v / 94 + 33, v % 94 + 33);
-                               if (value > 0x100) {
+                               if (value >= 0x100) {
                                        value -= 0x0100;
-                                       bytes [byteIndex++] = (byte) (value / 94 + 33);
-                                       bytes [byteIndex++] = (byte) (value % 94 + 33);
+                                       if (bytes != null) {
+                                               bytes [byteIndex++] = (byte) (value / 94 + 33);
+                                               bytes [byteIndex++] = (byte) (value % 94 + 33);
+                                       } else {
+                                               byteIndex += 2;
+                                       }
                                        byteCount -= 2;
                                }
                                else {
-                                       bytes [byteIndex++] = (byte) value;
+                                       var offset = byteIndex++;
+                                       if (bytes != null) bytes [offset] = (byte) value;
                                        byteCount--;
                                }
                        }
                        if (flush) {
                                // must end in ASCII mode
-                               if (shifted_in_conv) {
-                                       bytes [byteIndex++] = 0x0F;
-                                       shifted_in_conv = false;
+                               if (IsShifted(bytes)) {
+                                       var offset = byteIndex++;
+                                       if (bytes != null) bytes [offset] = 0x0F;
+                                       SetShifted(bytes, false);
                                        byteCount--;
                                }
                                if (m != ISO2022JPMode.ASCII)
@@ -379,6 +418,165 @@ namespace I18N.CJK
                        }
                        return byteIndex - start;
                }
+#else
+               internal int GetBytesInternal(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex, bool flush)
+               {
+                       int start = byteIndex;
+                       int end = charIndex + charCount;
+                       int value;
+                       int byteCount = bytes != null ? bytes.Length : 0;
+
+                       for (int i = charIndex; i < end; i++, charCount--)
+                       {
+                               char ch = chars[i];
+
+                               // When half-kana is not allowed and it is
+                               // actually in the input, convert to full width
+                               // kana.
+                               if (!allow_1byte_kana &&
+                                       ch >= 0xFF60 && ch <= 0xFFA0)
+                                       ch = full_width_map[ch - 0xFF60];
+
+                               if (ch >= 0x2010 && ch <= 0x9FA5)
+                               {
+                                       if (IsShifted (bytes))
+                                       {
+                                               var offset = byteIndex++;
+                                               if (bytes != null) bytes[offset] = 0x0F;
+                                               SetShifted (bytes, false);
+                                               byteCount--;
+                                       }
+                                       switch (m)
+                                       {
+                                               case ISO2022JPMode.JISX0208:
+                                                       break;
+                                               default:
+                                                       SwitchMode(bytes, ref byteIndex, ref byteCount, ref m, ISO2022JPMode.JISX0208);
+                                                       break;
+                                       }
+                                       // This range contains the bulk of the CJK set.
+                                       value = (ch - 0x2010) * 2;
+                                       value = ((int)(convert.cjkToJis[value])) |
+                                                       (((int)(convert.cjkToJis[value + 1])) << 8);
+                               }
+                               else if (ch >= 0xFF01 && ch <= 0xFF60)
+                               {
+                                       if (IsShifted(bytes))
+                                       {
+                                               var offset = byteIndex++;
+                                               if (bytes != null) bytes[offset] = 0x0F;
+                                               SetShifted (bytes, false);
+                                               byteCount--;
+                                       }
+                                       switch (m)
+                                       {
+                                               case ISO2022JPMode.JISX0208:
+                                                       break;
+                                               default:
+                                                       SwitchMode(bytes, ref byteIndex, ref byteCount, ref m, ISO2022JPMode.JISX0208);
+                                                       break;
+                                       }
+
+                                       // This range contains extra characters.
+                                       value = (ch - 0xFF01) * 2;
+                                       value = ((int)(convert.extraToJis[value])) |
+                                                       (((int)(convert.extraToJis[value + 1])) << 8);
+                               }
+                               else if (ch >= 0xFF60 && ch <= 0xFFA0)
+                               {
+                                       // disallowed half-width kana is
+                                       // already converted to full-width kana
+                                       // so here we don't have to consider it.
+
+                                       if (allow_shift_io)
+                                       {
+                                               if (!IsShifted (bytes))
+                                               {
+                                                       var offset = byteIndex++;
+                                                       if (bytes != null) bytes[offset] = 0x0E;
+                                                       SetShifted (bytes, true);
+                                                       byteCount--;
+                                               }
+                                       }
+                                       else
+                                       {
+                                               switch (m)
+                                               {
+                                                       case ISO2022JPMode.JISX0201:
+                                                               break;
+                                                       default:
+                                                               SwitchMode(bytes, ref byteIndex, ref byteCount, ref m, ISO2022JPMode.JISX0201);
+                                                               break;
+                                               }
+                                       }
+                                       value = ch - 0xFF40;
+                               }
+                               else if (ch < 128)
+                               {
+                                       if (IsShifted (bytes))
+                                       {
+                                               var offset = byteIndex++;
+                                               if (bytes != null) bytes[offset] = 0x0F;
+                                               SetShifted (bytes, false);
+                                               byteCount--;
+                                       }
+                                       SwitchMode(bytes, ref byteIndex, ref byteCount, ref m, ISO2022JPMode.ASCII);
+                                       value = (int)ch;
+                               }
+                               else
+                               {
+#if NET_2_0
+                                       HandleFallback (chars, ref i, ref charCount,
+                                               bytes, ref byteIndex, ref byteCount, this);
+#endif
+                                       // skip non-convertible character
+                                       continue;
+                               }
+
+                               //Console.WriteLine ("{0:X04} : {1:x02} {2:x02}", v, (int) v / 94 + 33, v % 94 + 33);
+                               if (value >= 0x100)
+                               {
+                                       value -= 0x0100;
+                                       if (bytes != null)
+                                       {
+                                               bytes[byteIndex++] = (byte)(value / 94 + 33);
+                                               bytes[byteIndex++] = (byte)(value % 94 + 33);
+                                       }
+                                       else
+                                       {
+                                               byteIndex += 2;
+                                       }
+                                       byteCount -= 2;
+                               }
+                               else
+                               {
+                                       var offset = byteIndex++;
+                                       if (bytes != null) bytes[offset] = (byte)value;
+                                       byteCount--;
+                               }
+                       }
+                       if (flush)
+                       {
+                               // must end in ASCII mode
+                               if (IsShifted (bytes))
+                               {
+                                       var offset = byteIndex++;
+                                       if (bytes != null) bytes[offset] = 0x0F;
+                                       SetShifted (bytes, false);
+                                       byteCount--;
+                               }
+                               if (m != ISO2022JPMode.ASCII)
+                                       SwitchMode(bytes, ref byteIndex, ref byteCount, ref m, ISO2022JPMode.ASCII);
+                       }
+
+                       return byteIndex - start;
+               }
+               
+               public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex, bool flush)
+               {
+                       return GetBytesInternal (chars, charIndex, charCount, bytes, byteIndex, flush);
+               }
+#endif
 
 #if NET_2_0
                public override void Reset ()
@@ -389,6 +587,7 @@ namespace I18N.CJK
 #endif
        }
 
+
        internal class ISO2022JPDecoder : Decoder
        {
                static JISConvert convert = JISConvert.Convert;
index 61260ad328c526db50c0f27b0f20f5fda11906d8..e92606b4aa9b39a6b64c5a0496920984a421e804 100644 (file)
@@ -17,11 +17,13 @@ namespace MonoTests.I18N.CJK
        [TestFixture]
        public class TestCJK
        {
+               private global::I18N.Common.Manager Manager = global::I18N.Common.Manager.PrimaryManager;
+
                void AssertEncode (string utf8file, string decfile, int codepage)
                {
                        string decoded = null;
                        byte [] encoded = null;
-                       using (StreamReader sr = new StreamReader (utf8file,
+                       using (StreamReader sr = new StreamReader (utf8file, 
                                Encoding.UTF8)) {
                                decoded = sr.ReadToEnd ();
                        }
@@ -29,7 +31,7 @@ namespace MonoTests.I18N.CJK
                                encoded = new byte [fs.Length];
                                fs.Read (encoded, 0, (int) fs.Length);
                        }
-                       Encoding enc = Encoding.GetEncoding (codepage);
+                       Encoding enc = Manager.GetEncoding (codepage);
                        byte [] actual;
 
                        // simple string case
@@ -61,7 +63,7 @@ namespace MonoTests.I18N.CJK
                                encoded = new byte [fs.Length];
                                fs.Read (encoded, 0, (int) fs.Length);
                        }
-                       Encoding enc = Encoding.GetEncoding (codepage);
+                       Encoding enc = Manager.GetEncoding (codepage);
                        char [] actual;
 
                        Assert.AreEqual (decoded.Length,
@@ -156,6 +158,12 @@ namespace MonoTests.I18N.CJK
                        AssertEncode ("Test/texts/japanese2-utf8.txt", "Test/texts/japanese2-50220.txt", 50220);
                }
 
+               [Test]
+               public void CP50220_Encode_3 ()
+               {
+                       AssertEncode("Test/texts/japanese3-utf8.txt", "Test/texts/japanese3-50220.txt", 50220);
+               }
+
                [Test]
                public void CP50220_Decode ()
                {
@@ -168,6 +176,12 @@ namespace MonoTests.I18N.CJK
                        AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50221.txt", 50221);
                }
 
+               [Test]
+               public void CP50221_Encode_3()
+               {
+                       AssertEncode("Test/texts/japanese3-utf8.txt", "Test/texts/japanese3-50221.txt", 50221);
+               }
+
                [Test]
                public void CP50221_Decode ()
                {
@@ -195,7 +209,7 @@ namespace MonoTests.I18N.CJK
                [Test]
                public void CP50220BrokenESC ()
                {
-                       Assert.AreEqual ("\u001B$0", Encoding.GetEncoding (50220).GetString (new byte [] {0x1B, 0x24, 0x30}), "#1");
+                       Assert.AreEqual ("\u001B$0", Manager.GetEncoding (50220).GetString (new byte [] {0x1B, 0x24, 0x30}), "#1");
                }
 
                [Test]
@@ -235,14 +249,14 @@ namespace MonoTests.I18N.CJK
 
                void GetBytesAllSingleChars (int enc)
                {
-                       Encoding e = Encoding.GetEncoding (enc);
+                       Encoding e = Manager.GetEncoding (enc);
                        for (int i = 0; i < 0x10000; i++)
                                e.GetBytes (new char [] { (char)i });
                }
 
                void GetCharsAllBytePairs (int enc)
                {
-                       Encoding e = Encoding.GetEncoding (enc);
+                       Encoding e = Manager.GetEncoding (enc);
                        byte [] bytes = new byte [2];
                        for (int i0 = 0; i0 < 0x100; i0++) {
                                bytes [0] = (byte) i0;
@@ -289,7 +303,7 @@ namespace MonoTests.I18N.CJK
 #endif
                public void Encoder54936Refresh ()
                {
-                       Encoding e = Encoding.GetEncoding ("gb18030");
+                       Encoding e = Manager.GetEncoding ("gb18030");
                        Encoder d = e.GetEncoder ();
                        byte [] bytes;
 
@@ -313,14 +327,14 @@ namespace MonoTests.I18N.CJK
                public void Bug491799 ()
                {
                        Assert.AreEqual (new byte [] {0xEE, 0xFC},
-                                          Encoding.GetEncoding (932).GetBytes ("\uFF02"));
+                                          Manager.GetEncoding (932).GetBytes ("\uFF02"));
                }
 
 #if NET_2_0
                [Test]
                public void Decoder932Refresh ()
                {
-                       Encoding e = Encoding.GetEncoding (932);
+                       Encoding e = Manager.GetEncoding (932);
                        Decoder d = e.GetDecoder ();
                        char [] chars;
 
@@ -340,7 +354,7 @@ namespace MonoTests.I18N.CJK
                [Test]
                public void Decoder51932Refresh ()
                {
-                       Encoding e = Encoding.GetEncoding (51932);
+                       Encoding e = Manager.GetEncoding (51932);
                        Decoder d = e.GetDecoder ();
                        char [] chars;
 
@@ -368,7 +382,7 @@ namespace MonoTests.I18N.CJK
                [Test]
                public void Decoder936Refresh ()
                {
-                       Encoding e = Encoding.GetEncoding (936);
+                       Encoding e = Manager.GetEncoding (936);
                        Decoder d = e.GetDecoder ();
                        char [] chars;
 
@@ -391,7 +405,7 @@ namespace MonoTests.I18N.CJK
                [Test]
                public void Decoder949Refresh ()
                {
-                       Encoding e = Encoding.GetEncoding (949);
+                       Encoding e = Manager.GetEncoding (949);
                        Decoder d = e.GetDecoder ();
                        char [] chars;
 
@@ -414,7 +428,7 @@ namespace MonoTests.I18N.CJK
                [Test]
                public void Decoder950Refresh ()
                {
-                       Encoding e = Encoding.GetEncoding (950);
+                       Encoding e = Manager.GetEncoding (950);
                        Decoder d = e.GetDecoder ();
                        char [] chars;
 
@@ -439,7 +453,7 @@ namespace MonoTests.I18N.CJK
                [Test]
                public void Decoder51932NoRefresh ()
                {
-                       Encoding e = Encoding.GetEncoding (51932);
+                       Encoding e = Manager.GetEncoding (51932);
                        Decoder d = e.GetDecoder ();
                        char [] chars;
 
@@ -462,7 +476,7 @@ namespace MonoTests.I18N.CJK
                [Test]
                public void Decoder936NoRefresh ()
                {
-                       Encoding e = Encoding.GetEncoding (936);
+                       Encoding e = Manager.GetEncoding (936);
                        Decoder d = e.GetDecoder ();
                        char [] chars;
 
@@ -485,7 +499,7 @@ namespace MonoTests.I18N.CJK
                [Test]
                public void Decoder949NoRefresh ()
                {
-                       Encoding e = Encoding.GetEncoding (949);
+                       Encoding e = Manager.GetEncoding (949);
                        Decoder d = e.GetDecoder ();
                        char [] chars;
 
@@ -508,7 +522,7 @@ namespace MonoTests.I18N.CJK
                [Test]
                public void Decoder950NoRefresh ()
                {
-                       Encoding e = Encoding.GetEncoding (950);
+                       Encoding e = Manager.GetEncoding (950);
                        Decoder d = e.GetDecoder ();
                        char [] chars;
 
@@ -532,7 +546,7 @@ namespace MonoTests.I18N.CJK
                public void HandleObsoletedESCJ () // bug #398273
                {
                        byte [] b = new byte [] {0x64, 0x6f, 0x6e, 0x1b, 0x24, 0x42, 0x21, 0x47, 0x1b, 0x28, 0x4a, 0x74};
-                       string s = Encoding.GetEncoding ("ISO-2022-JP").GetString (b);
+                       string s = Manager.GetEncoding ("ISO-2022-JP").GetString (b);
                        Assert.AreEqual ("don\u2019t", s);
 
                }
diff --git a/mcs/class/I18N/CJK/Test/texts/japanese3-50220.txt b/mcs/class/I18N/CJK/Test/texts/japanese3-50220.txt
new file mode 100644 (file)
index 0000000..764cda2
--- /dev/null
@@ -0,0 +1,108 @@
+<title>\e$B0lHV$*F@$J%+\e(B?\e$B$l$N%K\e(B?            \e$B0lHV$*F@$J%+$rA*$V$?$a$N5U0z$-\e(B   \e$B!!!!\e(B  \e$B!!\e(B    
+\e$B!!!!!!%/%l%8%C%H%+\e(B> \e$B0lHV$*F@$J%+\e(B     \e$B0lHV$*F@$J%+\e(B            
+\e$B%/%l%8%C%H%+$N5U0z$-L\E*JL!"%i%$%U%9%?%$%kJL!"5!G=JL!"CO0hJL!"G/NpJL$KJ,$+$l$F$*$j!"$=$l$>$l$N%K\e(B  
+\e$B%/%l%8%C%H%+\e(B     <\e$BL\E*JL\e(B>    \e$BHt9T5!$N%^%$%k$,Cy$^$j!"L5NA9R6u7t$,$b$i$($k%+\e(B 
+\e$B('\e(B \e$BF|K\9R6u\e(B JAL\e$B%+\e(B \e$B=iG/EYG/2qHqL5NA\e(B
+\e$B('\e(B \e$BBg4Z9R6u\e(B JCB\e$B%9%+%$%Q%9%+\e(B \e$B=iG/EYG/2qHqL5NA\e(B
+\e$B('\e(B \e$B%3%s%A%M%s%?%k9R6u\e(B \e$B%o%s%Q%9\e(BJCB\e$B%0%"%`%+\e(B \e$B=iG/EYG/2qHqL5NA\e(B 
+\e$B('\e(B \e$B%f%J%$%F%C%I9R6u\e(B \e$B%^%$%l\e(B \e$B=iG/EYG/2qHqL5NA\e(B
+\e$B('\e(B \e$B%?%$9R6u\e(B \e$B%m%$%d%k%*\e(B \e$B=iG/EYG/2qHqL5NA\e(B
+\e$B('\e(B \e$B%7%s%,%]%/%j%9%U%i%$%d\e(B \e$B=iG/EYG/2qHqL5NA\e(B
+\e$B(&\e(B \e$B%(%U%i%$%s%0!&%V%k\e(B \e$B=iG/EYG/2qHqL5NA\e(B   
+\e$B;29M\e(B \e$B!'\e(B \e$B%^%$%k$H$O!)\e(B       \e$B%]%$%s%H$,8zN($h$/Cy$^$k$*F@$J%+\e(B 
+\e$B('\e(B \e$B3ZE7;T>l$G#2!s!"3ZE7%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B('\e(B UFJ \e$B%S%"%=%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B('\e(B \e$B%i%$%U%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B(&\e(B \e$B%*%j%3%"%W%F%#\e(BiD \e$BG/2qHqL5NA\e(B       \e$B3$30N99T=}32J]81$,\e(B 
+\e$B('\e(B \e$B%K%3%9\e(B \e$B%S%"%=%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B('\e(B \e$B%*%j%3%"%W%F%#\e(BiD \e$BG/2qHqL5NA\e(B
+\e$B('\e(B DC\e$B%+\e(B \e$BG/2qHqL5NA\e(B 
+\e$B('\e(B DC\e$B%4\e(B \e$B%+\e(B
+\e$B('\e(B DC\e$B%4\e(B \e$B=iG/EYG/2qHqL5NA\e(B
+\e$B('\e(B DC\e$B%+\e(B \e$B=iG/EYG/2qHqL5NA\e(B 
+\e$B('\e(B  \e$B=iG/EYG/2qHqL5NA\e(B
+\e$B(&\e(B \e$B%=%K\e(B \e$B=iG/EYG/2qHqL5NA\e(B 1\e$BG/$K\e(B1\e$BEY$N7h:Q$GG/2qHqL5NA\e(B
+      \e$B%]%$%s%H$r8=6b2=$G$-$k%/%l%8%C%H%+\e(B 
+\e$B('\e(B P-one\e$B%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B('\e(B UFJ \e$B%S%"%=%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B('\e(B NTT\e$B%0%k\e(B \e$BG/2qHqL5NA\e(B
+\e$B(&\e(B \e$B$5$/$i\e(BJCB\e$B%$\e(B \e$BG/2qHqL5NA\e(B       \e$BA4$F$N>&IJ$,#1!s3d0z$5$l$k%9\e(B 
+\e$B(&\e(B P-one\e$B%+\e(B \e$BG/2qHqL5NA\e(B       \e$B%;%>%s1J5WITLG\e(B.com\e$B$G$N$*Gc$$J*$G%]%$%s%H:GBg#1#9G\!*\e(B 
+\e$B(&\e(B \e$B!T%;%>%s!U%+\e(B       \e$BCB@8F|7n$O%]%$%s%H#5G\!"Cy$^$C$?%]%$%s%H$O>&IJ7t$K8r49\e(B 
+\e$B(&\e(B \e$B%i%$%U%+\e(B \e$BG/2qHqL5NA\e(B       \e$B;}$C$F$$$k$@$1$G@($$!*%9%F%$%?%9%+\e(B 
+\e$B('\e(B \e$B%@%$%J\e(B
+\e$B('\e(B \e$B%"%a%j%+%s!&%(%-%9%W%l%9!&%+\e(B
+\e$B(&\e(B \e$B!T%;%>%s!U%W%i%A%J!&%"%a%j%+%s!&%(%-%9%W%l%9!&%+\e(B       \e$BMxMQ8BEY3[$,L5@)8B$N%9\e(B 
+\e$B(&\e(B \e$B%@%$%J\e(B      \e$B%4\e(B 
+\e$B(&\e(B \e$B%*%j%3%"%W%F%#\e(BiD \e$BG/2qHqL5NA\e(B       \e$B?M5$%3%9%a$,Ev$?$kG/2qHqL5NA$N=w@-8~$1\e(BJCB\e$B%+\e(B 
+\e$B(&\e(B JCB \e$B%j%s%@%+\e(B \e$BG/2qHqL5NA\e(B       \e$B7HBSBe$,:GBg#6#0!s3d0z$5$l$kG/2qHqL5NA%+\e(B 
+\e$B(&\e(B NTT\e$B%0%k\e(B \e$BG/2qHqL5NA\e(B       \e$B%-%c%C%7%s%0$rDc6bMx$G$G$-$k%+\e(B 
+\e$B(&\e(B \e$B$5$/$i\e(B JCB\e$B%$\e(B \e$BG/2qHqL5NA\e(B \e$B#3K|1_$^$G$J$iG/Mx#1!s!*\e(B
+      \e$B%,%=%j%sBe$,%-%c%C%7%e%P%C%/$5$l!"3$30J]81\e(B 
+\e$B(&\e(B  \e$B=iG/EYG/2qHqL5NA\e(B       \e$B1G2h!&%3%s%5\e(B 
+\e$B(&\e(B \e$B$T$"%+\e(B \e$B=iG/EYG/2qHqL5NA\e(B       \e$B0YBX%l\e(B 
+\e$B(&\e(B SBI\e$B%+\e(B       \e$B%H%i%Y%i\e(B.@Z 
+\e$B(&\e(B \e$B%7%F%#\e(B \e$B%(%j\e(B       \e$B%,%=%j%sBe!"9bB.F;O)NA6b$r%-%c%C%7%e%P%C%/\e(B 
+\e$B(&\e(B JCB\e$B%4\e(B       \e$B%m\e(B 
+\e$B(&\e(B \e$B%I%s!&%-%[\e(B(VISA\e$B!"\e(BJCB\e$B$N$_\e(B)
+      FC\e$B%P%m%;%m%J!"\e(BAC\e$B%_%i%s\e(B \e$B%5%C%+\e(B 
+\e$B('\e(B AC\e$B%_%i%s%+\e(B
+\e$B('\e(B AC\e$B%_%i%s%+\e(B
+\e$B(&\e(B FC\e$B%P%k%;%m%J%*%U%#%7%c%k3ZE7%+\e(B       \e$B1G2h$r1G2h4[$G3d0zNA6b$G8+$k!*\e(B 
+\e$B('\e(B \e$B1G2h$r3d0zNA6b$G8+$l$k%/%l%8%C%H%+\e(B
+\e$B(&\e(B \e$B1G2h$r3d0zNA6b$G8+$l$k%/%l%8%C%H%+\e(BF|K\JT)       \e$B%$%s%?\e(B 
+\e$B(&\e(B \e$B%=%K=iG/EYG/2qHqL5NA\e(B
+\e$B#1G/$K#1EY$N7h:Q$GG/2qHqL5NA\e(B
+      \e$BI42_E9$G:GBg#5!s3d0z\e(B 
+\e$B(&\e(B \e$BEl5^%[%F%k%:%3%s%U%)\e(B \e$BG/2qHqL5NA\e(B       \e$BN99TA0$K$f$C$?$jM%2m$J5$J,!*6u9A%i%&%s%8$rL5NA$GMxMQ\e(B 
+\e$B('\e(B  \e$B%@%$%J\e(B
+\e$B('\e(B \e$B%7%F%#\e(B \e$B%4\e(B
+\e$B('\e(B \e$B%N\e(B 
+\e$B('\e(B \e$B%7%F%#\e(B \e$B%(%j\e(B
+\e$B('\e(B \e$B%"%a%j%+%s!&%(%-%9%W%l%9!&%+\e(B
+\e$B('\e(B \e$B%"%a%j%+%s!&%(%-%9%W%l%9!&%4\e(B 
+\e$B('\e(B \e$B%"%a%j%+%s!&%(%-%9%W%l%9!&%S%8%M%9%+\e(B
+\e$B('\e(B UFJ \e$B%4\e(B
+\e$B('\e(B UFJ \e$B%d%s%0%4\e(B 
+\e$B('\e(B DC\e$B%+\e(B
+\e$B('\e(B \e$BEl5^\e(BTOP\e$B%4\e(B(\e$B0lItM-NA$"$j\e(B)
+\e$B('\e(B 
+\e$B(&\e(B \e$B3ZE7%W%l%_%"%`%+\e(B       \e$BM%BT%5\e(B 
+\e$B(&\e(B JCB\e$B%+\e(B \e$B=iG/EYG/2qHqL5NA\e(B       \e$B7c0B$NEBF2%I%s!&%-%[\e(B 
+\e$B(&\e(B \e$B%I%s%-!&%[\e(B \e$BG/2qHqL5NA\e(B        \e$B?3::$N$$$i$J$$\e(BVISA\e$B%G%S%C%H%+\e(B 
+\e$B('\e(B H.I.S.\e$B%o\e(B \e$BG/2qHqL5NA\e(B
+\e$B('\e(B KNT\e$B%H%i%Y%k%-%c%C%7%e%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B(&\e(B \e$B%9%k%,6d9T\e(BVISA\e$B%G%S%C%H%+\e(B \e$BG/2qHqL5NA\e(B       \e$B8x6&NA6b$b3d0z$G$-$k%+\e(B 
+\e$B(&\e(B P-one\e$B%+\e(B \e$BG/2qHqL5NA\e(B       \e$B2?$G$b#1!s3d0z$5$l$k$*F@$J%+\e(B 
+\e$B(&\e(B P-one\e$B%+\e(B \e$BG/2qHqL5NA\e(B      <\e$B%i%$%U%9%?%$%kJL\e(B>    \e$B%3%s%S%K$G$*F@!uJXMx$J%+\e(B 
+\e$B(&\e(B UCS \e$B%+%k%o%6%+\e(B(\e$B%5G/2qHqL5NA\e(B       \e$B%9#5!s!A#1#5!s3d0z$G$*Gc$$J*"v\e(B 
+\e$B('\e(B \e$B@>M'\e(B
+\e$B('\e(B \e$B%$%H\e(B
+\e$B('\e(B \e$B%(%9%Q\e(B 
+\e$B('\e(B \e$B%f%K\e(B
+\e$B('\e(B \e$B%"%T%?\e(B
+\e$B(&\e(B \e$B%f\e(B       \e$BEl5^@~$GDL6P$7$F$$$k?M$N$?$a$N%+\e(B 
+\e$B(&\e(B \e$BEl5^%]%$%s%H%+\e(B \e$BG/2qHqL5NA\e(B       \e$B%$%s%?\e(B 
+\e$B('\e(B UFJ\e$B%K%3%9\e(B \e$B%S%"%=%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B(&\e(B \e$B3ZE7%+\e(B \e$BG/2qHqL5NA\e(B    <\e$B5!G=JL\e(B>    \e$BEE;R%^%M\e(B 
+\e$B('\e(B UCS \e$B%+%k%o%6%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B('\e(B \e$B%M%/%9%3CfF|K\\e(B \e$B%W%l%_%"%`%I%i%$%P\e(B \e$B=iG/EYG/2qHqL5NA\e(B
+\e$B#1G/$K#1EY$N7h:Q$GG/2qHqL5NA\e(B 
+\e$B('\e(B \e$B%=%K\e(B
+\e$B(&\e(B \e$B%=%K\e(B \e$B=iG/EYG/2qHqL5NA\e(B
+\e$B#1G/$K#1EY$N7h:Q$GG/2qHqL5NA\e(B
+   <\e$BCO0hJL\e(B> <\e$B9qFbJT\e(B>   \e$BEl5~ET$K$*=;$^$$$NJ}$,F@$9$k%+\e(B 
+\e$B(&\e(B \e$BEl5^%+\e(B \e$BG/2qHqL5NA\e(B       \e$B0&CN8)!&4tIl8)!&D9Ln8)$K$*=;$^$$$NJ}$,F@$9$k%+\e(B 
+\e$B(&\e(B UCS \e$B%+%k%o%6%+\e(B \e$BG/2qHqL5NA\e(B       \e$BCO0hJL\e(B \e$B$*F@$J\e(BETC\e$B%+\e(B 
+\e$B(&\e(B \e$BElF|K\%$\e(B
+\e$B#1G/$K#1EY$N7h:Q$GG/2qHqL5NA\e(B
+    <\e$B3$30JT\e(B>   \e$B%O%o%$!&%0%"%`!&%"%a%j%+K\EZ$N$*E9$GM%BT!"3d0z$,B?$$%+\e(B 
+\e$B('\e(B \e$B%K%3%9%+\e(B(\e$B%/=iG/EYG/2qHqL5NA\e(B
+\e$B(&\e(B UFJ\e$B%+\e(B(\e$B%/=iG/EYG/2qHqL5NA\e(B       \e$B3$30$N\e(BATM\e$B$G8=CO$N$*6b$r0z$-=P$;$k%+\e(B 
+\e$B('\e(B \e$B%9%k%,6d9T\e(BVISA\e$B%G%S%C%H%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B('\e(B H.I.S.\e$B%o\e(B \e$BG/2qHqL5NA\e(B
+\e$B(&\e(B KNT\e$B%H%i%Y%k%-%c%C%7%e%+\e(B \e$BG/2qHqL5NA\e(B      <\e$BG/NpJL\e(B>    \e$B#1#5:P$+$i;}$F$k\e(BVISA\e$B%G%S%C%H%+\e(B 
+\e$B('\e(B \e$B%9%k%,6d9T\e(BVISA\e$B%G%S%C%H%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B('\e(B H.I.S.\e$B%o\e(B \e$BG/2qHqL5NA\e(B
+\e$B(&\e(B KNT\e$B%H%i%Y%k%-%c%C%7%e%+\e(B \e$BG/2qHqL5NA\e(B                                    \e$BA0$X!'%/%l%8%C%H%+\e(B TOP\e$B!'%/%l%8%C%H%+\e(B                                  ?N???W?b?g?J?[?h?\e$B%U\e(B?m?\e$B%C\e(B   ?E ?N???W?b?g?J?[?h?\e$B%U\e(B?\e$B%%\e(B?\e$B!,\e(B?I   ?E ?N???W?b?g?J?[?h?\e$B%X\e(B?M?p?\e$B%U\e(B?\e$B%j\e(B?H   ?E ?N???W?b?g?J?[?h?\e$B%U\e(B?d?g?\e$B%s\e(B   ?E ?N???W?b?g?J?[?h?\e$B%U\e(B?@?\??????   ?E ?N???W?b?g?J?[?h?\e$B%U\e(B???\e$B!+\e(B   ?E ?N???W?b?g?J?[?h?\e$B%U\e(B?????N   ?E ?N???W?b?g?J?[?h?\e$B%U\e(B?\e$B%%\e(B??   ?E IC?`?b?v?????N???W?b?g?J?[?h   ?E ?J?[?h?\e$B%U\e(B?x?\e$B!&\e(B???@?\e$B%U\e(B???\e$B!+\e(B   ?E ?N???W?b?g?J?[?h?u?\e$B%(\e(B?v???p?p?I       ?N???W?b?g?J?[?h?\e$B%U\e(B?\e$B%m\e(B?\e$B%C\e(B   ?E ?N???W?b?g?J?[?h?\e$B%U\e(B?t?\e$B%`\e(B?\e$B%m\e(B?\e$B%C\e(B   ?E ?C?O?\e$B%-\e(B?s?\e$B%m\e(B?\e$B%C\e(B?\e$B%K\e(B?\e$B%X\e(B?H   ?E ?C?O?\e$B%-\e(B?s?\e$B%m\e(B?\e$B%C\e(B?\e$B%U\e(B???\e$B%b\e(B?_   ?E ?C?O?\e$B%-\e(B?s?\e$B%m\e(B?\e$B%C\e(B?\e$B%U\e(B???{?p??   ?E ?C?O?\e$B%-\e(B?s?\e$B%m\e(B?\e$B%C\e(B?\e$B%U\e(B?e?N?j?b?N   ?E ?\e$B%-\e(B?s?\e$B%U\e(B?\e$B%J\e(B?\e$B%e\e(B?N???W?b?g?J?[?h       ?\e$B%d\e(B?E?K?\?????????\e$B!,\e(B??   ?E ?K?\?????????\e$B!,\e(B???\e$B%J\e(B?\e$B%)\e(B???J?[?h   ?E ???[?h?T?[?r?X?t?\e$B%)\e(B?J?[?h   ?E ?K?\?????? ?L???b?V???o?b?N   ?E ?V?[?g?x???g???Q?\e$B%m\e(B?\e$B%C\e(B?J?[?h   ?E ?\e$B%d\e(B?\e$B%U\e(B?\e$B%J\e(B?\e$B%e\e(B?N???W?b?g?J?[?h       ?\e$B%A\e(B?T?E?????\e$B%%\e(B???I?\e$B%d\e(B   ?E ?|?C???g?\e$B%a\e(B?\e$B%&\e(B?\e$B%r\e(BUP?J?[?h   ?E ?A?j???E?L?????N?^?[?n?J?[?h   ?E ?????\e$B%A\e(B?T?t?\e$B%)\e(B?N???W?b?g?J?[?h       ?N???W?b?g?J?[?h?\e$B%U\e(B????   ?E ?N???W?b?g?J?[?h?\e$B%K\e(B?\e$B!,\e(B?\e$B%[\e(B??   ?E ?v???p?[?J?[?h???????r   ?E ?v???p?[?J?[?h?\e$B%m\e(B?\e$B%C\e(B???r   ?E ?N?????\e$B%&\e(B?\e$B%=\e(B?J?[?h?\e$B%N\e(B???\e$B%b\e(B?H   ?E ?S?[???h?J?[?h?\e$B%U\e(B?????b?g
+      ?@ [?\e$B%-\e(B?s?\e$B%a\e(B?E?V???b?s???O?\e$B%a\e(B?E???`]   ?E ?N???W?b?g?J?[?h?p???\e$B%)\e(B?T   ?E ?\e$B%#\e(B?\e$B%;\e(B?\e$B%M\e(B?J?[?h?I?\e$B%`\e(B?t???\e$B%)\e(B?\e$B%)\e(B?T        ?N???W?b?g?J?[?h?\e$B%=\e(B???\e$B%K\e(B????   ?E ?N???W?b?g?J?[?h???\e$B%e\e(B?????\e$B%=\e(B??????   ?E ?N???W?b?g?J?[?h ?\???\e$B%s\e(B?\e$B%a\e(B   ?E ?N???W?b?g?J?[?h ?R?\e$B%/\e(B?\e$B%a\e(B   ?E ?N???W?b?g?J?[?h ?V???b?s???O?\e$B%a\e(B   ?E ?N???W?b?g?J?[?h ?x?\e$B!&\e(B???@?\e$B%a\e(B   ?E ?N???W?b?g?J?[?h ?????\e$B%a\e(B   ?E ?N???W?b?g?J?[?h ?g???u???\e$B%a\e(B         ?\e$B%#\e(B?\e$B%%\e(B?\e$B!,\e(B?T?C?g????    ?E ?\e$B%j\e(B?????\e$B%_\e(B(?l?b?g?\e$B%j\e(B??) ???r     ?E FX(?O???\e$B%i\e(B?\e$B%h\e(B?\e$B%j\e(B????????)    ?E ?\e$B%5\e(B???v???[???g?L?????y?[??   ?E ?l?b?g?o???N(?l?b?g???s) ???r   ?E ?Z?MSBI?l?b?g???s ???p?p   ?E ???\e$B%C\e(B?N???E???\e$B%+\e(B?N??????        ???T?C?g?\e$B%N\e(B?\e$B%D\e(B?\e$B!V\e(B?\e$B%H\e(B   ?E ???T?C?g?\e$B%h\e(B?\e$B%U\e(B?????N?\e$B%N\e(B?\e$B%D\e(B?\e$B!V\e(B?\e$B%H\e(B   ?E ???\e$B%s\e(B?????N?\e$B%N\e(B?\e$B%D\e(B?\e$B!V\e(B?\e$B%H\e(B???????N?W   ?E ?\e$B%A\e(B???\e$B!"\e(B?????@?\e$B%N\e(B???\e$B%F\e(B?\e$B%e\e(B?\?L   ?E ?\e$B%#\e(B???\e$B!V\e(B?????\e$B%1\e(B?t?H?[??   ?E ?T?C?g?}?b?v 1 2 3 4 5 6 7 8             </title><meta http-equiv=Content-Type content="text/html; charset=iso-2022-jp"><meta http-equiv=Content-Type content="text/html; charset=Shift_JIS"><c>?N???W?b?g?J?[?h?\???\e$B%s\e(B???r.com
\ No newline at end of file
diff --git a/mcs/class/I18N/CJK/Test/texts/japanese3-50221.txt b/mcs/class/I18N/CJK/Test/texts/japanese3-50221.txt
new file mode 100644 (file)
index 0000000..2a2212c
--- /dev/null
@@ -0,0 +1,108 @@
+<title>\e$B0lHV$*F@$J%+\e(B?\e$B$l$N%K\e(B?            \e$B0lHV$*F@$J%+$rA*$V$?$a$N5U0z$-\e(B   \e$B!!!!\e(B  \e$B!!\e(B    
+\e$B!!!!!!%/%l%8%C%H%+\e(B> \e$B0lHV$*F@$J%+\e(B     \e$B0lHV$*F@$J%+\e(B            
+\e$B%/%l%8%C%H%+$N5U0z$-L\E*JL!"%i%$%U%9%?%$%kJL!"5!G=JL!"CO0hJL!"G/NpJL$KJ,$+$l$F$*$j!"$=$l$>$l$N%K\e(B  
+\e$B%/%l%8%C%H%+\e(B     <\e$BL\E*JL\e(B>    \e$BHt9T5!$N%^%$%k$,Cy$^$j!"L5NA9R6u7t$,$b$i$($k%+\e(B 
+\e$B('\e(B \e$BF|K\9R6u\e(B JAL\e$B%+\e(B \e$B=iG/EYG/2qHqL5NA\e(B
+\e$B('\e(B \e$BBg4Z9R6u\e(B JCB\e$B%9%+%$%Q%9%+\e(B \e$B=iG/EYG/2qHqL5NA\e(B
+\e$B('\e(B \e$B%3%s%A%M%s%?%k9R6u\e(B \e$B%o%s%Q%9\e(BJCB\e$B%0%"%`%+\e(B \e$B=iG/EYG/2qHqL5NA\e(B 
+\e$B('\e(B \e$B%f%J%$%F%C%I9R6u\e(B \e$B%^%$%l\e(B \e$B=iG/EYG/2qHqL5NA\e(B
+\e$B('\e(B \e$B%?%$9R6u\e(B \e$B%m%$%d%k%*\e(B \e$B=iG/EYG/2qHqL5NA\e(B
+\e$B('\e(B \e$B%7%s%,%]%/%j%9%U%i%$%d\e(B \e$B=iG/EYG/2qHqL5NA\e(B
+\e$B(&\e(B \e$B%(%U%i%$%s%0!&%V%k\e(B \e$B=iG/EYG/2qHqL5NA\e(B   
+\e$B;29M\e(B \e$B!'\e(B \e$B%^%$%k$H$O!)\e(B       \e$B%]%$%s%H$,8zN($h$/Cy$^$k$*F@$J%+\e(B 
+\e$B('\e(B \e$B3ZE7;T>l$G#2!s!"3ZE7%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B('\e(B UFJ \e$B%S%"%=%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B('\e(B \e$B%i%$%U%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B(&\e(B \e$B%*%j%3%"%W%F%#\e(BiD \e$BG/2qHqL5NA\e(B       \e$B3$30N99T=}32J]81$,\e(B 
+\e$B('\e(B \e$B%K%3%9\e(B \e$B%S%"%=%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B('\e(B \e$B%*%j%3%"%W%F%#\e(BiD \e$BG/2qHqL5NA\e(B
+\e$B('\e(B DC\e$B%+\e(B \e$BG/2qHqL5NA\e(B 
+\e$B('\e(B DC\e$B%4\e(B \e$B%+\e(B
+\e$B('\e(B DC\e$B%4\e(B \e$B=iG/EYG/2qHqL5NA\e(B
+\e$B('\e(B DC\e$B%+\e(B \e$B=iG/EYG/2qHqL5NA\e(B 
+\e$B('\e(B  \e$B=iG/EYG/2qHqL5NA\e(B
+\e$B(&\e(B \e$B%=%K\e(B \e$B=iG/EYG/2qHqL5NA\e(B 1\e$BG/$K\e(B1\e$BEY$N7h:Q$GG/2qHqL5NA\e(B
+      \e$B%]%$%s%H$r8=6b2=$G$-$k%/%l%8%C%H%+\e(B 
+\e$B('\e(B P-one\e$B%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B('\e(B UFJ \e$B%S%"%=%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B('\e(B NTT\e$B%0%k\e(B \e$BG/2qHqL5NA\e(B
+\e$B(&\e(B \e$B$5$/$i\e(BJCB\e$B%$\e(B \e$BG/2qHqL5NA\e(B       \e$BA4$F$N>&IJ$,#1!s3d0z$5$l$k%9\e(B 
+\e$B(&\e(B P-one\e$B%+\e(B \e$BG/2qHqL5NA\e(B       \e$B%;%>%s1J5WITLG\e(B.com\e$B$G$N$*Gc$$J*$G%]%$%s%H:GBg#1#9G\!*\e(B 
+\e$B(&\e(B \e$B!T%;%>%s!U%+\e(B       \e$BCB@8F|7n$O%]%$%s%H#5G\!"Cy$^$C$?%]%$%s%H$O>&IJ7t$K8r49\e(B 
+\e$B(&\e(B \e$B%i%$%U%+\e(B \e$BG/2qHqL5NA\e(B       \e$B;}$C$F$$$k$@$1$G@($$!*%9%F%$%?%9%+\e(B 
+\e$B('\e(B \e$B%@%$%J\e(B
+\e$B('\e(B \e$B%"%a%j%+%s!&%(%-%9%W%l%9!&%+\e(B
+\e$B(&\e(B \e$B!T%;%>%s!U%W%i%A%J!&%"%a%j%+%s!&%(%-%9%W%l%9!&%+\e(B       \e$BMxMQ8BEY3[$,L5@)8B$N%9\e(B 
+\e$B(&\e(B \e$B%@%$%J\e(B      \e$B%4\e(B 
+\e$B(&\e(B \e$B%*%j%3%"%W%F%#\e(BiD \e$BG/2qHqL5NA\e(B       \e$B?M5$%3%9%a$,Ev$?$kG/2qHqL5NA$N=w@-8~$1\e(BJCB\e$B%+\e(B 
+\e$B(&\e(B JCB \e$B%j%s%@%+\e(B \e$BG/2qHqL5NA\e(B       \e$B7HBSBe$,:GBg#6#0!s3d0z$5$l$kG/2qHqL5NA%+\e(B 
+\e$B(&\e(B NTT\e$B%0%k\e(B \e$BG/2qHqL5NA\e(B       \e$B%-%c%C%7%s%0$rDc6bMx$G$G$-$k%+\e(B 
+\e$B(&\e(B \e$B$5$/$i\e(B JCB\e$B%$\e(B \e$BG/2qHqL5NA\e(B \e$B#3K|1_$^$G$J$iG/Mx#1!s!*\e(B
+      \e$B%,%=%j%sBe$,%-%c%C%7%e%P%C%/$5$l!"3$30J]81\e(B 
+\e$B(&\e(B  \e$B=iG/EYG/2qHqL5NA\e(B       \e$B1G2h!&%3%s%5\e(B 
+\e$B(&\e(B \e$B$T$"%+\e(B \e$B=iG/EYG/2qHqL5NA\e(B       \e$B0YBX%l\e(B 
+\e$B(&\e(B SBI\e$B%+\e(B       \e$B%H%i%Y%i\e(B.@Z 
+\e$B(&\e(B \e$B%7%F%#\e(B \e$B%(%j\e(B       \e$B%,%=%j%sBe!"9bB.F;O)NA6b$r%-%c%C%7%e%P%C%/\e(B 
+\e$B(&\e(B JCB\e$B%4\e(B       \e$B%m\e(B 
+\e$B(&\e(B \e$B%I%s!&%-%[\e(B(VISA\e$B!"\e(BJCB\e$B$N$_\e(B)
+      FC\e$B%P%m%;%m%J!"\e(BAC\e$B%_%i%s\e(B \e$B%5%C%+\e(B 
+\e$B('\e(B AC\e$B%_%i%s%+\e(B
+\e$B('\e(B AC\e$B%_%i%s%+\e(B
+\e$B(&\e(B FC\e$B%P%k%;%m%J%*%U%#%7%c%k3ZE7%+\e(B       \e$B1G2h$r1G2h4[$G3d0zNA6b$G8+$k!*\e(B 
+\e$B('\e(B \e$B1G2h$r3d0zNA6b$G8+$l$k%/%l%8%C%H%+\e(B
+\e$B(&\e(B \e$B1G2h$r3d0zNA6b$G8+$l$k%/%l%8%C%H%+\e(BF|K\JT)       \e$B%$%s%?\e(B 
+\e$B(&\e(B \e$B%=%K=iG/EYG/2qHqL5NA\e(B
+\e$B#1G/$K#1EY$N7h:Q$GG/2qHqL5NA\e(B
+      \e$BI42_E9$G:GBg#5!s3d0z\e(B 
+\e$B(&\e(B \e$BEl5^%[%F%k%:%3%s%U%)\e(B \e$BG/2qHqL5NA\e(B       \e$BN99TA0$K$f$C$?$jM%2m$J5$J,!*6u9A%i%&%s%8$rL5NA$GMxMQ\e(B 
+\e$B('\e(B  \e$B%@%$%J\e(B
+\e$B('\e(B \e$B%7%F%#\e(B \e$B%4\e(B
+\e$B('\e(B \e$B%N\e(B 
+\e$B('\e(B \e$B%7%F%#\e(B \e$B%(%j\e(B
+\e$B('\e(B \e$B%"%a%j%+%s!&%(%-%9%W%l%9!&%+\e(B
+\e$B('\e(B \e$B%"%a%j%+%s!&%(%-%9%W%l%9!&%4\e(B 
+\e$B('\e(B \e$B%"%a%j%+%s!&%(%-%9%W%l%9!&%S%8%M%9%+\e(B
+\e$B('\e(B UFJ \e$B%4\e(B
+\e$B('\e(B UFJ \e$B%d%s%0%4\e(B 
+\e$B('\e(B DC\e$B%+\e(B
+\e$B('\e(B \e$BEl5^\e(BTOP\e$B%4\e(B(\e$B0lItM-NA$"$j\e(B)
+\e$B('\e(B 
+\e$B(&\e(B \e$B3ZE7%W%l%_%"%`%+\e(B       \e$BM%BT%5\e(B 
+\e$B(&\e(B JCB\e$B%+\e(B \e$B=iG/EYG/2qHqL5NA\e(B       \e$B7c0B$NEBF2%I%s!&%-%[\e(B 
+\e$B(&\e(B \e$B%I%s%-!&%[\e(B \e$BG/2qHqL5NA\e(B        \e$B?3::$N$$$i$J$$\e(BVISA\e$B%G%S%C%H%+\e(B 
+\e$B('\e(B H.I.S.\e$B%o\e(B \e$BG/2qHqL5NA\e(B
+\e$B('\e(B KNT\e$B%H%i%Y%k%-%c%C%7%e%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B(&\e(B \e$B%9%k%,6d9T\e(BVISA\e$B%G%S%C%H%+\e(B \e$BG/2qHqL5NA\e(B       \e$B8x6&NA6b$b3d0z$G$-$k%+\e(B 
+\e$B(&\e(B P-one\e$B%+\e(B \e$BG/2qHqL5NA\e(B       \e$B2?$G$b#1!s3d0z$5$l$k$*F@$J%+\e(B 
+\e$B(&\e(B P-one\e$B%+\e(B \e$BG/2qHqL5NA\e(B      <\e$B%i%$%U%9%?%$%kJL\e(B>    \e$B%3%s%S%K$G$*F@!uJXMx$J%+\e(B 
+\e$B(&\e(B UCS \e$B%+%k%o%6%+\e(B(\e$B%5G/2qHqL5NA\e(B       \e$B%9#5!s!A#1#5!s3d0z$G$*Gc$$J*"v\e(B 
+\e$B('\e(B \e$B@>M'\e(B
+\e$B('\e(B \e$B%$%H\e(B
+\e$B('\e(B \e$B%(%9%Q\e(B 
+\e$B('\e(B \e$B%f%K\e(B
+\e$B('\e(B \e$B%"%T%?\e(B
+\e$B(&\e(B \e$B%f\e(B       \e$BEl5^@~$GDL6P$7$F$$$k?M$N$?$a$N%+\e(B 
+\e$B(&\e(B \e$BEl5^%]%$%s%H%+\e(B \e$BG/2qHqL5NA\e(B       \e$B%$%s%?\e(B 
+\e$B('\e(B UFJ\e$B%K%3%9\e(B \e$B%S%"%=%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B(&\e(B \e$B3ZE7%+\e(B \e$BG/2qHqL5NA\e(B    <\e$B5!G=JL\e(B>    \e$BEE;R%^%M\e(B 
+\e$B('\e(B UCS \e$B%+%k%o%6%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B('\e(B \e$B%M%/%9%3CfF|K\\e(B \e$B%W%l%_%"%`%I%i%$%P\e(B \e$B=iG/EYG/2qHqL5NA\e(B
+\e$B#1G/$K#1EY$N7h:Q$GG/2qHqL5NA\e(B 
+\e$B('\e(B \e$B%=%K\e(B
+\e$B(&\e(B \e$B%=%K\e(B \e$B=iG/EYG/2qHqL5NA\e(B
+\e$B#1G/$K#1EY$N7h:Q$GG/2qHqL5NA\e(B
+   <\e$BCO0hJL\e(B> <\e$B9qFbJT\e(B>   \e$BEl5~ET$K$*=;$^$$$NJ}$,F@$9$k%+\e(B 
+\e$B(&\e(B \e$BEl5^%+\e(B \e$BG/2qHqL5NA\e(B       \e$B0&CN8)!&4tIl8)!&D9Ln8)$K$*=;$^$$$NJ}$,F@$9$k%+\e(B 
+\e$B(&\e(B UCS \e$B%+%k%o%6%+\e(B \e$BG/2qHqL5NA\e(B       \e$BCO0hJL\e(B \e$B$*F@$J\e(BETC\e$B%+\e(B 
+\e$B(&\e(B \e$BElF|K\%$\e(B
+\e$B#1G/$K#1EY$N7h:Q$GG/2qHqL5NA\e(B
+    <\e$B3$30JT\e(B>   \e$B%O%o%$!&%0%"%`!&%"%a%j%+K\EZ$N$*E9$GM%BT!"3d0z$,B?$$%+\e(B 
+\e$B('\e(B \e$B%K%3%9%+\e(B(\e$B%/=iG/EYG/2qHqL5NA\e(B
+\e$B(&\e(B UFJ\e$B%+\e(B(\e$B%/=iG/EYG/2qHqL5NA\e(B       \e$B3$30$N\e(BATM\e$B$G8=CO$N$*6b$r0z$-=P$;$k%+\e(B 
+\e$B('\e(B \e$B%9%k%,6d9T\e(BVISA\e$B%G%S%C%H%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B('\e(B H.I.S.\e$B%o\e(B \e$BG/2qHqL5NA\e(B
+\e$B(&\e(B KNT\e$B%H%i%Y%k%-%c%C%7%e%+\e(B \e$BG/2qHqL5NA\e(B      <\e$BG/NpJL\e(B>    \e$B#1#5:P$+$i;}$F$k\e(BVISA\e$B%G%S%C%H%+\e(B 
+\e$B('\e(B \e$B%9%k%,6d9T\e(BVISA\e$B%G%S%C%H%+\e(B \e$BG/2qHqL5NA\e(B
+\e$B('\e(B H.I.S.\e$B%o\e(B \e$BG/2qHqL5NA\e(B
+\e$B(&\e(B KNT\e$B%H%i%Y%k%-%c%C%7%e%+\e(B \e$BG/2qHqL5NA\e(B                                    \e$BA0$X!'%/%l%8%C%H%+\e(B TOP\e$B!'%/%l%8%C%H%+\e(B                                  ?N???W?b?g?J?[?h?\e(IL\e(B?m?\e(I/\e(B   ?E ?N???W?b?g?J?[?h?\e(IL\e(B?\e(I)\e(B?\e(I_\e(B?I   ?E ?N???W?b?g?J?[?h?\e(IM\e(B?M?p?\e(IL\e(B?\e(IX\e(B?H   ?E ?N???W?b?g?J?[?h?\e(IL\e(B?d?g?\e(I]\e(B   ?E ?N???W?b?g?J?[?h?\e(IL\e(B?@?\??????   ?E ?N???W?b?g?J?[?h?\e(IL\e(B???\e(I^\e(B   ?E ?N???W?b?g?J?[?h?\e(IL\e(B?????N   ?E ?N???W?b?g?J?[?h?\e(IL\e(B?\e(I)\e(B??   ?E IC?`?b?v?????N???W?b?g?J?[?h   ?E ?J?[?h?\e(IL\e(B?x?\e(I%\e(B???@?\e(IL\e(B???\e(I^\e(B   ?E ?N???W?b?g?J?[?h?u?\e(I4\e(B?v???p?p?I       ?N???W?b?g?J?[?h?\e(IL\e(B?\e(I[\e(B?\e(I/\e(B   ?E ?N???W?b?g?J?[?h?\e(IL\e(B?t?\e(IQ\e(B?\e(I[\e(B?\e(I/\e(B   ?E ?C?O?\e(I7\e(B?s?\e(I[\e(B?\e(I/\e(B?\e(IF\e(B?\e(IM\e(B?H   ?E ?C?O?\e(I7\e(B?s?\e(I[\e(B?\e(I/\e(B?\e(IL\e(B???\e(IS\e(B?_   ?E ?C?O?\e(I7\e(B?s?\e(I[\e(B?\e(I/\e(B?\e(IL\e(B???{?p??   ?E ?C?O?\e(I7\e(B?s?\e(I[\e(B?\e(I/\e(B?\e(IL\e(B?e?N?j?b?N   ?E ?\e(I7\e(B?s?\e(IL\e(B?\e(IE\e(B?\e(I-\e(B?N???W?b?g?J?[?h       ?\e(IT\e(B?E?K?\?????????\e(I_\e(B??   ?E ?K?\?????????\e(I_\e(B???\e(IE\e(B?\e(I+\e(B???J?[?h   ?E ???[?h?T?[?r?X?t?\e(I+\e(B?J?[?h   ?E ?K?\?????? ?L???b?V???o?b?N   ?E ?V?[?g?x???g???Q?\e(I[\e(B?\e(I/\e(B?J?[?h   ?E ?\e(IT\e(B?\e(IL\e(B?\e(IE\e(B?\e(I-\e(B?N???W?b?g?J?[?h       ?\e(IA\e(B?T?E?????\e(I)\e(B???I?\e(IT\e(B   ?E ?|?C???g?\e(IR\e(B?\e(I3\e(B?\e(I&\e(BUP?J?[?h   ?E ?A?j???E?L?????N?^?[?n?J?[?h   ?E ?????\e(IA\e(B?T?t?\e(I+\e(B?N???W?b?g?J?[?h       ?N???W?b?g?J?[?h?\e(IL\e(B????   ?E ?N???W?b?g?J?[?h?\e(IF\e(B?\e(I_\e(B?\e(IN\e(B??   ?E ?v???p?[?J?[?h???????r   ?E ?v???p?[?J?[?h?\e(I[\e(B?\e(I/\e(B???r   ?E ?N?????\e(I3\e(B?\e(I?\e(B?J?[?h?\e(II\e(B???\e(IS\e(B?H   ?E ?S?[???h?J?[?h?\e(IL\e(B?????b?g
+      ?@ [?\e(I7\e(B?s?\e(IR\e(B?E?V???b?s???O?\e(IR\e(B?E???`]   ?E ?N???W?b?g?J?[?h?p???\e(I+\e(B?T   ?E ?\e(I(\e(B?\e(I>\e(B?\e(IH\e(B?J?[?h?I?\e(IQ\e(B?t???\e(I+\e(B?\e(I+\e(B?T        ?N???W?b?g?J?[?h?\e(I?\e(B???\e(IF\e(B????   ?E ?N???W?b?g?J?[?h???\e(I-\e(B?????\e(I?\e(B??????   ?E ?N???W?b?g?J?[?h ?\???\e(I]\e(B?\e(IR\e(B   ?E ?N???W?b?g?J?[?h ?R?\e(I8\e(B?\e(IR\e(B   ?E ?N???W?b?g?J?[?h ?V???b?s???O?\e(IR\e(B   ?E ?N???W?b?g?J?[?h ?x?\e(I%\e(B???@?\e(IR\e(B   ?E ?N???W?b?g?J?[?h ?????\e(IR\e(B   ?E ?N???W?b?g?J?[?h ?g???u???\e(IR\e(B         ?\e(I(\e(B?\e(I)\e(B?\e(I_\e(B?T?C?g????    ?E ?\e(IX\e(B?????\e(IP\e(B(?l?b?g?\e(IX\e(B??) ???r     ?E FX(?O???\e(IW\e(B?\e(IV\e(B?\e(IX\e(B????????)    ?E ?\e(I;\e(B???v???[???g?L?????y?[??   ?E ?l?b?g?o???N(?l?b?g???s) ???r   ?E ?Z?MSBI?l?b?g???s ???p?p   ?E ???\e(I/\e(B?N???E???\e(I6\e(B?N??????        ???T?C?g?\e(II\e(B?\e(IB\e(B?\e(I"\e(B?\e(ID\e(B   ?E ???T?C?g?\e(IV\e(B?\e(IL\e(B?????N?\e(II\e(B?\e(IB\e(B?\e(I"\e(B?\e(ID\e(B   ?E ???\e(I]\e(B?????N?\e(II\e(B?\e(IB\e(B?\e(I"\e(B?\e(ID\e(B???????N?W   ?E ?\e(IA\e(B???\e(I$\e(B?????@?\e(II\e(B???\e(IC\e(B?\e(I-\e(B?\?L   ?E ?\e(I(\e(B???\e(I"\e(B?????\e(I9\e(B?t?H?[??   ?E ?T?C?g?}?b?v 1 2 3 4 5 6 7 8             </title><meta http-equiv=Content-Type content="text/html; charset=iso-2022-jp"><meta http-equiv=Content-Type content="text/html; charset=Shift_JIS"><c>?N???W?b?g?J?[?h?\???\e(I]\e(B???r.com
\ No newline at end of file
diff --git a/mcs/class/I18N/CJK/Test/texts/japanese3-utf8.txt b/mcs/class/I18N/CJK/Test/texts/japanese3-utf8.txt
new file mode 100644 (file)
index 0000000..4326777
--- /dev/null
@@ -0,0 +1,108 @@
+<title>一番お得なカ?れのニ?            一番お得なカを選ぶための逆引き            
+   クレジットカ> 一番お得なカ     一番お得なカ            
+クレジットカの逆引き目的別、ライフスタイル別、機能別、地域別、年齢別に分かれており、それぞれのニ  
+クレジットカ     <目的別>    飛行機のマイルが貯まり、無料航空券がもらえるカ 
+├ 日本航空 JALカ 初年度年会費無料
+├ 大韓航空 JCBスカイパスカ 初年度年会費無料
+├ コンチネンタル航空 ワンパスJCBグアムカ 初年度年会費無料 
+├ ユナイテッド航空 マイレ 初年度年会費無料
+├ タイ航空 ロイヤルオ 初年度年会費無料
+├ シンガポクリスフライヤ 初年度年会費無料
+└ エフライング・ブル 初年度年会費無料   
+参考 : マイルとは?       ポイントが効率よく貯まるお得なカ 
+├ 楽天市場で2%、楽天カ 年会費無料
+├ UFJ ビアソカ 年会費無料
+├ ライフカ 年会費無料
+└ オリコアプティiD 年会費無料       海外旅行傷害保険が 
+├ ニコス ビアソカ 年会費無料
+├ オリコアプティiD 年会費無料
+├ DCカ 年会費無料 
+├ DCゴ カ
+├ DCゴ 初年度年会費無料
+├ DCカ 初年度年会費無料 
+├  初年度年会費無料
+└ ソニ 初年度年会費無料 1年に1度の決済で年会費無料
+      ポイントを現金化できるクレジットカ 
+├ P-oneカ 年会費無料
+├ UFJ ビアソカ 年会費無料
+├ NTTグル 年会費無料
+└ さくらJCBイ 年会費無料       全ての商品が1%割引されるス 
+└ P-oneカ 年会費無料       セゾン永久不滅.comでのお買い物でポイント最大19倍! 
+└ 《セゾン》カ       誕生日月はポイント5倍、貯まったポイントは商品券に交換 
+└ ライフカ 年会費無料       持っているだけで凄い!ステイタスカ 
+├ ダイナ
+├ アメリカン・エキスプレス・カ
+└ 《セゾン》プラチナ・アメリカン・エキスプレス・カ       利用限度額が無制限のス 
+└ ダイナ      ゴ 
+└ オリコアプティiD 年会費無料       人気コスメが当たる年会費無料の女性向けJCBカ 
+└ JCB リンダカ 年会費無料       携帯代が最大60%割引される年会費無料カ 
+└ NTTグル 年会費無料       キャッシングを低金利でできるカ 
+└ さくら JCBイ 年会費無料 3万円までなら年利1%!
+      ガソリン代がキャッシュバックされ、海外保険 
+└  初年度年会費無料       映画・コンサ 
+└ ぴあカ 初年度年会費無料       為替レ 
+└ SBIカ       トラベラ.@Z 
+└ シティ エリ       ガソリン代、高速道路料金をキャッシュバック 
+└ JCBゴ       ロ 
+└ ドン・キホ(VISA、JCBのみ)
+      FCバロセロナ、ACミラン サッカ 
+├ ACミランカ
+├ ACミランカ
+└ FCバルセロナオフィシャル楽天カ       映画を映画館で割引料金で見る! 
+├ 映画を割引料金で見れるクレジットカ
+└ 映画を割引料金で見れるクレジットカF|K\JT)       インタ 
+└ ソニ初年度年会費無料
+1年に1度の決済で年会費無料
+      百貨店で最大5%割引 
+└ 東急ホテルズコンフォ 年会費無料       旅行前にゆったり優雅な気分!空港ラウンジを無料で利用 
+├  ダイナ
+├ シティ ゴ
+├ ノ 
+├ シティ エリ
+├ アメリカン・エキスプレス・カ
+├ アメリカン・エキスプレス・ゴ 
+├ アメリカン・エキスプレス・ビジネスカ
+├ UFJ ゴ
+├ UFJ ヤングゴ 
+├ DCカ
+├ 東急TOPゴ(一部有料あり)
+├ 
+└ 楽天プレミアムカ       優待サ 
+└ JCBカ 初年度年会費無料       激安の殿堂ドン・キホ 
+└ ドンキ・ホ 年会費無料        審査のいらないVISAデビットカ 
+├ H.I.S.ワ 年会費無料
+├ KNTトラベルキャッシュカ 年会費無料
+└ スルガ銀行VISAデビットカ 年会費無料       公共料金も割引できるカ 
+└ P-oneカ 年会費無料       何でも1%割引されるお得なカ 
+└ P-oneカ 年会費無料      <ライフスタイル別>    コンビニでお得&便利なカ 
+└ UCS カルワザカ(サ年会費無料       ス5%~15%割引でお買い物♪ 
+├ 西友
+├ イト
+├ エスパ 
+├ ユニ
+├ アピタ
+└ ユ       東急線で通勤している人のためのカ 
+└ 東急ポイントカ 年会費無料       インタ 
+├ UFJニコス ビアソカ 年会費無料
+└ 楽天カ 年会費無料    <機能別>    電子マネ 
+├ UCS カルワザカ 年会費無料
+├ ネクスコ中日本 プレミアムドライバ 初年度年会費無料
+1年に1度の決済で年会費無料 
+├ ソニ
+└ ソニ 初年度年会費無料
+1年に1度の決済で年会費無料
+   <地域別> <国内編>   東京都にお住まいの方が得するカ 
+└ 東急カ 年会費無料       愛知県・岐阜県・長野県にお住まいの方が得するカ 
+└ UCS カルワザカ 年会費無料       地域別 お得なETCカ 
+└ 東日本イ
+1年に1度の決済で年会費無料
+    <海外編>   ハワイ・グアム・アメリカ本土のお店で優待、割引が多いカ 
+├ ニコスカ(ク初年度年会費無料
+└ UFJカ(ク初年度年会費無料       海外のATMで現地のお金を引き出せるカ 
+├ スルガ銀行VISAデビットカ 年会費無料
+├ H.I.S.ワ 年会費無料
+└ KNTトラベルキャッシュカ 年会費無料      <年齢別>    15歳から持てるVISAデビットカ 
+├ スルガ銀行VISAデビットカ 年会費無料
+├ H.I.S.ワ 年会費無料
+└ KNTトラベルキャッシュカ 年会費無料                                    前へ:クレジットカ TOP:クレジットカ                                  \83N\83\8c\83W\83b\83g\83J\81[\83h\82\92m\8eッ   \81\83N\83\8c\83W\83b\83g\83J\81[\83h\82\8a\82\81I   \81\83N\83\8c\83W\83b\83g\83J\81[\83h\82\90M\97p\82\8f\81H   \81\83N\83\8c\83W\83b\83g\83J\81[\83h\82\8ed\91g\82ン   \81\83N\83\8c\83W\83b\83g\83J\81[\83h\82\8b@\94\\82â\96ð\8a\84   \81\83N\83\8c\83W\83b\83g\83J\81[\83h\82\8eí\97゙   \81\83N\83\8c\83W\83b\83g\83J\81[\83h\82\83\89\83\93\83N   \81\83N\83\8c\83W\83b\83g\83J\81[\83h\82\8c\95û   \81E IC\83`\83b\83v\93à\91 \83N\83\8c\83W\83b\83g\83J\81[\83h   \81\83J\81[\83h\82\8ex\95\95û\96@\82\8eí\97゙   \81\83N\83\8c\83W\83b\83g\83J\81[\83h\81u\92\81v\8a\88\97p\8fp\81I       \83N\83\8c\83W\83b\83g\83J\81[\83h\82\95\8cッ   \81\83N\83\8c\83W\83b\83g\83J\81[\83h\82\95t\91\95\8cッ   \81\8aC\8aO\97\8ds\95\8c\82\82\81H   \81\8aC\8aO\97\8ds\95\8c\82\92\8d\88\93_   \81\8aC\8aO\97\8ds\95\8c\82\8aî\96{\97p\8cê   \81\8aC\8aO\97\8ds\95\8c\82\83e\83N\83j\83b\83N   \81\97\8ds\82\8d\8b\83N\83\8c\83W\83b\83g\83J\81[\83h       \8e\81E\83K\83\\83\8a\83\93\91ã\82ð\90\96ñ   \81\83K\83\\83\8a\83\93\91ã\82ð\90\96ñ\82\82\82é\83J\81[\83h   \81\83\8d\81[\83h\83T\81[\83r\83X\95t\82\83J\81[\83h   \81\83K\83\\83\8a\83\93\91ã \83L\83\83\83b\83V\83\85\83o\83b\83N   \81\83V\81[\83g\83x\83\8b\83g\8f\9d\8aQ\95\8c\83J\81[\83h   \81\8e\82\8d\8b\83N\83\8c\83W\83b\83g\83J\81[\83h       \93\93T\81E\8a\84\88ø\82\82ç\91I\82ヤ   \81\83|\83C\83\93\83g\8a\8c\97ヲUP\83J\81[\83h   \81\83A\83j\83\81\81E\83L\83\83\83\89\83N\83^\81[\8cn\83J\81[\83h   \81\8a\84\88ø\93\93T\95t\82\83N\83\8c\83W\83b\83g\83J\81[\83h       \83N\83\8c\83W\83b\83g\83J\81[\83h\82\8fî\95ñ   \81\83N\83\8c\83W\83b\83g\83J\81[\83h\94\8d\91\8dô   \81\83v\83\8d\83p\81[\83J\81[\83h\8bà\97\98\94ä\8ar   \81\83v\83\8d\83p\81[\83J\81[\83h\95\8c\94ä\8ar   \81\94N\89ï\94ï\96\97ソ\83J\81[\83h\82\92\8d\88\81H   \81\83S\81[\83\8b\83h\83J\81[\83h\82\83\81\83\8a\83b\83g
+      \81@ [\97\8ds\95\81E\83V\83\87\83b\83s\83\93\83O\95\81E\8bó\8d`]   \81\83N\83\8c\83W\83b\83g\83J\81[\83h\97p\8cê\8e\93T   \81\82\93\82\83J\81[\83h\91I\82\8bt\88ø\82\8e\93T        \83N\83\8c\83W\83b\83g\83J\81[\83h\8eソ\96â\82\89ñ\93\9a   \81\83N\83\8c\83W\83b\83g\83J\81[\83h\82æ\82\82 \82é\8eソ\96â\88ê\97\97   \81\83N\83\8c\83W\83b\83g\83J\81[\83\90\\8d\9e\82\95メ   \81\83N\83\8c\83W\83b\83g\83J\81[\83\90R\8d\95メ   \81\83N\83\8c\83W\83b\83g\83J\81[\83\83V\83\87\83b\83s\83\93\83O\95メ   \81\83N\83\8c\83W\83b\83g\83J\81[\83\8ex\95\95û\96@\95メ   \81\83N\83\8c\83W\83b\83g\83J\81[\83\89ð\96ñ\95メ   \81\83N\83\8c\83W\83b\83g\83J\81[\83\83g\83\89\83u\83\8b\95メ         \82\8a\82\83T\83C\83g\88ê\97\97    \81\8f\8c\94\89ï\8eミ(\83l\83b\83g\8f\8c\94\94ä\8ar     \81E FX(\8aO\8d\91\88\91\8f\8b\92\8bà\8eæ\88ø)    \81\8c\8bà\83v\83\8c\83[\83\93\83g\83L\83\83\83\93\83y\81[\83\93   \81\83l\83b\83g\83o\83\93\83N(\83l\83b\83g\8bâ\8ds) \94ä\8ar   \81\8fZ\90MSBI\83l\83b\83g\8bâ\8d\8a\88\97p\8fp   \81\8d\91\96\94N\8bà\81E\8cú\90\94N\8bà\93ü\96å        \93\96\83T\83C\83g\82\82\82\82ト   \81\93\96\83T\83C\83g\82\82\83\8a\83\93\83N\82\82\82\82ト   \81\91\8a\8c\83\8a\83\93\83N\82\82\82\82\81\95\83\8a\83\93\83N\8fW   \81\93\92è\8f\8eæ\88ø\96@\82\8aî\82\82\95\\8bL   \81\82\96â\82\8d\87\82í\82\83t\83H\81[\83\80   \81\83T\83C\83g\83}\83b\83v 1 2 3 4 5 6 7 8             </title><meta http-equiv=Content-Type content="text/html; charset=iso-2022-jp"><meta http-equiv=Content-Type content="text/html; charset=Shift_JIS"><c>\83N\83\8c\83W\83b\83g\83J\81[\83h\90\\8d\9e\82\94ä\8ar.com
\ No newline at end of file
index cca54208a686a855d36e56db71dd1165c32995b9..9c2a5c5afdaf1ccb9dab546112e3fff481e9944c 100644 (file)
@@ -47,6 +47,7 @@
    <Compile Include="Handlers.cs" />
    <Compile Include="Manager.cs" />
    <Compile Include="MonoEncoding.cs" />
+   <Compile Include="MonoSafeEncoding.cs" />
    <Compile Include="Strings.cs" />
 \r
   </ItemGroup>\r
index e857e0ab846a2d28e4f78c005c5f42890f68f39a..a66d93ae34a033b18179e2c99433afeac1e81b97 100644 (file)
@@ -47,6 +47,7 @@
    <Compile Include="Handlers.cs" />
    <Compile Include="Manager.cs" />
    <Compile Include="MonoEncoding.cs" />
+   <Compile Include="MonoSafeEncoding.cs" />
    <Compile Include="Strings.cs" />
 \r
   </ItemGroup>\r
index 260db770761e7d28de9b59fcdf13c6e209d12adb..38ba7ed816a20d82e61e70bb4ac46a8f7b26d7c7 100755 (executable)
@@ -4,4 +4,5 @@ ByteEncoding.cs
 Handlers.cs
 Manager.cs
 MonoEncoding.cs
+MonoSafeEncoding.cs
 Strings.cs
index a4f70e8530d9693f069ee83a965e7cf1825dda91..6807fc05d3892b644dec15cdf323c42d1bd6028a 100644 (file)
@@ -33,14 +33,34 @@ namespace I18N.Common
                }
 
 #if NET_2_0
+               /// <summary>
+               /// GetBytes method used internally by stateful encoders/encodings.
+               /// </summary>
+               /// <param name="chars">The chars.</param>
+               /// <param name="charCount">The char count.</param>
+               /// <param name="bytes">The bytes.</param>
+               /// <param name="byteCount">The byte count.</param>
+               /// <param name="flush">if set to <c>true</c> [flush].</param>
+               /// <param name="state">The state object to use (or null if stateless).</param>
+               /// <returns></returns>
+               /// <remarks>
+               /// Only stateful encoders need to implement this method (e.g. ISO-2022-JP).
+               /// This base implementation always throws NotImplementedException.
+               /// </remarks>
+               protected unsafe virtual int GetBytesInternal(char *chars, int charCount,
+                               byte *bytes, int byteCount, bool flush, object state)
+               {
+                       throw new NotImplementedException("Statefull encoding is not implemented (yet?) by this encoding class.");
+               }
+
                public unsafe void HandleFallback (ref EncoderFallbackBuffer buffer,
                        char* chars, ref int charIndex, ref int charCount,
-                       byte* bytes, ref int byteIndex, ref int byteCount)
+                       byte* bytes, ref int byteIndex, ref int byteCount, object state)
                {
                        if (buffer == null)
                                buffer = EncoderFallback.CreateFallbackBuffer ();
-                       if (Char.IsSurrogate (chars [charIndex]) && charCount > 0 &&
-                               Char.IsSurrogate (chars [charIndex + 1])) {
+
+                       if (charCount > 1 && (Char.IsSurrogate (chars [charIndex]) && Char.IsSurrogate (chars [charIndex + 1]))) {
                                buffer.Fallback (chars [charIndex], chars [charIndex + 1], charIndex);
                                charIndex++;
                                charCount--;
@@ -51,10 +71,25 @@ namespace I18N.Common
                        int idx = 0;
                        while (buffer.Remaining > 0)
                                tmp [idx++] = buffer.GetNextChar ();
+
                        fixed (char* tmparr = tmp) {
-                               byteIndex += GetBytes (tmparr, tmp.Length, bytes + byteIndex, byteCount);
+                               var outbytes = bytes == null ? null : bytes + byteIndex;
+                               var len = state == null ?
+                                       GetBytes(tmparr, tmp.Length, outbytes, byteCount)
+                                       : GetBytesInternal(tmparr, tmp.Length, outbytes, byteCount, true, state);
+
+                               byteIndex += len;
+                               byteCount -= len;
                        }
                }
+
+               public unsafe void HandleFallback (ref EncoderFallbackBuffer buffer,
+                       char* chars, ref int charIndex, ref int charCount,
+                       byte* bytes, ref int byteIndex, ref int byteCount)
+               {
+                       HandleFallback(ref buffer, chars, ref charIndex, ref charCount,
+                               bytes, ref byteIndex, ref byteCount, null);
+               }
 #endif
 
                // Get the bytes that result from encoding a character buffer.
@@ -262,13 +297,21 @@ namespace I18N.Common
 
+                       /// <summary>
+                       /// Delegates to encoding.HandleFallback using this encoder's
+                       /// FallbackBuffer; the state argument is passed through unchanged.
+                       /// </summary>
                        public unsafe void HandleFallback (
                                char* chars, ref int charIndex, ref int charCount,
-                               byte* bytes, ref int byteIndex, ref int byteCount)
+                               byte* bytes, ref int byteIndex, ref int byteCount, object state)
                        {
                                EncoderFallbackBuffer buffer = FallbackBuffer;
                                encoding.HandleFallback (ref buffer,
                                        chars, ref charIndex, ref charCount,
-                                       bytes, ref byteIndex, ref byteCount);
+                                       bytes, ref byteIndex, ref byteCount, state);
                        }
+
+/*                     public unsafe void HandleFallback(
+                               char* chars, ref int charIndex, ref int charCount,
+                               byte* bytes, ref int byteIndex, ref int byteCount)
+                       {
+                               HandleFallback(chars, ref charIndex, ref charCount,
+                                       bytes, ref byteIndex, ref byteCount, null);
+                       }*/
                #endif
                }
 }
diff --git a/mcs/class/I18N/Common/MonoSafeEncoding.cs b/mcs/class/I18N/Common/MonoSafeEncoding.cs
new file mode 100644 (file)
index 0000000..9b970a5
--- /dev/null
@@ -0,0 +1,112 @@
+//
+// MonoSafeEncoding.cs
+//
+// Author:
+//     Atsushi Enomoto <atsushi@ximian.com>
+//  Pablo Ruiz García <pruiz@netway.org>
+//
+// Copyright (C) 2005 Novell, Inc.  http://www.novell.com
+// Copyright (C) 2011 Pablo Ruiz García
+//
+using System;
+using System.Runtime.InteropServices;
+using System.Text;
+
+namespace I18N.Common
+{
+#if DISABLE_UNSAFE
+       /// <summary>
+       /// Base class for encodings whose fallback handling works on managed
+       /// arrays instead of pointers. Only compiled when DISABLE_UNSAFE is defined.
+       /// </summary>
+       [Serializable]
+       public abstract class MonoSafeEncoding : Encoding
+       {
+               // Windows code page override; 0 means "use the base class value".
+               readonly int win_code_page;
+
+               /// <summary>
+               /// Creates the encoding without a Windows code page override.
+               /// </summary>
+               /// <param name="codePage">Code page passed to the base Encoding.</param>
+               public MonoSafeEncoding (int codePage)
+                       : this (codePage, 0)
+               {
+               }
+
+               /// <summary>
+               /// Creates the encoding with an explicit Windows code page.
+               /// </summary>
+               /// <param name="codePage">Code page passed to the base Encoding.</param>
+               /// <param name="windowsCodePage">Value reported by WindowsCodePage, or 0 to use the base value.</param>
+               public MonoSafeEncoding(int codePage, int windowsCodePage)
+                       : base (codePage)
+               {
+                       win_code_page = windowsCodePage;
+               }
+
+               /// <summary>
+               /// The override given at construction, or the base value when none was given.
+               /// </summary>
+               public override int WindowsCodePage {
+                       get { return win_code_page != 0 ? win_code_page : base.WindowsCodePage; }
+               }
+
+               /// <summary>
+               /// GetBytes variant used internally by stateful encoders/encodings.
+               /// </summary>
+               /// <param name="chars">Characters to encode.</param>
+               /// <param name="charIndex">Index of the first character to encode.</param>
+               /// <param name="charCount">Number of characters to encode.</param>
+               /// <param name="bytes">Destination byte array.</param>
+               /// <param name="byteIndex">Index in <paramref name="bytes"/> at which to start writing.</param>
+               /// <param name="flush">Flush flag; HandleFallback always passes <c>true</c>.</param>
+               /// <param name="state">Encoder state object (HandleFallback only calls this method when it is non-null).</param>
+               /// <returns>Number of bytes produced; HandleFallback adds it to its byte index.</returns>
+               /// <exception cref="NotImplementedException">Always thrown by this base implementation.</exception>
+               /// <remarks>
+               /// Only stateful encodings need to override this method (e.g. ISO-2022-JP).
+               /// </remarks>
+               protected virtual int GetBytesInternal(char[] chars, int charIndex, int charCount,
+                       byte[] bytes, int byteIndex, bool flush, object state)
+               {
+                       throw new NotImplementedException("Statefull encoding is not implemented (yet?) by this encoding class.");
+               }
+
+               /// <summary>
+               /// Runs the encoder fallback for the character at <paramref name="charIndex"/>
+               /// and encodes the replacement characters into <paramref name="bytes"/>.
+               /// </summary>
+               /// <param name="buffer">Fallback buffer to use; created from EncoderFallback when null.</param>
+               /// <param name="chars">Source characters.</param>
+               /// <param name="charIndex">Index of the unencodable character; advanced by one when a surrogate pair is consumed.</param>
+               /// <param name="charCount">Remaining character count; reduced by one when a surrogate pair is consumed.</param>
+               /// <param name="bytes">Destination byte array.</param>
+               /// <param name="byteIndex">Write position; advanced by the number of bytes produced.</param>
+               /// <param name="byteCount">Reduced by the number of bytes produced.</param>
+               /// <param name="state">Null to encode through GetBytes, otherwise forwarded to GetBytesInternal.</param>
+               public void HandleFallback(ref EncoderFallbackBuffer buffer,
+                       char[] chars, ref int charIndex, ref int charCount,
+                       byte[] bytes, ref int byteIndex, ref int byteCount, object state)
+               {
+                       if (buffer == null)
+                               buffer = EncoderFallback.CreateFallbackBuffer();
+
+                       // Only a well-formed (high, low) pair goes to the two-char
+                       // Fallback: that overload may reject any other combination with
+                       // ArgumentOutOfRangeException. Lone or misordered surrogates take
+                       // the single-char path instead. The charCount > 1 guard keeps
+                       // chars[charIndex + 1] inside the remaining input.
+                       if (charCount > 1 && Char.IsSurrogatePair(chars[charIndex], chars[charIndex + 1]))
+                       {
+                               buffer.Fallback (chars[charIndex], chars[charIndex + 1], charIndex);
+                               // Account for the extra (low) char of the pair.
+                               // NOTE(review): callers presumably step past the first char themselves - confirm.
+                               charIndex++;
+                               charCount--;
+                       }
+                       else
+                               buffer.Fallback (chars[charIndex], charIndex);
+
+                       // Drain the replacement characters produced by the fallback.
+                       char[] tmp = new char[buffer.Remaining];
+                       int idx = 0;
+                       while (buffer.Remaining > 0)
+                               tmp[idx++] = buffer.GetNextChar();
+
+                       var len = state == null ?
+                               GetBytes(tmp, 0, tmp.Length, bytes, byteIndex)
+                               : GetBytesInternal(tmp, 0, tmp.Length, bytes, byteIndex, true, state);
+                       byteIndex += len;
+                       byteCount -= len;
+               }
+       }
+
+               /// <summary>
+               /// Base class for encoders that belong to a MonoSafeEncoding.
+               /// </summary>
+               public abstract class MonoSafeEncoder : Encoder
+               {
+                       // Declared unconditionally: HandleFallback below always uses it, so
+                       // guarding only the field with NET_2_0 broke builds without that symbol.
+                       readonly MonoSafeEncoding encoding;
+
+                       /// <summary>Creates an encoder bound to <paramref name="encoding"/>.</summary>
+                       /// <param name="encoding">Encoding whose HandleFallback this encoder delegates to.</param>
+                       public MonoSafeEncoder (MonoSafeEncoding encoding)
+                       {
+                               this.encoding = encoding;
+                       }
+
+                       /// <summary>
+                       /// Delegates to encoding.HandleFallback using this encoder's
+                       /// FallbackBuffer; all other arguments are passed through unchanged.
+                       /// </summary>
+                       public void HandleFallback(
+                               char[] chars, ref int charIndex, ref int charCount,
+                               byte[] bytes, ref int byteIndex, ref int byteCount, object state)
+                       {
+                               EncoderFallbackBuffer buffer = FallbackBuffer;
+                               encoding.HandleFallback(ref buffer, chars, ref charIndex, ref charCount,
+                                       bytes, ref byteIndex, ref byteCount, state);
+                       }
+               }
+#endif
+}