Merge pull request #3962 from mkorkalo/fix_MonoBtlsContext_memory_leak
[mono.git] / mcs / class / I18N / CJK / GB18030Encoding.cs
index 4ecf2cb39546f1e7ca5a6c050ead8c514d6d9628..f62121e81f042353e77e6952e5f6df720654cf65 100644 (file)
@@ -7,21 +7,30 @@
 using System;
 using System.Reflection;
 using System.Text;
+using I18N.Common;
+
+#if DISABLE_UNSAFE
+using MonoEncoder = I18N.Common.MonoSafeEncoder;
+using MonoEncoding = I18N.Common.MonoSafeEncoding;
+#endif
 
 namespace I18N.CJK
 {
+       [Serializable]
        internal class ENCgb18030 : GB18030Encoding
        {
                public ENCgb18030 (): base () {}
        }
 
+       [Serializable]
        public class CP54936 : GB18030Encoding { }
 
-       public class GB18030Encoding : Encoding
+       [Serializable]
+       public class GB18030Encoding : MonoEncoding
        {
                // Constructor.
                public GB18030Encoding ()
-                       : base (54936)
+                       : base (54936, 936)
                {
                }
 
@@ -29,10 +38,34 @@ namespace I18N.CJK
                        get { return "Chinese Simplified (GB18030)"; }
                }
 
+               // Header name of this encoding; always the literal "GB18030".
+               public override string HeaderName {
+                       get {
+                               return "GB18030";
+                       }
+               }
+
+               // Body name of this encoding; always the literal "GB18030".
+               public override string BodyName {
+                       get {
+                               return "GB18030";
+                       }
+               }
+
                public override string WebName {
                        get { return "GB18030"; }
                }
 
+               // Unconditionally true for this encoding.
+               public override bool IsMailNewsDisplay {
+                       get {
+                               return true;
+                       }
+               }
+
+               // Unconditionally true for this encoding.
+               public override bool IsMailNewsSave {
+                       get {
+                               return true;
+                       }
+               }
+
+               // Unconditionally true for this encoding.
+               public override bool IsBrowserDisplay {
+                       get {
+                               return true;
+                       }
+               }
+
+               // Unconditionally true for this encoding.
+               public override bool IsBrowserSave {
+                       get {
+                               return true;
+                       }
+               }
+
                public override int GetMaxByteCount (int len)
                {
                        // non-GB2312 characters in \u0080 - \uFFFF
@@ -44,15 +77,27 @@ namespace I18N.CJK
                        return len;
                }
 
+#if !DISABLE_UNSAFE
+               // Counts the encoded size of `count` chars with a freshly created
+               // encoder; the final `true` is passed as that encoder's refresh flag.
+               public unsafe override int GetByteCountImpl (char* chars, int count)
+               {
+                       GB18030Encoder encoder = new GB18030Encoder (this);
+                       return encoder.GetByteCountImpl (chars, count, true);
+               }
+
+               // Encodes `charCount` chars into `bytes` with a freshly created
+               // encoder; the final `true` is passed as that encoder's refresh flag.
+               // Returns whatever the encoder reports.
+               public unsafe override int GetBytesImpl (char* chars, int charCount, byte* bytes, int byteCount)
+               {
+                       GB18030Encoder encoder = new GB18030Encoder (this);
+                       return encoder.GetBytesImpl (chars, charCount, bytes, byteCount, true);
+               }
+#else
                public override int GetByteCount (char [] chars, int index, int length)
                {
-                       return new GB18030Encoder ().GetByteCount (chars, index, length, true);
+                       return new GB18030Encoder (this).GetByteCount (chars, index, length, true);
                }
 
-               public override int GetBytes (char [] chars, int charIdx, int srclen, byte [] bytes, int byteIdx)
+               public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
                {
-                       return new GB18030Encoder ().GetBytes (chars, charIdx, srclen, bytes, byteIdx, true);
+                       return new GB18030Encoder (this).GetBytes (chars, charIndex, charCount, bytes, byteIndex, true);
                }
+#endif
 
                public override int GetCharCount (byte [] bytes, int start, int len)
                {
@@ -63,22 +108,32 @@ namespace I18N.CJK
                {
                        return new GB18030Decoder ().GetChars (bytes, byteIdx, srclen, chars, charIdx);
                }
+
+               // Hands out a new encoder instance bound to this encoding.
+               public override Encoder GetEncoder ()
+               {
+                       Encoder encoder = new GB18030Encoder (this);
+                       return encoder;
+               }
+
+               // Hands out a new decoder instance.
+               public override Decoder GetDecoder ()
+               {
+                       Decoder decoder = new GB18030Decoder ();
+                       return decoder;
+               }
        }
 
-       class GB18030Decoder : Decoder
+       class GB18030Decoder : DbcsEncoding.DbcsDecoder
        {
-               Gb2312Convert gb2312 = Gb2312Convert.Convert;
+               static DbcsConvert gb2312 = DbcsConvert.Gb2312;
                // for now incomplete block is not supported - should we?
                // int incomplete1 = -1, incomplete2 = -1, incomplete3 = -1;
 
+               // Passes null to the base DbcsDecoder constructor; this class
+               // reads the static gb2312 table itself.
+               public GB18030Decoder () : base (null)
+               {
+               }
+
                public override int GetCharCount (byte [] bytes, int start, int len)
                {
-                       if (bytes == null)
-                               throw new ArgumentNullException ("bytes");
-                       if (start < 0 || start > bytes.Length)
-                               throw new ArgumentOutOfRangeException ("start");
-                       if (len < 0 || start + len > bytes.Length)
-                               throw new ArgumentOutOfRangeException ("len");
+                       CheckRange (bytes, start, len);
 
                        int end = start + len;
                        int ret = 0;
@@ -152,16 +207,7 @@ namespace I18N.CJK
 
                public override int GetChars (byte [] bytes, int byteIndex, int byteCount, char [] chars, int charIndex)
                {
-                       if (bytes == null)
-                               throw new ArgumentNullException ("bytes");
-                       if (chars == null)
-                               throw new ArgumentNullException ("chars");
-                       if (byteIndex < 0 || byteIndex > bytes.Length)
-                               throw new ArgumentOutOfRangeException ("byteIndex");
-                       if (byteCount < 0 || byteIndex + byteCount > bytes.Length)
-                               throw new ArgumentOutOfRangeException ("byteCount");
-                       if (charIndex < 0 || charIndex > chars.Length)
-                               throw new ArgumentOutOfRangeException ("charIndex");
+                       CheckRange (bytes, byteIndex, byteCount, chars, charIndex);
 
                        int byteEnd = byteIndex + byteCount;
                        int charStart = charIndex;
@@ -224,12 +270,15 @@ namespace I18N.CJK
                                                byteIndex += 4;
                                        }
                                } else {
-                                       // GB2312 mapping, or invalid.
-                                       // ('second' is always valid here).
-                                       int head = bytes [byteIndex];
-                                       char c = gb2312.BytePairToChar (ref head, second);
+                                       byte first = bytes [byteIndex];
+                                       int ord = ((first - 0x81) * 191 + second - 0x40) * 2;
+                                       char c1 = ord < 0 || ord >= gb2312.n2u.Length ?
+                                               '\0' : (char) (gb2312.n2u [ord] + gb2312.n2u [ord + 1] * 256);
+                                       if (c1 == 0)
+                                               chars [charIndex++] = '?';
+                                       else
+                                               chars [charIndex++] = c1;
                                        byteIndex += 2;
-                                       chars [charIndex++] = c == char.MinValue ? '?' : c;
                                }
                        }
 
@@ -237,24 +286,23 @@ namespace I18N.CJK
                }
        }
 
-       class GB18030Encoder : Encoder
+       class GB18030Encoder : MonoEncoder
        {
-               Gb2312Convert gb2312 = Gb2312Convert.Convert;
-               char incomplete;
+               static DbcsConvert gb2312 = DbcsConvert.Gb2312;
 
-               public override int GetByteCount (char [] chars, int start, int len, bool refresh)
+               public GB18030Encoder (MonoEncoding owner)
+                       : base (owner)
                {
-                       if (refresh)
-                               incomplete = char.MinValue;
+               }
 
-                       if (chars == null)
-                               throw new ArgumentNullException ("chars");
-                       if (start < 0 || start > chars.Length)
-                               throw new ArgumentOutOfRangeException ("index");
-                       if (len < 0 || start + len > chars.Length)
-                               throw new ArgumentOutOfRangeException ("count");
+               char incomplete_byte_count;
+               char incomplete_bytes;
 
-                       int end = start + len;
+#if !DISABLE_UNSAFE
+               public unsafe override int GetByteCountImpl (char* chars, int count, bool refresh)
+               {
+                       int start = 0;
+                       int end = count;
                        int ret = 0;
                        while (start < end) {
                                char ch = chars [start];
@@ -265,10 +313,13 @@ namespace I18N.CJK
                                        continue;
                                } else if (Char.IsSurrogate (ch)) {
                                        // Surrogate
-                                       if (start + 1 == end)
-                                               break; // incomplete
-                                       ret += 4;
-                                       start += 2;
+                                       if (start + 1 == end) {
+                                               incomplete_byte_count = ch;
+                                               start++;
+                                       } else {
+                                               ret += 4;
+                                               start += 2;
+                                       }
                                        continue;
                                }
 
@@ -278,8 +329,10 @@ namespace I18N.CJK
                                        start++;
                                        continue;
                                }
-                               long value = gb2312.UcsToGbk (ch);
-                               if (value != 0) {
+
+                               byte b1 = gb2312.u2n [((int) ch) * 2 + 1];
+                               byte b2 = gb2312.u2n [((int) ch) * 2];
+                               if (b1 != 0 && b2 != 0) {
                                        // GB2312
                                        ret += 2;
                                        start++;
@@ -287,34 +340,36 @@ namespace I18N.CJK
                                }
 
                                // non-GB2312
-                               ret += 4;
+                               long value = GB18030Source.FromUCS (ch);
+                               if (value < 0)
+                                       ret++; // invalid(?)
+                               else
+                                       ret += 4;
                                start++;
                        }
+
+                       if (refresh) {
+                               if (incomplete_byte_count != char.MinValue)
+                                       ret++;
+                               incomplete_byte_count = char.MinValue;
+                       }
                        return ret;
                }
 
-               public override int GetBytes (char [] chars, int charIndex, int charCount, byte [] bytes, int byteIndex, bool refresh)
+               public unsafe override int GetBytesImpl (char* chars, int charCount, byte* bytes, int byteCount, bool refresh)
                {
-                       if (chars == null)
-                               throw new ArgumentNullException ("chars");
-                       if (bytes == null)
-                               throw new ArgumentNullException ("bytes");
-                       if (charIndex < 0 || charIndex > chars.Length)
-                               throw new ArgumentOutOfRangeException ("charIndex");
-                       if (charCount < 0 || charIndex + charCount > chars.Length)
-                               throw new ArgumentOutOfRangeException ("charCount");
-                       if (byteIndex < 0 || byteIndex > bytes.Length)
-                               throw new ArgumentOutOfRangeException ("byteIndex");
+                       int charIndex = 0;
+                       int byteIndex = 0;
 
                        int charEnd = charIndex + charCount;
                        int byteStart = byteIndex;
-                       char ch = incomplete;
+                       char ch = incomplete_bytes;
 
                        while (charIndex < charEnd) {
-                               if (incomplete == char.MinValue)
+                               if (incomplete_bytes == char.MinValue)
                                        ch = chars [charIndex++];
                                else
-                                       incomplete = char.MinValue;
+                                       incomplete_bytes = char.MinValue;
 
                                if (ch < 0x80) {
                                        // ASCII
@@ -323,17 +378,19 @@ namespace I18N.CJK
                                } else if (Char.IsSurrogate (ch)) {
                                        // Surrogate
                                        if (charIndex == charEnd) {
-                                               incomplete = ch;
+                                               incomplete_bytes = ch;
                                                break; // incomplete
                                        }
                                        char ch2 = chars [charIndex++];
                                        if (!Char.IsSurrogate (ch2)) {
                                                // invalid surrogate
-                                               bytes [byteIndex++] = (byte) '?';
+                                               HandleFallback (
+                                                       chars, ref charIndex, ref charCount,
+                                                       bytes, ref byteIndex, ref byteCount, null);
                                                continue;
                                        }
                                        int cp = (ch - 0xD800) * 0x400 + ch2 - 0xDC00;
-                                       GB18030Source.Unlinear (bytes, byteIndex, GB18030Source.FromUCSSurrogate (cp));
+                                       GB18030Source.Unlinear (bytes + byteIndex, GB18030Source.FromUCSSurrogate (cp));
                                        byteIndex += 4;
                                        continue;
                                }
@@ -345,19 +402,180 @@ namespace I18N.CJK
                                        continue;
                                }
 
-                               long value = gb2312.UcsToGbk (ch);
-                               if (value != 0) {
-                                       bytes [byteIndex++] = (byte) (value / 0x100);
-                                       bytes [byteIndex++] = (byte) (value % 0x100);
+                               byte b1 = gb2312.u2n [((int) ch) * 2 + 1];
+                               byte b2 = gb2312.u2n [((int) ch) * 2];
+                               if (b1 != 0 && b2 != 0) {
+                                       bytes [byteIndex++] = b1;
+                                       bytes [byteIndex++] = b2;
+                                       continue;
+                               }
+
+                               long value = GB18030Source.FromUCS (ch);
+                               if (value < 0)
+                                       bytes [byteIndex++] = 0x3F; // invalid(?)
+                               else {
+                                       // non-GB2312
+                                       GB18030Source.Unlinear (bytes + byteIndex, value);
+                                       byteIndex += 4;
+                               }
+                       }
+
+                       if (refresh) {
+                               if (incomplete_bytes != char.MinValue)
+                                       bytes [byteIndex++] = 0x3F; // incomplete
+                               incomplete_bytes = char.MinValue;
+                       }
+
+                       return byteIndex - byteStart;
+               }
+#else
+
+               // Safe (DISABLE_UNSAFE) byte counter: returns the number of bytes
+               // needed to encode chars [index .. index + count).
+               //   chars   - source buffer; must not be null.
+               //   index   - offset of the first character to measure.
+               //   count   - number of characters to measure.
+               //   refresh - when true, a surrogate left dangling at the end of the
+               //             range is counted as one byte and the pending state
+               //             is cleared.
+               // Throws ArgumentNullException when chars is null and
+               // ArgumentOutOfRangeException when the range lies outside chars.
+               public override int GetByteCount(char[] chars, int index, int count, bool refresh)
+               {
+                       if (chars == null)
+                               throw new ArgumentNullException ("chars");
+                       if (index < 0 || index > chars.Length)
+                               throw new ArgumentOutOfRangeException ("index");
+                       // Written as a subtraction so index + count cannot overflow.
+                       if (count < 0 || count > chars.Length - index)
+                               throw new ArgumentOutOfRangeException ("count");
+
+                       // Measure the requested window, not the front of the array.
+                       int start = index;
+                       int end = index + count;
+                       int ret = 0;
+                       while (start < end)
+                       {
+                               char ch = chars[start];
+                               if (ch < 0x80)
+                               {
+                                       // ASCII
+                                       ret++;
+                                       start++;
+                                       continue;
+                               }
+                               else if (Char.IsSurrogate(ch))
+                               {
+                                       // Surrogate
+                                       if (start + 1 == end)
+                                       {
+                                               // Last char of the range: remember it, count it later.
+                                               incomplete_byte_count = ch;
+                                               start++;
+                                       }
+                                       else
+                                       {
+                                               // Counted as a 4-byte pair; the second char is
+                                               // not inspected here.
+                                               ret += 4;
+                                               start += 2;
+                                       }
+                                       continue;
+                               }
+
+                               // Same single-byte rule GetBytes applies (U+0080 and
+                               // U+00FF map to themselves), so the count matches
+                               // the bytes actually produced.
+                               if (ch <= 0x80 || ch == 0xFF)
+                               {
+                                       ret++;
+                                       start++;
+                                       continue;
+                               }
+
+                               // u2n holds two bytes per UTF-16 code unit.
+                               byte b1 = gb2312.u2n[((int)ch) * 2 + 1];
+                               byte b2 = gb2312.u2n[((int)ch) * 2];
+                               if (b1 != 0 && b2 != 0)
+                               {
+                                       // GB2312
+                                       ret += 2;
+                                       start++;
+                                       continue;
+                               }
+
+                               // non-GB2312
+                               long value = GB18030Source.FromUCS(ch);
+                               if (value < 0)
+                                       ret++; // invalid(?)
+                               else
+                                       ret += 4;
+                               start++;
+                       }
+
+                       if (refresh)
+                       {
+                               // A dangling surrogate is counted as a single byte.
+                               if (incomplete_byte_count != char.MinValue)
+                                       ret++;
+                               incomplete_byte_count = char.MinValue;
+                       }
+                       return ret;
+               }
+
+               // Safe (DISABLE_UNSAFE) encoder: converts chars [charIndex ..
+               // charIndex + charCount) and writes the result into bytes starting
+               // at byteIndex.
+               //   refresh - when true, a surrogate still pending at the end is
+               //             written as 0x3F and the pending state is cleared.
+               // Returns the number of bytes written.
+               // Throws ArgumentNullException for a null buffer and
+               // ArgumentOutOfRangeException for an index or count outside its
+               // buffer. The argument checks run before anything is written.
+               // NOTE(review): there is no up-front check that bytes has room
+               // for the whole output.
+               public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex, bool refresh)
+               {
+                       if (chars == null)
+                               throw new ArgumentNullException ("chars");
+                       if (bytes == null)
+                               throw new ArgumentNullException ("bytes");
+                       if (charIndex < 0 || charIndex > chars.Length)
+                               throw new ArgumentOutOfRangeException ("charIndex");
+                       // Written as a subtraction so charIndex + charCount cannot overflow.
+                       if (charCount < 0 || charCount > chars.Length - charIndex)
+                               throw new ArgumentOutOfRangeException ("charCount");
+                       if (byteIndex < 0 || byteIndex > bytes.Length)
+                               throw new ArgumentOutOfRangeException ("byteIndex");
+
+                       // Only handed to HandleFallback; it is the full array length.
+                       int byteCount = bytes.Length;
+                       int charEnd = charIndex + charCount;
+                       int byteStart = byteIndex;
+                       // Resume with a surrogate left over from a previous call, if any.
+                       char ch = incomplete_bytes;
+
+                       while (charIndex < charEnd)
+                       {
+                               if (incomplete_bytes == char.MinValue)
+                                       ch = chars[charIndex++];
+                               else
+                                       incomplete_bytes = char.MinValue;
+
+                               if (ch < 0x80)
+                               {
+                                       // ASCII
+                                       bytes[byteIndex++] = (byte)ch;
+                                       continue;
+                               }
+                               else if (Char.IsSurrogate(ch))
+                               {
+                                       // Surrogate
+                                       if (charIndex == charEnd)
+                                       {
+                                               incomplete_bytes = ch;
+                                               break; // incomplete
+                                       }
+                                       char ch2 = chars[charIndex++];
+                                       if (!Char.IsSurrogate(ch2))
+                                       {
+                                               // invalid surrogate
+                                               HandleFallback (chars, ref charIndex, ref charCount,
+                                                       bytes, ref byteIndex, ref byteCount, null);
+                                               continue;
+                                       }
+                                       int cp = (ch - 0xD800) * 0x400 + ch2 - 0xDC00;
+                                       GB18030Source.Unlinear(bytes, byteIndex, GB18030Source.FromUCSSurrogate(cp));
+                                       byteIndex += 4;
+                                       continue;
+                               }
+
+
+                               if (ch <= 0x80 || ch == 0xFF)
+                               {
+                                       // Character maps to itself
+                                       bytes[byteIndex++] = (byte)ch;
+                                       continue;
+                               }
+
+                               // u2n holds two bytes per UTF-16 code unit.
+                               byte b1 = gb2312.u2n[((int)ch) * 2 + 1];
+                               byte b2 = gb2312.u2n[((int)ch) * 2];
+                               if (b1 != 0 && b2 != 0)
+                               {
+                                       bytes[byteIndex++] = b1;
+                                       bytes[byteIndex++] = b2;
+                                       continue;
+                               }
+
+                               long value = GB18030Source.FromUCS(ch);
+                               if (value < 0)
+                                       bytes[byteIndex++] = 0x3F; // invalid(?)
+                               else
+                               {
+                                       // non-GB2312
+                                       GB18030Source.Unlinear(bytes, byteIndex, value);
+                                       byteIndex += 4;
+                               }
+                       }
+
+                       if (refresh)
+                       {
+                               if (incomplete_bytes != char.MinValue)
+                                       bytes[byteIndex++] = 0x3F; // incomplete
+                               incomplete_bytes = char.MinValue;
+                       }
+
+                       return byteIndex - byteStart;
+               }
+#endif
        }
 }