A little more CorCompare work:
[mono.git] / mcs / class / corlib / System.Text / UnicodeEncoding.cs
old mode 100755 (executable)
new mode 100644 (file)
index 405d70a..1d4ef22
@@ -3,6 +3,8 @@
  *             "System.Text.UnicodeEncoding" class.
  *
  * Copyright (c) 2001, 2002  Southern Storm Software, Pty Ltd
+ * Copyright (C) 2003, 2004 Novell, Inc.
+ * Copyright (C) 2006 Kornél Pál <http://www.kornelpal.hu/>
  *
  * Permission is hereby granted, free of charge, to any person obtaining
  * a copy of this software and associated documentation files (the "Software"),
@@ -27,8 +29,13 @@ namespace System.Text
 {
 
 using System;
+using System.Runtime.InteropServices;
 
 [Serializable]
+#if NET_2_0
+[ComVisible (true)]
+#endif
+[MonoTODO ("Serialization format not compatible with .NET")]
 public class UnicodeEncoding : Encoding
 {
        // Magic numbers used by Windows for Unicode.
@@ -45,7 +52,7 @@ public class UnicodeEncoding : Encoding
        private bool byteOrderMark;
 
        // Constructors.
-       public UnicodeEncoding () : base(UNICODE_CODE_PAGE)
+       public UnicodeEncoding () : this (false, true)
        {
                bigEndian = false;
                byteOrderMark = true;
@@ -55,8 +62,34 @@ public class UnicodeEncoding : Encoding
        {
                this.bigEndian = bigEndian;
                this.byteOrderMark = byteOrderMark;
+
+               if (bigEndian){
+                       body_name = "unicodeFFFE";
+                       encoding_name = "Unicode (Big-Endian)";
+                       header_name = "unicodeFFFE";
+                       is_browser_save = false;
+                       web_name = "unicodeFFFE";
+               } else {
+                       body_name = "utf-16";
+                       encoding_name = "Unicode";
+                       header_name = "utf-16";
+                       is_browser_save = true;
+                       web_name = "utf-16";
+               }
+               
+               // Windows reports the same code page number for
+               // both the little-endian and big-endian forms.
+               windows_code_page = UNICODE_CODE_PAGE;
        }
 
+#if NET_2_0
+       [MonoTODO ("Implement throwOnInvalidBytes")]
+       public UnicodeEncoding (bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes)
+               : this (bigEndian, byteOrderMark)
+       {
+       }
+#endif
+
        // Get the number of bytes needed to encode a character buffer.
        public override int GetByteCount (char[] chars, int index, int count)
        {
@@ -72,7 +105,6 @@ public class UnicodeEncoding : Encoding
                return count * 2;
        }
 
-       // Convenience wrappers for "GetByteCount".
        public override int GetByteCount (String s)
        {
                if (s == null) {
@@ -81,9 +113,23 @@ public class UnicodeEncoding : Encoding
                return s.Length * 2;
        }
 
+#if NET_2_0
+       [CLSCompliantAttribute (false)]
+       [ComVisible (false)]
+       public unsafe override int GetByteCount (char* chars, int count)
+       {
+               if (chars == null)
+                       throw new ArgumentNullException ("chars");
+               if (count < 0)
+                       throw new ArgumentOutOfRangeException ("count");
+
+               return count * 2;
+       }
+#endif
+
        // Get the bytes that result from encoding a character buffer.
-       public override int GetBytes (char[] chars, int charIndex, int charCount,
-                                                                byte[] bytes, int byteIndex)
+       public unsafe override int GetBytes (char [] chars, int charIndex, int charCount,
+                                                                               byte [] bytes, int byteIndex)
        {
                if (chars == null) {
                        throw new ArgumentNullException ("chars");
@@ -100,30 +146,36 @@ public class UnicodeEncoding : Encoding
                if (byteIndex < 0 || byteIndex > bytes.Length) {
                        throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
                }
-               if ((bytes.Length - byteIndex) < (charCount * 2)) {
-                       throw new ArgumentException (_("Arg_InsufficientSpace"));
-               }
-               int posn = byteIndex;
-               char ch;
-               if (bigEndian) {
-                       while (charCount-- > 0) {
-                               ch = chars[charIndex++];
-                               bytes[posn++] = (byte)(ch >> 8);
-                               bytes[posn++] = (byte)ch;
-                       }
-               } else {
-                       while (charCount-- > 0) {
-                               ch = chars[charIndex++];
-                               bytes[posn++] = (byte)ch;
-                               bytes[posn++] = (byte)(ch >> 8);
-                       }
-               }
-               return posn - byteIndex;
+
+               if (charCount == 0)
+                       return 0;
+
+               int byteCount = bytes.Length - byteIndex;
+               if (bytes.Length == 0)
+                       bytes = new byte [1];
+
+               fixed (char* charPtr = chars)
+                       fixed (byte* bytePtr = bytes)
+                               return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, byteCount);
+       }
+
+#if !NET_2_0
+       public override byte [] GetBytes (String s)
+       {
+               if (s == null)
+                       throw new ArgumentNullException ("s");
+
+               int byteCount = GetByteCount (s);
+               byte [] bytes = new byte [byteCount];
+
+               GetBytes (s, 0, s.Length, bytes, 0);
+
+               return bytes;
        }
+#endif
 
-       // Convenience wrappers for "GetBytes".
-       public override int GetBytes (String s, int charIndex, int charCount,
-                                                                byte[] bytes, int byteIndex)
+       public unsafe override int GetBytes (String s, int charIndex, int charCount,
+                                                                               byte [] bytes, int byteIndex)
        {
                if (s == null) {
                        throw new ArgumentNullException ("s");
@@ -140,25 +192,49 @@ public class UnicodeEncoding : Encoding
                if (byteIndex < 0 || byteIndex > bytes.Length) {
                        throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
                }
-               if ((bytes.Length - byteIndex) < (charCount * 2)) {
+
+               // For consistency
+               if (charCount == 0)
+                       return 0;
+
+               int byteCount = bytes.Length - byteIndex;
+               if (bytes.Length == 0)
+                       bytes = new byte [1];
+
+               fixed (char* charPtr = s)
+                       fixed (byte* bytePtr = bytes)
+                               return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, byteCount);
+       }
+
+#if NET_2_0
+       [CLSCompliantAttribute (false)]
+       [ComVisible (false)]
+       public unsafe override int GetBytes (char* chars, int charCount,
+                                                                               byte* bytes, int byteCount)
+       {
+               if (bytes == null)
+                       throw new ArgumentNullException ("bytes");
+               if (chars == null)
+                       throw new ArgumentNullException ("chars");
+               if (charCount < 0)
+                       throw new ArgumentOutOfRangeException ("charCount");
+               if (byteCount < 0)
+                       throw new ArgumentOutOfRangeException ("byteCount");
+
+               return GetBytesInternal (chars, charCount, bytes, byteCount);
+       }
+#endif
+
+       private unsafe int GetBytesInternal (char* chars, int charCount,
+                                                                               byte* bytes, int byteCount)
+       {
+               int count = charCount * 2;
+
+               if (byteCount < count)
                        throw new ArgumentException (_("Arg_InsufficientSpace"));
-               }
-               int posn = byteIndex;
-               char ch;
-               if (bigEndian) {
-                       while (charCount-- > 0) {
-                               ch = s[charIndex++];
-                               bytes[posn++] = (byte)(ch >> 8);
-                               bytes[posn++] = (byte)ch;
-                       }
-               } else {
-                       while (charCount-- > 0) {
-                               ch = s[charIndex++];
-                               bytes[posn++] = (byte)ch;
-                               bytes[posn++] = (byte)(ch >> 8);
-                       }
-               }
-               return posn - byteIndex;
+
+               CopyChars ((byte*) chars, bytes, count, bigEndian);
+               return count;
        }
 
        // Get the number of characters needed to decode a byte buffer.
@@ -173,18 +249,26 @@ public class UnicodeEncoding : Encoding
                if (count < 0 || count > (bytes.Length - index)) {
                        throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
                }
-               if (count >= 2) {
-                       if ((bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) ||
-                                       (bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE)) {
-                               return ((count - 1) / 2);
-                       }
-               }
                return count / 2;
        }
 
+#if NET_2_0
+       [CLSCompliantAttribute (false)]
+       [ComVisible (false)]
+       public unsafe override int GetCharCount (byte* bytes, int count)
+       {
+               if (bytes == null)
+                       throw new ArgumentNullException ("bytes");
+               if (count < 0)
+                       throw new ArgumentOutOfRangeException ("count");
+
+               return count / 2;
+       }
+#endif
+
        // Get the characters that result from decoding a byte buffer.
-       public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
-                                                                char[] chars, int charIndex)
+       public unsafe override int GetChars (byte [] bytes, int byteIndex, int byteCount,
+                                                                               char [] chars, int charIndex)
        {
                if (bytes == null) {
                        throw new ArgumentNullException ("bytes");
@@ -202,49 +286,73 @@ public class UnicodeEncoding : Encoding
                        throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
                }
 
-               // Determine the byte order in the incoming buffer.
-               bool isBigEndian;
-               if (byteCount >= 2) {
-                       if (bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) {
-                               isBigEndian = true;
-                               byteCount -= 2;
-                               byteIndex += 2;
-                       } else if (bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) {
-                               isBigEndian = false;
-                               byteCount -= 2;
-                               byteIndex += 2;
-                       } else {
-                               isBigEndian = bigEndian;
-                       }
-               } else {
-                       isBigEndian = bigEndian;
-               }
+               if (byteCount == 0)
+                       return 0;
+
+               int charCount = chars.Length - charIndex;
+               if (chars.Length == 0)
+                       chars = new char [1];
+
+               fixed (byte* bytePtr = bytes)
+                       fixed (char* charPtr = chars)
+                               return GetCharsInternal (bytePtr + byteIndex, byteCount, charPtr + charIndex, charCount);
+}
+
+#if NET_2_0
+       [CLSCompliantAttribute (false)]
+       [ComVisible (false)]
+       public unsafe override int GetChars (byte* bytes, int byteCount,
+                                                                               char* chars, int charCount)
+       {
+               if (bytes == null)
+                       throw new ArgumentNullException ("bytes");
+               if (chars == null)
+                       throw new ArgumentNullException ("chars");
+               if (charCount < 0)
+                       throw new ArgumentOutOfRangeException ("charCount");
+               if (byteCount < 0)
+                       throw new ArgumentOutOfRangeException ("byteCount");
+
+               return GetCharsInternal (bytes, byteCount, chars, charCount);
+       }
+#endif
+
+       // Decode a buffer of bytes into a string.
+       [ComVisible (false)]
+       public unsafe override String GetString (byte [] bytes, int index, int count)
+       {
+               if (bytes == null)
+                       throw new ArgumentNullException ("bytes");
+               if (index < 0 || index > bytes.Length)
+                       throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
+               if (count < 0 || count > (bytes.Length - index))
+                       throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
+
+               if (count == 0)
+                       return string.Empty;
+
+               // GetCharCountInternal
+               int charCount = count / 2;
+               string s = string.InternalAllocateStr (charCount);
+
+               fixed (byte* bytePtr = bytes)
+                       fixed (char* charPtr = s)
+                               GetCharsInternal (bytePtr + index, count, charPtr, charCount);
+
+               return s;
+       }
+
+       private unsafe int GetCharsInternal (byte* bytes, int byteCount,
+                                                                               char* chars, int charCount)
+       {
+               int count = byteCount / 2;
 
                // Validate that we have sufficient space in "chars".
-               if ((chars.Length - charIndex) < (byteCount / 2)) {
+               if (charCount < count)
                        throw new ArgumentException (_("Arg_InsufficientSpace"));
-               }
 
-               // Convert the characters.
-               int posn = charIndex;
-               if (isBigEndian) {
-                       while (byteCount >= 2) {
-                               chars[posn++] =
-                                       ((char)((((int)(bytes[byteIndex])) << 8) |
-                                                        ((int)(bytes[byteIndex + 1]))));
-                               byteIndex += 2;
-                               byteCount -= 2;
-                       }
-               } else {
-                       while (byteCount >= 2) {
-                               chars[posn++] =
-                                       ((char)((((int)(bytes[byteIndex + 1])) << 8) |
-                                                        ((int)(bytes[byteIndex]))));
-                               byteIndex += 2;
-                               byteCount -= 2;
-                       }
-               }
-               return posn - charIndex;
+               CopyChars (bytes, (byte*) chars, byteCount, bigEndian);
+               return count;
        }
 
        // Get the maximum number of bytes needed to encode a
@@ -311,76 +419,119 @@ public class UnicodeEncoding : Encoding
                return base.GetHashCode ();
        }
 
-#if !ECMA_COMPAT
-
-       // Get the mail body name for this encoding.
-       public override String BodyName
-       {
-               get {
-                       if (bigEndian) {
-                               return "unicodeFFFE";
-                       } else {
-                               return "utf-16";
-                       }
-               }
-       }
-
-       // Get the human-readable name for this encoding.
-       public override String EncodingName
-       {
-               get {
-                       if (bigEndian) {
-                               return "Unicode (Big-Endian)";
-                       } else {
-                               return "Unicode";
-                       }
-               }
-       }
-
-       // Get the mail agent header name for this encoding.
-       public override String HeaderName
-       {
-               get {
-                       if (bigEndian) {
-                               return "unicodeFFFE";
-                       } else {
-                               return "utf-16";
-                       }
-               }
-       }
-
-       // Determine if this encoding can be saved from a Web browser.
-       public override bool IsBrowserSave
+       private unsafe static void CopyChars (byte* src, byte* dest, int count, bool bigEndian)
        {
-               get {
-                       return !bigEndian;
-               }
-       }
-
-       // Get the IANA-preferred Web name for this encoding.
-       public override String WebName
-       {
-               get {
-                       if (bigEndian) {
-                               return "unicodeFFFE";
-                       } else {
-                               return "utf-16";
-                       }
-               }
+               if (BitConverter.IsLittleEndian != bigEndian) {
+                       string.memcpy (dest, src, count & unchecked ((int) 0xFFFFFFFE));
+                       return;
+               }
+
+               switch (count) {
+               case 0:
+                       return;
+               case 1:
+                       return;
+               case 2:
+                       goto Count2;
+               case 3:
+                       goto Count2;
+               case 4:
+                       goto Count4;
+               case 5:
+                       goto Count4;
+               case 6:
+                       goto Count4;
+               case 7:
+                       goto Count4;
+               case 8:
+                       goto Count8;
+               case 9:
+                       goto Count8;
+               case 10:
+                       goto Count8;
+               case 11:
+                       goto Count8;
+               case 12:
+                       goto Count8;
+               case 13:
+                       goto Count8;
+               case 14:
+                       goto Count8;
+               case 15:
+                       goto Count8;
+               }
+
+               do {
+                       dest [0] = src [1];
+                       dest [1] = src [0];
+                       dest [2] = src [3];
+                       dest [3] = src [2];
+                       dest [4] = src [5];
+                       dest [5] = src [4];
+                       dest [6] = src [7];
+                       dest [7] = src [6];
+                       dest [8] = src [9];
+                       dest [9] = src [8];
+                       dest [10] = src [11];
+                       dest [11] = src [10];
+                       dest [12] = src [13];
+                       dest [13] = src [12];
+                       dest [14] = src [15];
+                       dest [15] = src [14];
+                       dest += 16;
+                       src += 16;
+                       count -= 16;
+               } while ((count & unchecked ((int) 0xFFFFFFF0)) != 0);
+
+               switch (count) {
+               case 0:
+                       return;
+               case 1:
+                       return;
+               case 2:
+                       goto Count2;
+               case 3:
+                       goto Count2;
+               case 4:
+                       goto Count4;
+               case 5:
+                       goto Count4;
+               case 6:
+                       goto Count4;
+               case 7:
+                       goto Count4;
+               }
+
+               Count8:;
+               dest [0] = src [1];
+               dest [1] = src [0];
+               dest [2] = src [3];
+               dest [3] = src [2];
+               dest [4] = src [5];
+               dest [5] = src [4];
+               dest [6] = src [7];
+               dest [7] = src [6];
+               dest += 8;
+               src += 8;
+
+               if ((count & 4) == 0)
+                       goto TestCount2;
+               Count4:;
+               dest [0] = src [1];
+               dest [1] = src [0];
+               dest [2] = src [3];
+               dest [3] = src [2];
+               dest += 4;
+               src += 4;
+
+               TestCount2:;
+               if ((count & 2) == 0)
+                       return;
+               Count2:;
+               dest [0] = src [1];
+               dest [1] = src [0];
        }
 
-       // Get the Windows code page represented by this object.
-       public override int WindowsCodePage
-       {
-               get {
-                       // Windows reports the same code page number for
-                       // both the little-endian and big-endian forms.
-                       return UNICODE_CODE_PAGE;
-               }
-       }
-
-#endif // !ECMA_COMPAT
-
        // Unicode decoder implementation.
        private sealed class UnicodeDecoder : Decoder
        {
@@ -412,9 +563,10 @@ public class UnicodeEncoding : Encoding
                                return count / 2;
                        }
                }
-               public override int GetChars (byte[] bytes, int byteIndex,
-                                                                        int byteCount, char[] chars,
-                                                                        int charIndex)
+               
+               public unsafe override int GetChars (byte [] bytes, int byteIndex,
+                                                                                       int byteCount, char [] chars,
+                                                                                       int charIndex)
                {
                        if (bytes == null) {
                                throw new ArgumentNullException ("bytes");
@@ -432,54 +584,41 @@ public class UnicodeEncoding : Encoding
                                throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
                        }
 
-                       // Convert the characters.
-                       int posn = charIndex;
-                       bool isBigEndian = bigEndian;
+                       if (byteCount == 0)
+                               return 0;
+
                        int leftOver = leftOverByte;
-                       int length = chars.Length;
-                       char ch;
-                       while (byteCount > 0) {
-                               if (leftOver != -1) {
-                                       if (isBigEndian) {
-                                               ch = ((char)((leftOver << 8) | ((int)(bytes[byteIndex]))));
-                                       } else {
-                                               ch = ((char)(leftOver |
-                                                                (((int)(bytes[byteIndex])) << 8)));
-                                       }
-                                       leftOver = -1;
-                                       ++byteIndex;
-                                       --byteCount;
-                               } else if (byteCount > 1) {
-                                       if (isBigEndian) {
-                                               ch = ((char)((((int)(bytes[byteIndex])) << 8) |
-                                                                         ((int)(bytes[byteIndex + 1]))));
-                                       } else {
-                                               ch = ((char)((((int)(bytes[byteIndex + 1])) << 8) |
-                                                                     ((int)(bytes[byteIndex]))));
-                                       }
-                                       byteIndex += 2;
-                                       byteCount -= 2;
-                               } else {
-                                       leftOver = (int)(bytes[byteIndex]);
-                                       break;
-                               }
-                               if (ch == '\uFFFE') {
-                                       // Switch byte orders.
-                                       bigEndian = !bigEndian;
-                               } else if (ch != '\uFEFF') {
-                                       // Ordinary character.
-                                       if (posn < length) {
-                                               chars[posn++] = ch;
-                                       } else {
-                                               throw new ArgumentException (_("Arg_InsufficientSpace"));
-                                       }
-                               }
+                       int count;
+
+                       if (leftOver != -1)
+                               count = (byteCount + 1) / 2;
+                       else
+                               count = byteCount / 2;
+
+                       if (chars.Length - charIndex < count)
+                               throw new ArgumentException (_("Arg_InsufficientSpace"));
+
+                       if (leftOver != -1) {
+                               if (bigEndian)
+                                       chars [charIndex] = unchecked ((char) ((leftOver << 8) | (int) bytes [byteIndex]));
+                               else
+                                       chars [charIndex] = unchecked ((char) (((int) bytes [byteIndex] << 8) | leftOver));
+                               charIndex++;
+                               byteIndex++;
+                               byteCount--;
                        }
-                       leftOverByte = leftOver;
-                       bigEndian = isBigEndian;
 
-                       // Finished - return the converted length.
-                       return posn - charIndex;
+                       if ((byteCount & unchecked ((int) 0xFFFFFFFE)) != 0)
+                               fixed (byte* bytePtr = bytes)
+                                       fixed (char* charPtr = chars)
+                                               CopyChars (bytePtr + byteIndex, (byte*) (charPtr + charIndex), byteCount, bigEndian);
+
+                       if ((byteCount & 1) == 0)
+                               leftOverByte = -1;
+                       else
+                               leftOverByte = bytes [byteCount + byteIndex - 1];
+
+                       return count;
                }
 
        } // class UnicodeDecoder