X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mcs%2Fclass%2Fcorlib%2FSystem.Text%2FUnicodeEncoding.cs;h=1d4ef22469391bfa9e9c6ef4527740a2887d3e38;hb=c694fb8e631e8f79b383b8ff696228abe1689a20;hp=405d70a139f30fba11fb5a90dfcb8d12b39b1d9b;hpb=0fcd47a575383a960b204356ad022a53cb16d2d3;p=mono.git diff --git a/mcs/class/corlib/System.Text/UnicodeEncoding.cs b/mcs/class/corlib/System.Text/UnicodeEncoding.cs old mode 100755 new mode 100644 index 405d70a139f..1d4ef224693 --- a/mcs/class/corlib/System.Text/UnicodeEncoding.cs +++ b/mcs/class/corlib/System.Text/UnicodeEncoding.cs @@ -3,6 +3,8 @@ * "System.Text.UnicodeEncoding" class. * * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd + * Copyright (C) 2003, 2004 Novell, Inc. + * Copyright (C) 2006 Kornél Pál * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the "Software"), @@ -27,8 +29,13 @@ namespace System.Text { using System; +using System.Runtime.InteropServices; [Serializable] +#if NET_2_0 +[ComVisible (true)] +#endif +[MonoTODO ("Serialization format not compatible with .NET")] public class UnicodeEncoding : Encoding { // Magic numbers used by Windows for Unicode. @@ -45,7 +52,7 @@ public class UnicodeEncoding : Encoding private bool byteOrderMark; // Constructors. - public UnicodeEncoding () : base(UNICODE_CODE_PAGE) + public UnicodeEncoding () : this (false, true) { bigEndian = false; byteOrderMark = true; @@ -55,8 +62,34 @@ public class UnicodeEncoding : Encoding { this.bigEndian = bigEndian; this.byteOrderMark = byteOrderMark; + + if (bigEndian){ + body_name = "unicodeFFFE"; + encoding_name = "Unicode (Big-Endian)"; + header_name = "unicodeFFFE"; + is_browser_save = false; + web_name = "unicodeFFFE"; + } else { + body_name = "utf-16"; + encoding_name = "Unicode"; + header_name = "utf-16"; + is_browser_save = true; + web_name = "utf-16"; + } + + // Windows reports the same code page number for + // both the little-endian and big-endian forms. + windows_code_page = UNICODE_CODE_PAGE; } +#if NET_2_0 + [MonoTODO ("Implement throwOnInvalidBytes")] + public UnicodeEncoding (bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes) + : this (bigEndian, byteOrderMark) + { + } +#endif + // Get the number of bytes needed to encode a character buffer. public override int GetByteCount (char[] chars, int index, int count) { @@ -72,7 +105,6 @@ public class UnicodeEncoding : Encoding return count * 2; } - // Convenience wrappers for "GetByteCount". public override int GetByteCount (String s) { if (s == null) { @@ -81,9 +113,23 @@ public class UnicodeEncoding : Encoding return s.Length * 2; } +#if NET_2_0 + [CLSCompliantAttribute (false)] + [ComVisible (false)] + public unsafe override int GetByteCount (char* chars, int count) + { + if (chars == null) + throw new ArgumentNullException ("chars"); + if (count < 0) + throw new ArgumentOutOfRangeException ("count"); + + return count * 2; + } +#endif + // Get the bytes that result from encoding a character buffer. - public override int GetBytes (char[] chars, int charIndex, int charCount, - byte[] bytes, int byteIndex) + public unsafe override int GetBytes (char [] chars, int charIndex, int charCount, + byte [] bytes, int byteIndex) { if (chars == null) { throw new ArgumentNullException ("chars"); @@ -100,30 +146,36 @@ public class UnicodeEncoding : Encoding if (byteIndex < 0 || byteIndex > bytes.Length) { throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array")); } - if ((bytes.Length - byteIndex) < (charCount * 2)) { - throw new ArgumentException (_("Arg_InsufficientSpace")); - } - int posn = byteIndex; - char ch; - if (bigEndian) { - while (charCount-- > 0) { - ch = chars[charIndex++]; - bytes[posn++] = (byte)(ch >> 8); - bytes[posn++] = (byte)ch; - } - } else { - while (charCount-- > 0) { - ch = chars[charIndex++]; - bytes[posn++] = (byte)ch; - bytes[posn++] = (byte)(ch >> 8); - } - } - return posn - byteIndex; + + if (charCount == 0) + return 0; + + int byteCount = bytes.Length - byteIndex; + if (bytes.Length == 0) + bytes = new byte [1]; + + fixed (char* charPtr = chars) + fixed (byte* bytePtr = bytes) + return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, byteCount); + } + +#if !NET_2_0 + public override byte [] GetBytes (String s) + { + if (s == null) + throw new ArgumentNullException ("s"); + + int byteCount = GetByteCount (s); + byte [] bytes = new byte [byteCount]; + + GetBytes (s, 0, s.Length, bytes, 0); + + return bytes; } +#endif - // Convenience wrappers for "GetBytes". - public override int GetBytes (String s, int charIndex, int charCount, - byte[] bytes, int byteIndex) + public unsafe override int GetBytes (String s, int charIndex, int charCount, + byte [] bytes, int byteIndex) { if (s == null) { throw new ArgumentNullException ("s"); @@ -140,25 +192,49 @@ public class UnicodeEncoding : Encoding if (byteIndex < 0 || byteIndex > bytes.Length) { throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array")); } - if ((bytes.Length - byteIndex) < (charCount * 2)) { + + // For consistency + if (charCount == 0) + return 0; + + int byteCount = bytes.Length - byteIndex; + if (bytes.Length == 0) + bytes = new byte [1]; + + fixed (char* charPtr = s) + fixed (byte* bytePtr = bytes) + return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, byteCount); + } + +#if NET_2_0 + [CLSCompliantAttribute (false)] + [ComVisible (false)] + public unsafe override int GetBytes (char* chars, int charCount, + byte* bytes, int byteCount) + { + if (bytes == null) + throw new ArgumentNullException ("bytes"); + if (chars == null) + throw new ArgumentNullException ("chars"); + if (charCount < 0) + throw new ArgumentOutOfRangeException ("charCount"); + if (byteCount < 0) + throw new ArgumentOutOfRangeException ("byteCount"); + + return GetBytesInternal (chars, charCount, bytes, byteCount); + } +#endif + + private unsafe int GetBytesInternal (char* chars, int charCount, + byte* bytes, int byteCount) + { + int count = charCount * 2; + + if (byteCount < count) throw new ArgumentException (_("Arg_InsufficientSpace")); - } - int posn = byteIndex; - char ch; - if (bigEndian) { - while (charCount-- > 0) { - ch = s[charIndex++]; - bytes[posn++] = (byte)(ch >> 8); - bytes[posn++] = (byte)ch; - } - } else { - while (charCount-- > 0) { - ch = s[charIndex++]; - bytes[posn++] = (byte)ch; - bytes[posn++] = (byte)(ch >> 8); - } - } - return posn - byteIndex; + + CopyChars ((byte*) chars, bytes, count, bigEndian); + return count; } // Get the number of characters needed to decode a byte buffer. @@ -173,18 +249,26 @@ public class UnicodeEncoding : Encoding if (count < 0 || count > (bytes.Length - index)) { throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array")); } - if (count >= 2) { - if ((bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) || - (bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE)) { - return ((count - 1) / 2); - } - } return count / 2; } +#if NET_2_0 + [CLSCompliantAttribute (false)] + [ComVisible (false)] + public unsafe override int GetCharCount (byte* bytes, int count) + { + if (bytes == null) + throw new ArgumentNullException ("bytes"); + if (count < 0) + throw new ArgumentOutOfRangeException ("count"); + + return count / 2; + } +#endif + // Get the characters that result from decoding a byte buffer. - public override int GetChars (byte[] bytes, int byteIndex, int byteCount, - char[] chars, int charIndex) + public unsafe override int GetChars (byte [] bytes, int byteIndex, int byteCount, + char [] chars, int charIndex) { if (bytes == null) { throw new ArgumentNullException ("bytes"); @@ -202,49 +286,73 @@ public class UnicodeEncoding : Encoding throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array")); } - // Determine the byte order in the incoming buffer. - bool isBigEndian; - if (byteCount >= 2) { - if (bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) { - isBigEndian = true; - byteCount -= 2; - byteIndex += 2; - } else if (bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) { - isBigEndian = false; - byteCount -= 2; - byteIndex += 2; - } else { - isBigEndian = bigEndian; - } - } else { - isBigEndian = bigEndian; - } + if (byteCount == 0) + return 0; + + int charCount = chars.Length - charIndex; + if (chars.Length == 0) + chars = new char [1]; + + fixed (byte* bytePtr = bytes) + fixed (char* charPtr = chars) + return GetCharsInternal (bytePtr + byteIndex, byteCount, charPtr + charIndex, charCount); +} + +#if NET_2_0 + [CLSCompliantAttribute (false)] + [ComVisible (false)] + public unsafe override int GetChars (byte* bytes, int byteCount, + char* chars, int charCount) + { + if (bytes == null) + throw new ArgumentNullException ("bytes"); + if (chars == null) + throw new ArgumentNullException ("chars"); + if (charCount < 0) + throw new ArgumentOutOfRangeException ("charCount"); + if (byteCount < 0) + throw new ArgumentOutOfRangeException ("byteCount"); + + return GetCharsInternal (bytes, byteCount, chars, charCount); + } +#endif + + // Decode a buffer of bytes into a string. + [ComVisible (false)] + public unsafe override String GetString (byte [] bytes, int index, int count) + { + if (bytes == null) + throw new ArgumentNullException ("bytes"); + if (index < 0 || index > bytes.Length) + throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array")); + if (count < 0 || count > (bytes.Length - index)) + throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array")); + + if (count == 0) + return string.Empty; + + // GetCharCountInternal + int charCount = count / 2; + string s = string.InternalAllocateStr (charCount); + + fixed (byte* bytePtr = bytes) + fixed (char* charPtr = s) + GetCharsInternal (bytePtr + index, count, charPtr, charCount); + + return s; + } + + private unsafe int GetCharsInternal (byte* bytes, int byteCount, + char* chars, int charCount) + { + int count = byteCount / 2; // Validate that we have sufficient space in "chars". - if ((chars.Length - charIndex) < (byteCount / 2)) { + if (charCount < count) throw new ArgumentException (_("Arg_InsufficientSpace")); - } - // Convert the characters. - int posn = charIndex; - if (isBigEndian) { - while (byteCount >= 2) { - chars[posn++] = - ((char)((((int)(bytes[byteIndex])) << 8) | - ((int)(bytes[byteIndex + 1])))); - byteIndex += 2; - byteCount -= 2; - } - } else { - while (byteCount >= 2) { - chars[posn++] = - ((char)((((int)(bytes[byteIndex + 1])) << 8) | - ((int)(bytes[byteIndex])))); - byteIndex += 2; - byteCount -= 2; - } - } - return posn - charIndex; + CopyChars (bytes, (byte*) chars, byteCount, bigEndian); + return count; } // Get the maximum number of bytes needed to encode a @@ -311,76 +419,119 @@ public class UnicodeEncoding : Encoding return base.GetHashCode (); } -#if !ECMA_COMPAT - - // Get the mail body name for this encoding. - public override String BodyName - { - get { - if (bigEndian) { - return "unicodeFFFE"; - } else { - return "utf-16"; - } - } - } - - // Get the human-readable name for this encoding. - public override String EncodingName - { - get { - if (bigEndian) { - return "Unicode (Big-Endian)"; - } else { - return "Unicode"; - } - } - } - - // Get the mail agent header name for this encoding. - public override String HeaderName - { - get { - if (bigEndian) { - return "unicodeFFFE"; - } else { - return "utf-16"; - } - } - } - - // Determine if this encoding can be saved from a Web browser. - public override bool IsBrowserSave + private unsafe static void CopyChars (byte* src, byte* dest, int count, bool bigEndian) { - get { - return !bigEndian; - } - } - - // Get the IANA-preferred Web name for this encoding. - public override String WebName - { - get { - if (bigEndian) { - return "unicodeFFFE"; - } else { - return "utf-16"; - } - } + if (BitConverter.IsLittleEndian != bigEndian) { + string.memcpy (dest, src, count & unchecked ((int) 0xFFFFFFFE)); + return; + } + + switch (count) { + case 0: + return; + case 1: + return; + case 2: + goto Count2; + case 3: + goto Count2; + case 4: + goto Count4; + case 5: + goto Count4; + case 6: + goto Count4; + case 7: + goto Count4; + case 8: + goto Count8; + case 9: + goto Count8; + case 10: + goto Count8; + case 11: + goto Count8; + case 12: + goto Count8; + case 13: + goto Count8; + case 14: + goto Count8; + case 15: + goto Count8; + } + + do { + dest [0] = src [1]; + dest [1] = src [0]; + dest [2] = src [3]; + dest [3] = src [2]; + dest [4] = src [5]; + dest [5] = src [4]; + dest [6] = src [7]; + dest [7] = src [6]; + dest [8] = src [9]; + dest [9] = src [8]; + dest [10] = src [11]; + dest [11] = src [10]; + dest [12] = src [13]; + dest [13] = src [12]; + dest [14] = src [15]; + dest [15] = src [14]; + dest += 16; + src += 16; + count -= 16; + } while ((count & unchecked ((int) 0xFFFFFFF0)) != 0); + + switch (count) { + case 0: + return; + case 1: + return; + case 2: + goto Count2; + case 3: + goto Count2; + case 4: + goto Count4; + case 5: + goto Count4; + case 6: + goto Count4; + case 7: + goto Count4; + } + + Count8:; + dest [0] = src [1]; + dest [1] = src [0]; + dest [2] = src [3]; + dest [3] = src [2]; + dest [4] = src [5]; + dest [5] = src [4]; + dest [6] = src [7]; + dest [7] = src [6]; + dest += 8; + src += 8; + + if ((count & 4) == 0) + goto TestCount2; + Count4:; + dest [0] = src [1]; + dest [1] = src [0]; + dest [2] = src [3]; + dest [3] = src [2]; + dest += 4; + src += 4; + + TestCount2:; + if ((count & 2) == 0) + return; + Count2:; + dest [0] = src [1]; + dest [1] = src [0]; } - // Get the Windows code page represented by this object. - public override int WindowsCodePage - { - get { - // Windows reports the same code page number for - // both the little-endian and big-endian forms. - return UNICODE_CODE_PAGE; - } - } - -#endif // !ECMA_COMPAT - // Unicode decoder implementation. private sealed class UnicodeDecoder : Decoder { @@ -412,9 +563,10 @@ public class UnicodeEncoding : Encoding return count / 2; } } - public override int GetChars (byte[] bytes, int byteIndex, - int byteCount, char[] chars, - int charIndex) + + public unsafe override int GetChars (byte [] bytes, int byteIndex, + int byteCount, char [] chars, + int charIndex) { if (bytes == null) { throw new ArgumentNullException ("bytes"); @@ -432,54 +584,41 @@ public class UnicodeEncoding : Encoding throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array")); } - // Convert the characters. - int posn = charIndex; - bool isBigEndian = bigEndian; + if (byteCount == 0) + return 0; + int leftOver = leftOverByte; - int length = chars.Length; - char ch; - while (byteCount > 0) { - if (leftOver != -1) { - if (isBigEndian) { - ch = ((char)((leftOver << 8) | ((int)(bytes[byteIndex])))); - } else { - ch = ((char)(leftOver | - (((int)(bytes[byteIndex])) << 8))); - } - leftOver = -1; - ++byteIndex; - --byteCount; - } else if (byteCount > 1) { - if (isBigEndian) { - ch = ((char)((((int)(bytes[byteIndex])) << 8) | - ((int)(bytes[byteIndex + 1])))); - } else { - ch = ((char)((((int)(bytes[byteIndex + 1])) << 8) | - ((int)(bytes[byteIndex])))); - } - byteIndex += 2; - byteCount -= 2; - } else { - leftOver = (int)(bytes[byteIndex]); - break; - } - if (ch == '\uFFFE') { - // Switch byte orders. - bigEndian = !bigEndian; - } else if (ch != '\uFEFF') { - // Ordinary character. - if (posn < length) { - chars[posn++] = ch; - } else { - throw new ArgumentException (_("Arg_InsufficientSpace")); - } - } + int count; + + if (leftOver != -1) + count = (byteCount + 1) / 2; + else + count = byteCount / 2; + + if (chars.Length - charIndex < count) + throw new ArgumentException (_("Arg_InsufficientSpace")); + + if (leftOver != -1) { + if (bigEndian) + chars [charIndex] = unchecked ((char) ((leftOver << 8) | (int) bytes [byteIndex])); + else + chars [charIndex] = unchecked ((char) (((int) bytes [byteIndex] << 8) | leftOver)); + charIndex++; + byteIndex++; + byteCount--; } - leftOverByte = leftOver; - bigEndian = isBigEndian; - // Finished - return the converted length. - return posn - charIndex; + if ((byteCount & unchecked ((int) 0xFFFFFFFE)) != 0) + fixed (byte* bytePtr = bytes) + fixed (char* charPtr = chars) + CopyChars (bytePtr + byteIndex, (byte*) (charPtr + charIndex), byteCount, bigEndian); + + if ((byteCount & 1) == 0) + leftOverByte = -1; + else + leftOverByte = bytes [byteCount + byteIndex - 1]; + + return count; } } // class UnicodeDecoder