X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mcs%2Fclass%2Fcorlib%2FSystem.Text%2FUnicodeEncoding.cs;h=1d4ef22469391bfa9e9c6ef4527740a2887d3e38;hb=c694fb8e631e8f79b383b8ff696228abe1689a20;hp=6b6ed4ab8c0ae8e9666a8ed91c98c88bd2c06459;hpb=12d3d0ce8c28155947a4d73f08c0139aa1409672;p=mono.git diff --git a/mcs/class/corlib/System.Text/UnicodeEncoding.cs b/mcs/class/corlib/System.Text/UnicodeEncoding.cs old mode 100755 new mode 100644 index 6b6ed4ab8c0..1d4ef224693 --- a/mcs/class/corlib/System.Text/UnicodeEncoding.cs +++ b/mcs/class/corlib/System.Text/UnicodeEncoding.cs @@ -1,53 +1,628 @@ -// -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- -// -// System.Text.UnicodeEncoding.cs -// -// Author: -// Sean MacIsaac (macisaac@ximian.com) -// Dietmar Maurer (dietmar@ximian.com) -// -// (C) Ximian, Inc. http://www.ximian.com -// - -// FIXME: implement byteOrderMark - -namespace System.Text { - - public class UnicodeEncoding : Encoding - { - private bool byteOrderMark; - - private void init (bool byteOrderMark) - { - this.byteOrderMark = byteOrderMark; - encoding_name = "Unicode"; +/* + * UnicodeEncoding.cs - Implementation of the + * "System.Text.UnicodeEncoding" class. + * + * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd + * Copyright (C) 2003, 2004 Novell, Inc. + * Copyright (C) 2006 Kornél Pál + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +namespace System.Text +{ + +using System; +using System.Runtime.InteropServices; + +[Serializable] +#if NET_2_0 +[ComVisible (true)] +#endif +[MonoTODO ("Serialization format not compatible with .NET")] +public class UnicodeEncoding : Encoding +{ + // Magic numbers used by Windows for Unicode. + internal const int UNICODE_CODE_PAGE = 1200; + internal const int BIG_UNICODE_CODE_PAGE = 1201; + +#if !ECMA_COMPAT + // Size of characters in this encoding. + public const int CharSize = 2; +#endif + + // Internal state. + private bool bigEndian; + private bool byteOrderMark; + + // Constructors. + public UnicodeEncoding () : this (false, true) + { + bigEndian = false; + byteOrderMark = true; + } + public UnicodeEncoding (bool bigEndian, bool byteOrderMark) + : base ((bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE)) + { + this.bigEndian = bigEndian; + this.byteOrderMark = byteOrderMark; + + if (bigEndian){ + body_name = "unicodeFFFE"; + encoding_name = "Unicode (Big-Endian)"; + header_name = "unicodeFFFE"; + is_browser_save = false; + web_name = "unicodeFFFE"; + } else { body_name = "utf-16"; + encoding_name = "Unicode"; header_name = "utf-16"; - web_name = "utf-16"; - is_browser_display = false; is_browser_save = true; - is_mail_news_display = false; - is_mail_news_save = false; + web_name = "utf-16"; } - public UnicodeEncoding () : base ("UNICODE", false) + // Windows reports the same code page number for + // both the little-endian and big-endian forms. + windows_code_page = UNICODE_CODE_PAGE; + } + +#if NET_2_0 + [MonoTODO ("Implement throwOnInvalidBytes")] + public UnicodeEncoding (bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes) + : this (bigEndian, byteOrderMark) + { + } +#endif + + // Get the number of bytes needed to encode a character buffer. + public override int GetByteCount (char[] chars, int index, int count) + { + if (chars == null) { + throw new ArgumentNullException ("chars"); + } + if (index < 0 || index > chars.Length) { + throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array")); + } + if (count < 0 || count > (chars.Length - index)) { + throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array")); + } + return count * 2; + } + + public override int GetByteCount (String s) + { + if (s == null) { + throw new ArgumentNullException ("s"); + } + return s.Length * 2; + } + +#if NET_2_0 + [CLSCompliantAttribute (false)] + [ComVisible (false)] + public unsafe override int GetByteCount (char* chars, int count) + { + if (chars == null) + throw new ArgumentNullException ("chars"); + if (count < 0) + throw new ArgumentOutOfRangeException ("count"); + + return count * 2; + } +#endif + + // Get the bytes that result from encoding a character buffer. + public unsafe override int GetBytes (char [] chars, int charIndex, int charCount, + byte [] bytes, int byteIndex) + { + if (chars == null) { + throw new ArgumentNullException ("chars"); + } + if (bytes == null) { + throw new ArgumentNullException ("bytes"); + } + if (charIndex < 0 || charIndex > chars.Length) { + throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array")); + } + if (charCount < 0 || charCount > (chars.Length - charIndex)) { + throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array")); + } + if (byteIndex < 0 || byteIndex > bytes.Length) { + throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array")); + } + + if (charCount == 0) + return 0; + + int byteCount = bytes.Length - byteIndex; + if (bytes.Length == 0) + bytes = new byte [1]; + + fixed (char* charPtr = chars) + fixed (byte* bytePtr = bytes) + return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, byteCount); + } + +#if !NET_2_0 + public override byte [] GetBytes (String s) + { + if (s == null) + throw new ArgumentNullException ("s"); + + int byteCount = GetByteCount (s); + byte [] bytes = new byte [byteCount]; + + GetBytes (s, 0, s.Length, bytes, 0); + + return bytes; + } +#endif + + public unsafe override int GetBytes (String s, int charIndex, int charCount, + byte [] bytes, int byteIndex) + { + if (s == null) { + throw new ArgumentNullException ("s"); + } + if (bytes == null) { + throw new ArgumentNullException ("bytes"); + } + if (charIndex < 0 || charIndex > s.Length) { + throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex")); + } + if (charCount < 0 || charCount > (s.Length - charIndex)) { + throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange")); + } + if (byteIndex < 0 || byteIndex > bytes.Length) { + throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array")); + } + + // For consistency + if (charCount == 0) + return 0; + + int byteCount = bytes.Length - byteIndex; + if (bytes.Length == 0) + bytes = new byte [1]; + + fixed (char* charPtr = s) + fixed (byte* bytePtr = bytes) + return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, byteCount); + } + +#if NET_2_0 + [CLSCompliantAttribute (false)] + [ComVisible (false)] + public unsafe override int GetBytes (char* chars, int charCount, + byte* bytes, int byteCount) + { + if (bytes == null) + throw new ArgumentNullException ("bytes"); + if (chars == null) + throw new ArgumentNullException ("chars"); + if (charCount < 0) + throw new ArgumentOutOfRangeException ("charCount"); + if (byteCount < 0) + throw new ArgumentOutOfRangeException ("byteCount"); + + return GetBytesInternal (chars, charCount, bytes, byteCount); + } +#endif + + private unsafe int GetBytesInternal (char* chars, int charCount, + byte* bytes, int byteCount) + { + int count = charCount * 2; + + if (byteCount < count) + throw new ArgumentException (_("Arg_InsufficientSpace")); + + CopyChars ((byte*) chars, bytes, count, bigEndian); + return count; + } + + // Get the number of characters needed to decode a byte buffer. + public override int GetCharCount (byte[] bytes, int index, int count) + { + if (bytes == null) { + throw new ArgumentNullException ("bytes"); + } + if (index < 0 || index > bytes.Length) { + throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array")); + } + if (count < 0 || count > (bytes.Length - index)) { + throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array")); + } + return count / 2; + } + +#if NET_2_0 + [CLSCompliantAttribute (false)] + [ComVisible (false)] + public unsafe override int GetCharCount (byte* bytes, int count) + { + if (bytes == null) + throw new ArgumentNullException ("bytes"); + if (count < 0) + throw new ArgumentOutOfRangeException ("count"); + + return count / 2; + } +#endif + + // Get the characters that result from decoding a byte buffer. + public unsafe override int GetChars (byte [] bytes, int byteIndex, int byteCount, + char [] chars, int charIndex) + { + if (bytes == null) { + throw new ArgumentNullException ("bytes"); + } + if (chars == null) { + throw new ArgumentNullException ("chars"); + } + if (byteIndex < 0 || byteIndex > bytes.Length) { + throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array")); + } + if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) { + throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array")); + } + if (charIndex < 0 || charIndex > chars.Length) { + throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array")); + } + + if (byteCount == 0) + return 0; + + int charCount = chars.Length - charIndex; + if (chars.Length == 0) + chars = new char [1]; + + fixed (byte* bytePtr = bytes) + fixed (char* charPtr = chars) + return GetCharsInternal (bytePtr + byteIndex, byteCount, charPtr + charIndex, charCount); +} + +#if NET_2_0 + [CLSCompliantAttribute (false)] + [ComVisible (false)] + public unsafe override int GetChars (byte* bytes, int byteCount, + char* chars, int charCount) + { + if (bytes == null) + throw new ArgumentNullException ("bytes"); + if (chars == null) + throw new ArgumentNullException ("chars"); + if (charCount < 0) + throw new ArgumentOutOfRangeException ("charCount"); + if (byteCount < 0) + throw new ArgumentOutOfRangeException ("byteCount"); + + return GetCharsInternal (bytes, byteCount, chars, charCount); + } +#endif + + // Decode a buffer of bytes into a string. + [ComVisible (false)] + public unsafe override String GetString (byte [] bytes, int index, int count) + { + if (bytes == null) + throw new ArgumentNullException ("bytes"); + if (index < 0 || index > bytes.Length) + throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array")); + if (count < 0 || count > (bytes.Length - index)) + throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array")); + + if (count == 0) + return string.Empty; + + // GetCharCountInternal + int charCount = count / 2; + string s = string.InternalAllocateStr (charCount); + + fixed (byte* bytePtr = bytes) + fixed (char* charPtr = s) + GetCharsInternal (bytePtr + index, count, charPtr, charCount); + + return s; + } + + private unsafe int GetCharsInternal (byte* bytes, int byteCount, + char* chars, int charCount) + { + int count = byteCount / 2; + + // Validate that we have sufficient space in "chars". + if (charCount < count) + throw new ArgumentException (_("Arg_InsufficientSpace")); + + CopyChars (bytes, (byte*) chars, byteCount, bigEndian); + return count; + } + + // Get the maximum number of bytes needed to encode a + // specified number of characters. + public override int GetMaxByteCount (int charCount) + { + if (charCount < 0) { + throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative")); + } + return charCount * 2; + } + + // Get the maximum number of characters needed to decode a + // specified number of bytes. + public override int GetMaxCharCount (int byteCount) + { + if (byteCount < 0) { + throw new ArgumentOutOfRangeException + ("byteCount", _("ArgRange_NonNegative")); + } + return byteCount / 2; + } + + // Get a Unicode-specific decoder that is attached to this instance. + public override Decoder GetDecoder () + { + return new UnicodeDecoder (bigEndian); + } + + // Get the Unicode preamble. + public override byte[] GetPreamble () + { + if (byteOrderMark) { + byte[] preamble = new byte[2]; + if (bigEndian) { + preamble[0] = (byte)0xFE; + preamble[1] = (byte)0xFF; + } else { + preamble[0] = (byte)0xFF; + preamble[1] = (byte)0xFE; + } + return preamble; + } else { + return new byte [0]; + } + } + + // Determine if this object is equal to another. + public override bool Equals (Object value) + { + UnicodeEncoding enc = (value as UnicodeEncoding); + if (enc != null) { + return (codePage == enc.codePage && + bigEndian == enc.bigEndian && + byteOrderMark == enc.byteOrderMark); + } else { + return false; + } + } + + // Get the hash code for this object. + public override int GetHashCode () + { + return base.GetHashCode (); + } + + private unsafe static void CopyChars (byte* src, byte* dest, int count, bool bigEndian) + { + if (BitConverter.IsLittleEndian != bigEndian) { + string.memcpy (dest, src, count & unchecked ((int) 0xFFFFFFFE)); + return; + } + + switch (count) { + case 0: + return; + case 1: + return; + case 2: + goto Count2; + case 3: + goto Count2; + case 4: + goto Count4; + case 5: + goto Count4; + case 6: + goto Count4; + case 7: + goto Count4; + case 8: + goto Count8; + case 9: + goto Count8; + case 10: + goto Count8; + case 11: + goto Count8; + case 12: + goto Count8; + case 13: + goto Count8; + case 14: + goto Count8; + case 15: + goto Count8; + } + + do { + dest [0] = src [1]; + dest [1] = src [0]; + dest [2] = src [3]; + dest [3] = src [2]; + dest [4] = src [5]; + dest [5] = src [4]; + dest [6] = src [7]; + dest [7] = src [6]; + dest [8] = src [9]; + dest [9] = src [8]; + dest [10] = src [11]; + dest [11] = src [10]; + dest [12] = src [13]; + dest [13] = src [12]; + dest [14] = src [15]; + dest [15] = src [14]; + dest += 16; + src += 16; + count -= 16; + } while ((count & unchecked ((int) 0xFFFFFFF0)) != 0); + + switch (count) { + case 0: + return; + case 1: + return; + case 2: + goto Count2; + case 3: + goto Count2; + case 4: + goto Count4; + case 5: + goto Count4; + case 6: + goto Count4; + case 7: + goto Count4; + } + + Count8:; + dest [0] = src [1]; + dest [1] = src [0]; + dest [2] = src [3]; + dest [3] = src [2]; + dest [4] = src [5]; + dest [5] = src [4]; + dest [6] = src [7]; + dest [7] = src [6]; + dest += 8; + src += 8; + + if ((count & 4) == 0) + goto TestCount2; + Count4:; + dest [0] = src [1]; + dest [1] = src [0]; + dest [2] = src [3]; + dest [3] = src [2]; + dest += 4; + src += 4; + + TestCount2:; + if ((count & 2) == 0) + return; + Count2:; + dest [0] = src [1]; + dest [1] = src [0]; + } + + // Unicode decoder implementation. + private sealed class UnicodeDecoder : Decoder + { + private bool bigEndian; + private int leftOverByte; + + // Constructor. + public UnicodeDecoder (bool bigEndian) { - init (false); + this.bigEndian = bigEndian; + leftOverByte = -1; } - - public UnicodeEncoding (bool bigEndian, bool byteOrderMark) : base ("UNICODE", bigEndian) + + // Override inherited methods. + public override int GetCharCount (byte[] bytes, int index, int count) { - init (byteOrderMark); + if (bytes == null) { + throw new ArgumentNullException ("bytes"); + } + if (index < 0 || index > bytes.Length) { + throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array")); + } + if (count < 0 || count > (bytes.Length - index)) { + throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array")); + } + if (leftOverByte != -1) { + return (count + 1) / 2; + } else { + return count / 2; + } } - public override int GetMaxByteCount (int charCount) + public unsafe override int GetChars (byte [] bytes, int byteIndex, + int byteCount, char [] chars, + int charIndex) { - return charCount; - } + if (bytes == null) { + throw new ArgumentNullException ("bytes"); + } + if (chars == null) { + throw new ArgumentNullException ("chars"); + } + if (byteIndex < 0 || byteIndex > bytes.Length) { + throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array")); + } + if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) { + throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array")); + } + if (charIndex < 0 || charIndex > chars.Length) { + throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array")); + } - public override int GetMaxCharCount (int byteCount) - { - return byteCount / 2; + if (byteCount == 0) + return 0; + + int leftOver = leftOverByte; + int count; + + if (leftOver != -1) + count = (byteCount + 1) / 2; + else + count = byteCount / 2; + + if (chars.Length - charIndex < count) + throw new ArgumentException (_("Arg_InsufficientSpace")); + + if (leftOver != -1) { + if (bigEndian) + chars [charIndex] = unchecked ((char) ((leftOver << 8) | (int) bytes [byteIndex])); + else + chars [charIndex] = unchecked ((char) (((int) bytes [byteIndex] << 8) | leftOver)); + charIndex++; + byteIndex++; + byteCount--; + } + + if ((byteCount & unchecked ((int) 0xFFFFFFFE)) != 0) + fixed (byte* bytePtr = bytes) + fixed (char* charPtr = chars) + CopyChars (bytePtr + byteIndex, (byte*) (charPtr + charIndex), byteCount, bigEndian); + + if ((byteCount & 1) == 0) + leftOverByte = -1; + else + leftOverByte = bytes [byteCount + byteIndex - 1]; + + return count; } - } -} + + } // class UnicodeDecoder + +}; // class UnicodeEncoding + +}; // namespace System.Text