* "System.Text.UnicodeEncoding" class.
*
* Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
+ * Copyright (C) 2003, 2004 Novell, Inc.
+ * Copyright (C) 2006 Kornél Pál <http://www.kornelpal.hu/>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the "Software"),
{
using System;
+using System.Runtime.InteropServices;
[Serializable]
+#if NET_2_0
+[ComVisible (true)]
+#endif
+[MonoTODO ("Serialization format not compatible with .NET")]
public class UnicodeEncoding : Encoding
{
// Magic numbers used by Windows for Unicode.
private bool byteOrderMark;
// Constructors.
- public UnicodeEncoding () : base(UNICODE_CODE_PAGE)
+ public UnicodeEncoding () : this (false, true)
{
bigEndian = false;
byteOrderMark = true;
{
this.bigEndian = bigEndian;
this.byteOrderMark = byteOrderMark;
+
+ if (bigEndian){
+ body_name = "unicodeFFFE";
+ encoding_name = "Unicode (Big-Endian)";
+ header_name = "unicodeFFFE";
+ is_browser_save = false;
+ web_name = "unicodeFFFE";
+ } else {
+ body_name = "utf-16";
+ encoding_name = "Unicode";
+ header_name = "utf-16";
+ is_browser_save = true;
+ web_name = "utf-16";
+ }
+
+ // Windows reports the same code page number for
+ // both the little-endian and big-endian forms.
+ windows_code_page = UNICODE_CODE_PAGE;
}
+#if NET_2_0
+ [MonoTODO ("Implement throwOnInvalidBytes")]
+ public UnicodeEncoding (bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes) // Three-argument constructor; only compiled when NET_2_0 is defined.
+ : this (bigEndian, byteOrderMark) // All initialization is delegated to the two-argument constructor.
+ { // throwOnInvalidBytes is accepted but not used by this body (see the MonoTODO above).
+ }
+#endif
+
// Get the number of bytes needed to encode a character buffer.
public override int GetByteCount (char[] chars, int index, int count)
{
return count * 2;
}
- // Convenience wrappers for "GetByteCount".
public override int GetByteCount (String s)
{
if (s == null) {
return s.Length * 2;
}
+#if NET_2_0
+	// Get the number of bytes needed to encode "count" characters
+	// starting at "chars".  Every character takes exactly two bytes.
+	//
+	// Throws ArgumentNullException if "chars" is null, and
+	// ArgumentOutOfRangeException if "count" is negative or so large
+	// that the resulting byte count would not fit in an int.
+	[CLSCompliantAttribute (false)]
+	[ComVisible (false)]
+	public unsafe override int GetByteCount (char* chars, int count)
+	{
+		if (chars == null)
+			throw new ArgumentNullException ("chars");
+		// The upper bound keeps "count * 2" from wrapping around to a
+		// negative value, which would otherwise be returned as a length.
+		if (count < 0 || count > int.MaxValue / 2)
+			throw new ArgumentOutOfRangeException ("count");
+
+		return count * 2;
+	}
+#endif
+
// Get the bytes that result from encoding a character buffer.
- public override int GetBytes (char[] chars, int charIndex, int charCount,
- byte[] bytes, int byteIndex)
+ public unsafe override int GetBytes (char [] chars, int charIndex, int charCount,
+ byte [] bytes, int byteIndex)
{
if (chars == null) {
throw new ArgumentNullException ("chars");
if (byteIndex < 0 || byteIndex > bytes.Length) {
throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
}
- if ((bytes.Length - byteIndex) < (charCount * 2)) {
- throw new ArgumentException (_("Arg_InsufficientSpace"));
- }
- int posn = byteIndex;
- char ch;
- if (bigEndian) {
- while (charCount-- > 0) {
- ch = chars[charIndex++];
- bytes[posn++] = (byte)(ch >> 8);
- bytes[posn++] = (byte)ch;
- }
- } else {
- while (charCount-- > 0) {
- ch = chars[charIndex++];
- bytes[posn++] = (byte)ch;
- bytes[posn++] = (byte)(ch >> 8);
- }
- }
- return posn - byteIndex;
+
+ if (charCount == 0)
+ return 0;
+
+ int byteCount = bytes.Length - byteIndex;
+ if (bytes.Length == 0)
+ bytes = new byte [1];
+
+ fixed (char* charPtr = chars)
+ fixed (byte* bytePtr = bytes)
+ return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, byteCount);
+ }
+
+#if !NET_2_0
+	// Encode an entire string and return the result in a new byte array.
+	// Throws ArgumentNullException when "s" is null.
+	public override byte [] GetBytes (String s)
+	{
+		if (s == null) {
+			throw new ArgumentNullException ("s");
+		}
+
+		// Size the output with GetByteCount, then let the five-argument
+		// overload fill it in.
+		byte [] encoded = new byte [GetByteCount (s)];
+		GetBytes (s, 0, s.Length, encoded, 0);
+		return encoded;
}
+#endif
- // Convenience wrappers for "GetBytes".
- public override int GetBytes (String s, int charIndex, int charCount,
- byte[] bytes, int byteIndex)
+ public unsafe override int GetBytes (String s, int charIndex, int charCount,
+ byte [] bytes, int byteIndex)
{
if (s == null) {
throw new ArgumentNullException ("s");
if (byteIndex < 0 || byteIndex > bytes.Length) {
throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
}
- if ((bytes.Length - byteIndex) < (charCount * 2)) {
+
+ // For consistency
+ if (charCount == 0)
+ return 0;
+
+ int byteCount = bytes.Length - byteIndex;
+ if (bytes.Length == 0)
+ bytes = new byte [1];
+
+ fixed (char* charPtr = s)
+ fixed (byte* bytePtr = bytes)
+ return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, byteCount);
+ }
+
+#if NET_2_0
+	// Pointer-based encode.  Validates the arguments and then hands the
+	// work to GetBytesInternal, whose result is returned unchanged.
+	//
+	// Throws ArgumentNullException for a null "bytes" or "chars" and
+	// ArgumentOutOfRangeException for a negative "charCount" or "byteCount".
+	[CLSCompliantAttribute (false)]
+	[ComVisible (false)]
+	public unsafe override int GetBytes (char* chars, int charCount,
+		byte* bytes, int byteCount)
+	{
+		// The check order is part of the behavior: "bytes" is tested
+		// before "chars", and the counts are tested after both pointers.
+		if (bytes == null) {
+			throw new ArgumentNullException ("bytes");
+		}
+		if (chars == null) {
+			throw new ArgumentNullException ("chars");
+		}
+		if (charCount < 0) {
+			throw new ArgumentOutOfRangeException ("charCount");
+		}
+		if (byteCount < 0) {
+			throw new ArgumentOutOfRangeException ("byteCount");
+		}
+
+		int written = GetBytesInternal (chars, charCount, bytes, byteCount);
+		return written;
+	}
+#endif
+
+	// Shared worker for the GetBytes overloads: copies "charCount"
+	// characters from "chars" into "bytes" through CopyChars and returns
+	// the number of bytes produced (charCount * 2).
+	//
+	// Throws ArgumentException when "byteCount" is smaller than the
+	// number of bytes required.
+	private unsafe int GetBytesInternal (char* chars, int charCount,
+		byte* bytes, int byteCount)
+	{
+		// Do the space check in 64-bit arithmetic.  A 32-bit
+		// "charCount * 2" wraps to a negative number for large counts,
+		// which would slip past this check and reach CopyChars as a
+		// negative length.
+		if (byteCount < (long) charCount * 2)
throw new ArgumentException (_("Arg_InsufficientSpace"));
- }
- int posn = byteIndex;
- char ch;
- if (bigEndian) {
- while (charCount-- > 0) {
- ch = s[charIndex++];
- bytes[posn++] = (byte)(ch >> 8);
- bytes[posn++] = (byte)ch;
- }
- } else {
- while (charCount-- > 0) {
- ch = s[charIndex++];
- bytes[posn++] = (byte)ch;
- bytes[posn++] = (byte)(ch >> 8);
- }
- }
- return posn - byteIndex;
+
+		// The check above bounds charCount * 2 by byteCount, so for a
+		// non-negative charCount this multiplication cannot overflow.
+		int count = charCount * 2;
+		CopyChars ((byte*) chars, bytes, count, bigEndian);
+		return count;
}
// Get the number of characters needed to decode a byte buffer.
if (count < 0 || count > (bytes.Length - index)) {
throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
}
- if (count >= 2) {
- if ((bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) ||
- (bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE)) {
- return ((count - 1) / 2);
- }
- }
return count / 2;
}
+#if NET_2_0
+	// Get the number of characters produced by decoding "count" bytes
+	// starting at "bytes".
+	//
+	// Throws ArgumentNullException if "bytes" is null and
+	// ArgumentOutOfRangeException if "count" is negative.
+	[CLSCompliantAttribute (false)]
+	[ComVisible (false)]
+	public unsafe override int GetCharCount (byte* bytes, int count)
+	{
+		if (bytes == null) {
+			throw new ArgumentNullException ("bytes");
+		}
+		if (count < 0) {
+			throw new ArgumentOutOfRangeException ("count");
+		}
+
+		// Two bytes per character; integer division leaves an odd
+		// trailing byte uncounted.
+		int charCount = count / 2;
+		return charCount;
+	}
+#endif
+
// Get the characters that result from decoding a byte buffer.
- public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
- char[] chars, int charIndex)
+ public unsafe override int GetChars (byte [] bytes, int byteIndex, int byteCount,
+ char [] chars, int charIndex)
{
if (bytes == null) {
throw new ArgumentNullException ("bytes");
throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
}
- // Determine the byte order in the incoming buffer.
- bool isBigEndian;
- if (byteCount >= 2) {
- if (bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) {
- isBigEndian = true;
- byteCount -= 2;
- byteIndex += 2;
- } else if (bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) {
- isBigEndian = false;
- byteCount -= 2;
- byteIndex += 2;
- } else {
- isBigEndian = bigEndian;
- }
- } else {
- isBigEndian = bigEndian;
- }
+ if (byteCount == 0)
+ return 0;
+
+ int charCount = chars.Length - charIndex;
+ if (chars.Length == 0)
+ chars = new char [1];
+
+ fixed (byte* bytePtr = bytes)
+ fixed (char* charPtr = chars)
+ return GetCharsInternal (bytePtr + byteIndex, byteCount, charPtr + charIndex, charCount);
+}
+
+#if NET_2_0
+	// Pointer-based decode.  Validates the arguments and then hands the
+	// work to GetCharsInternal, whose result is returned unchanged.
+	//
+	// Throws ArgumentNullException for a null "bytes" or "chars" and
+	// ArgumentOutOfRangeException for a negative "charCount" or "byteCount".
+	[CLSCompliantAttribute (false)]
+	[ComVisible (false)]
+	public unsafe override int GetChars (byte* bytes, int byteCount,
+		char* chars, int charCount)
+	{
+		// The check order is part of the behavior: both pointers are
+		// tested first, then "charCount" before "byteCount".
+		if (bytes == null) {
+			throw new ArgumentNullException ("bytes");
+		}
+		if (chars == null) {
+			throw new ArgumentNullException ("chars");
+		}
+		if (charCount < 0) {
+			throw new ArgumentOutOfRangeException ("charCount");
+		}
+		if (byteCount < 0) {
+			throw new ArgumentOutOfRangeException ("byteCount");
+		}
+
+		int decoded = GetCharsInternal (bytes, byteCount, chars, charCount);
+		return decoded;
+	}
+#endif
+
+	// Decode "count" bytes of "bytes", starting at "index", into a new string.
+	//
+	// Throws ArgumentNullException if "bytes" is null and
+	// ArgumentOutOfRangeException if "index" or "count" falls outside the
+	// array.  A zero "count" returns string.Empty.
+	[ComVisible (false)]
+	public unsafe override String GetString (byte [] bytes, int index, int count)
+	{
+		if (bytes == null) {
+			throw new ArgumentNullException ("bytes");
+		}
+		if (index < 0 || index > bytes.Length) {
+			throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
+		}
+		if (count < 0 || count > (bytes.Length - index)) {
+			throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
+		}
+		if (count == 0) {
+			return string.Empty;
+		}
+
+		// Two bytes per character; the string is allocated at its final
+		// length before decoding.
+		int length = count / 2;
+		string result = string.InternalAllocateStr (length);
+
+		// Pin both buffers and decode directly into the new string.
+		fixed (byte* source = bytes) {
+			fixed (char* target = result) {
+				GetCharsInternal (source + index, count, target, length);
+			}
+		}
+
+		return result;
+	}
+
+	// Shared worker for the decoding methods: copies "byteCount" bytes
+	// from "bytes" into "chars" through CopyChars and returns the number
+	// of characters produced (byteCount / 2).
+	//
+	// Throws ArgumentException when "charCount" is smaller than that number.
+	private unsafe int GetCharsInternal (byte* bytes, int byteCount,
+		char* chars, int charCount)
+	{
+		int decoded = byteCount / 2;
// Validate that we have sufficient space in "chars".
- if ((chars.Length - charIndex) < (byteCount / 2)) {
+		if (decoded > charCount)
throw new ArgumentException (_("Arg_InsufficientSpace"));
- }
- // Convert the characters.
- int posn = charIndex;
- if (isBigEndian) {
- while (byteCount >= 2) {
- chars[posn++] =
- ((char)((((int)(bytes[byteIndex])) << 8) |
- ((int)(bytes[byteIndex + 1]))));
- byteIndex += 2;
- byteCount -= 2;
- }
- } else {
- while (byteCount >= 2) {
- chars[posn++] =
- ((char)((((int)(bytes[byteIndex + 1])) << 8) |
- ((int)(bytes[byteIndex]))));
- byteIndex += 2;
- byteCount -= 2;
- }
- }
- return posn - charIndex;
+		// The raw byte count is passed on, not "decoded": CopyChars works
+		// in bytes.
+		CopyChars (bytes, (byte*) chars, byteCount, bigEndian);
+		return decoded;
}
// Get the maximum number of bytes needed to encode a
return base.GetHashCode ();
}
-#if !ECMA_COMPAT
-
- // Get the mail body name for this encoding.
- public override String BodyName
- {
- get {
- if (bigEndian) {
- return "unicodeFFFE";
- } else {
- return "utf-16";
- }
- }
- }
-
- // Get the human-readable name for this encoding.
- public override String EncodingName
- {
- get {
- if (bigEndian) {
- return "Unicode (Big-Endian)";
- } else {
- return "Unicode";
- }
- }
- }
-
- // Get the mail agent header name for this encoding.
- public override String HeaderName
- {
- get {
- if (bigEndian) {
- return "unicodeFFFE";
- } else {
- return "utf-16";
- }
- }
- }
-
- // Determine if this encoding can be saved from a Web browser.
- public override bool IsBrowserSave
+ private unsafe static void CopyChars (byte* src, byte* dest, int count, bool bigEndian) // Copies "count" bytes from src to dest; each byte pair is swapped when bigEndian equals BitConverter.IsLittleEndian, otherwise copied as-is. An odd final byte is not copied.
{
- get {
- return !bigEndian;
- }
- }
-
- // Get the IANA-preferred Web name for this encoding.
- public override String WebName
- {
- get {
- if (bigEndian) {
- return "unicodeFFFE";
- } else {
- return "utf-16";
- }
- }
+ if (BitConverter.IsLittleEndian != bigEndian) { // No swap needed on this path.
+ string.memcpy (dest, src, count & unchecked ((int) 0xFFFFFFFE)); // The mask clears bit 0, rounding count down to an even number of bytes.
+ return;
+ }
+ // Swap path: a count listed below jumps straight to the tail code for its size; an odd count shares the label of the even count just below it.
+ switch (count) {
+ case 0:
+ return;
+ case 1:
+ return;
+ case 2:
+ goto Count2;
+ case 3:
+ goto Count2;
+ case 4:
+ goto Count4;
+ case 5:
+ goto Count4;
+ case 6:
+ goto Count4;
+ case 7:
+ goto Count4;
+ case 8:
+ goto Count8;
+ case 9:
+ goto Count8;
+ case 10:
+ goto Count8;
+ case 11:
+ goto Count8;
+ case 12:
+ goto Count8;
+ case 13:
+ goto Count8;
+ case 14:
+ goto Count8;
+ case 15:
+ goto Count8;
+ }
+ // Main loop: swap eight byte pairs (16 bytes) per pass, repeating while count has any bit above the low four set.
+ do {
+ dest [0] = src [1];
+ dest [1] = src [0];
+ dest [2] = src [3];
+ dest [3] = src [2];
+ dest [4] = src [5];
+ dest [5] = src [4];
+ dest [6] = src [7];
+ dest [7] = src [6];
+ dest [8] = src [9];
+ dest [9] = src [8];
+ dest [10] = src [11];
+ dest [11] = src [10];
+ dest [12] = src [13];
+ dest [13] = src [12];
+ dest [14] = src [15];
+ dest [15] = src [14];
+ dest += 16;
+ src += 16;
+ count -= 16;
+ } while ((count & unchecked ((int) 0xFFFFFFF0)) != 0);
+ // Dispatch the remainder; a value not listed in this switch falls through to Count8.
+ switch (count) {
+ case 0:
+ return;
+ case 1:
+ return;
+ case 2:
+ goto Count2;
+ case 3:
+ goto Count2;
+ case 4:
+ goto Count4;
+ case 5:
+ goto Count4;
+ case 6:
+ goto Count4;
+ case 7:
+ goto Count4;
+ }
+ // Tail: swap four pairs here, then use bit 2 and bit 1 of count to decide whether two more pairs and one more pair follow.
+ Count8:;
+ dest [0] = src [1];
+ dest [1] = src [0];
+ dest [2] = src [3];
+ dest [3] = src [2];
+ dest [4] = src [5];
+ dest [5] = src [4];
+ dest [6] = src [7];
+ dest [7] = src [6];
+ dest += 8;
+ src += 8;
+
+ if ((count & 4) == 0) // Bit 2 clear: no two-pair step is needed.
+ goto TestCount2;
+ Count4:;
+ dest [0] = src [1];
+ dest [1] = src [0];
+ dest [2] = src [3];
+ dest [3] = src [2];
+ dest += 4;
+ src += 4;
+
+ TestCount2:;
+ if ((count & 2) == 0) // Bit 1 clear: no final pair remains.
+ return;
+ Count2:;
+ dest [0] = src [1];
+ dest [1] = src [0];
}
- // Get the Windows code page represented by this object.
- public override int WindowsCodePage
- {
- get {
- // Windows reports the same code page number for
- // both the little-endian and big-endian forms.
- return UNICODE_CODE_PAGE;
- }
- }
-
-#endif // !ECMA_COMPAT
-
// Unicode decoder implementation.
private sealed class UnicodeDecoder : Decoder
{
return count / 2;
}
}
- public override int GetChars (byte[] bytes, int byteIndex,
- int byteCount, char[] chars,
- int charIndex)
+
+ public unsafe override int GetChars (byte [] bytes, int byteIndex,
+ int byteCount, char [] chars,
+ int charIndex)
{
if (bytes == null) {
throw new ArgumentNullException ("bytes");
throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
}
- // Convert the characters.
- int posn = charIndex;
- bool isBigEndian = bigEndian;
+ if (byteCount == 0)
+ return 0;
+
int leftOver = leftOverByte;
- int length = chars.Length;
- char ch;
- while (byteCount > 0) {
- if (leftOver != -1) {
- if (isBigEndian) {
- ch = ((char)((leftOver << 8) | ((int)(bytes[byteIndex]))));
- } else {
- ch = ((char)(leftOver |
- (((int)(bytes[byteIndex])) << 8)));
- }
- leftOver = -1;
- ++byteIndex;
- --byteCount;
- } else if (byteCount > 1) {
- if (isBigEndian) {
- ch = ((char)((((int)(bytes[byteIndex])) << 8) |
- ((int)(bytes[byteIndex + 1]))));
- } else {
- ch = ((char)((((int)(bytes[byteIndex + 1])) << 8) |
- ((int)(bytes[byteIndex]))));
- }
- byteIndex += 2;
- byteCount -= 2;
- } else {
- leftOver = (int)(bytes[byteIndex]);
- break;
- }
- if (ch == '\uFFFE') {
- // Switch byte orders.
- bigEndian = !bigEndian;
- } else if (ch != '\uFEFF') {
- // Ordinary character.
- if (posn < length) {
- chars[posn++] = ch;
- } else {
- throw new ArgumentException (_("Arg_InsufficientSpace"));
- }
- }
+ int count;
+
+ if (leftOver != -1)
+ count = (byteCount + 1) / 2;
+ else
+ count = byteCount / 2;
+
+ if (chars.Length - charIndex < count)
+ throw new ArgumentException (_("Arg_InsufficientSpace"));
+
+ if (leftOver != -1) {
+ if (bigEndian)
+ chars [charIndex] = unchecked ((char) ((leftOver << 8) | (int) bytes [byteIndex]));
+ else
+ chars [charIndex] = unchecked ((char) (((int) bytes [byteIndex] << 8) | leftOver));
+ charIndex++;
+ byteIndex++;
+ byteCount--;
}
- leftOverByte = leftOver;
- bigEndian = isBigEndian;
- // Finished - return the converted length.
- return posn - charIndex;
+ if ((byteCount & unchecked ((int) 0xFFFFFFFE)) != 0)
+ fixed (byte* bytePtr = bytes)
+ fixed (char* charPtr = chars)
+ CopyChars (bytePtr + byteIndex, (byte*) (charPtr + charIndex), byteCount, bigEndian);
+
+ if ((byteCount & 1) == 0)
+ leftOverByte = -1;
+ else
+ leftOverByte = bytes [byteCount + byteIndex - 1];
+
+ return count;
}
} // class UnicodeDecoder