2 * UnicodeEncoding.cs - Implementation of the
3 * "System.Text.UnicodeEncoding" class.
5 * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
6 * Copyright (C) 2003, 2004 Novell, Inc.
7 * Copyright (C) 2006 Kornél Pál <http://www.kornelpal.hu/>
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
24 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 * OTHER DEALINGS IN THE SOFTWARE.
32 using System.Runtime.InteropServices;
// Encoding subclass implementing System.Text.UnicodeEncoding (see the file header).
// The MonoTODO attribute records that serialization compatibility with MS.NET
// is still outstanding.
38 [MonoTODO ("Fix serialization compatibility with MS.NET")]
39 public class UnicodeEncoding : Encoding
41 // Magic numbers used by Windows for Unicode.
// The two-argument constructor passes 1201 to the base class when bigEndian is
// true and 1200 otherwise.
42 internal const int UNICODE_CODE_PAGE = 1200;
43 internal const int BIG_UNICODE_CODE_PAGE = 1201;
46 // Size of characters in this encoding.
// NOTE(review): the size computations visible in this class use a literal 2
// rather than this constant.
47 public const int CharSize = 2;
// Byte order chosen at construction; forwarded to CopyChars and to UnicodeDecoder.
51 private bool bigEndian;
// Byte-order-mark flag chosen at construction; it takes part in Equals.
52 private bool byteOrderMark;
// Default constructor: chains to the two-argument constructor with
// bigEndian = false and byteOrderMark = true.
55 public UnicodeEncoding () : this (false, true)
// Creates an encoding with the requested byte order and byte-order-mark flag.
// The base constructor receives BIG_UNICODE_CODE_PAGE when bigEndian is true
// and UNICODE_CODE_PAGE otherwise.
60 public UnicodeEncoding (bool bigEndian, bool byteOrderMark)
61 : base ((bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE))
63 this.bigEndian = bigEndian;
64 this.byteOrderMark = byteOrderMark;
// First group of name metadata ("unicodeFFFE" / "Unicode (Big-Endian)").
// NOTE(review): the condition selecting between the two groups is not visible
// in this listing - presumably this group applies when bigEndian is true; confirm.
67 body_name = "unicodeFFFE";
68 encoding_name = "Unicode (Big-Endian)";
69 header_name = "unicodeFFFE";
70 is_browser_save = false;
71 web_name = "unicodeFFFE";
// Second group of name metadata ("Unicode" / "utf-16"), presumably the
// little-endian case - confirm. Only three of its assignments are visible here.
74 encoding_name = "Unicode";
75 header_name = "utf-16";
76 is_browser_save = true;
80 // Windows reports the same code page number for
81 // both the little-endian and big-endian forms.
82 windows_code_page = UNICODE_CODE_PAGE;
// Three-argument overload. bigEndian and byteOrderMark are forwarded to the
// two-argument constructor; throwOnInvalidBytes is not forwarded, and the
// MonoTODO attribute marks its handling as still to be implemented.
86 [MonoTODO ("Implement throwOnInvalidBytes")]
87 public UnicodeEncoding (bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes)
88 : this (bigEndian, byteOrderMark)
93 // Get the number of bytes needed to encode a character buffer.
// Argument validation visible here:
//   ArgumentNullException ("chars");
//   ArgumentOutOfRangeException ("index") when index is negative or greater than chars.Length;
//   ArgumentOutOfRangeException ("count") when count is negative or exceeds chars.Length - index.
// NOTE(review): the condition guarding the first throw and the return statement
// are not visible in this listing.
94 public override int GetByteCount (char[] chars, int index, int count)
97 throw new ArgumentNullException ("chars");
99 if (index < 0 || index > chars.Length) {
100 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
102 if (count < 0 || count > (chars.Length - index)) {
103 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
// String overload of GetByteCount. It can throw ArgumentNullException ("s").
// NOTE(review): the guard condition and the returned expression are not
// visible in this listing.
108 public override int GetByteCount (String s)
111 throw new ArgumentNullException ("s");
// Pointer overload of GetByteCount; marked non-CLS-compliant (it takes a pointer parameter).
// It can throw ArgumentNullException ("chars") and ArgumentOutOfRangeException ("count").
// NOTE(review): the guard conditions and the return statement are not visible
// in this listing.
117 [CLSCompliantAttribute (false)]
119 public unsafe override int GetByteCount (char* chars, int count)
122 throw new ArgumentNullException ("chars");
124 throw new ArgumentOutOfRangeException ("count");
130 // Get the bytes that result from encoding a character buffer.
// Validates both arrays and the three index/count arguments, then pins the
// arrays and hands the actual encoding to GetBytesInternal.
// NOTE(review): the conditions guarding the two ArgumentNullException throws
// are not visible in this listing.
131 public unsafe override int GetBytes (char [] chars, int charIndex, int charCount,
132 byte [] bytes, int byteIndex)
135 throw new ArgumentNullException ("chars");
138 throw new ArgumentNullException ("bytes");
140 if (charIndex < 0 || charIndex > chars.Length) {
141 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
143 if (charCount < 0 || charCount > (chars.Length - charIndex)) {
144 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
146 if (byteIndex < 0 || byteIndex > bytes.Length) {
147 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
// Space left in the output starting at byteIndex. It is computed before the
// array substitution below, so it stays 0 for an empty output array.
153 int byteCount = bytes.Length - byteIndex;
// An empty output array is swapped for a one-element scratch array before
// pinning - presumably so the fixed statement yields a usable pointer; confirm.
154 if (bytes.Length == 0)
155 bytes = new byte [1];
// Pin both buffers and pass pointers already offset by charIndex / byteIndex.
157 fixed (char* charPtr = chars)
158 fixed (byte* bytePtr = bytes)
159 return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, byteCount);
// Encodes a whole string into a newly allocated byte array.
// It can throw ArgumentNullException ("s"); the guard condition is not visible here.
163 public override byte [] GetBytes (String s)
166 throw new ArgumentNullException ("s");
// Size the output with GetByteCount, then fill it through the five-argument overload.
168 int byteCount = GetByteCount (s);
169 byte [] bytes = new byte [byteCount];
171 GetBytes (s, 0, s.Length, bytes, 0);
// NOTE(review): the return statement is not visible in this listing.
// Encodes charCount characters of s, starting at charIndex, into bytes at byteIndex.
// Validates the string, the output array and the three index/count arguments,
// then pins both and hands the encoding to GetBytesInternal.
// NOTE(review): the conditions guarding the two ArgumentNullException throws
// are not visible in this listing.
177 public unsafe override int GetBytes (String s, int charIndex, int charCount,
178 byte [] bytes, int byteIndex)
181 throw new ArgumentNullException ("s");
184 throw new ArgumentNullException ("bytes");
186 if (charIndex < 0 || charIndex > s.Length) {
187 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
189 if (charCount < 0 || charCount > (s.Length - charIndex)) {
190 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
192 if (byteIndex < 0 || byteIndex > bytes.Length) {
193 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
// Space left in the output starting at byteIndex. It is computed before the
// array substitution below, so it stays 0 for an empty output array.
200 int byteCount = bytes.Length - byteIndex;
// An empty output array is swapped for a one-element scratch array before
// pinning - presumably so the fixed statement yields a usable pointer; confirm.
201 if (bytes.Length == 0)
202 bytes = new byte [1];
// Pin the string's characters and the output, offset by charIndex / byteIndex.
204 fixed (char* charPtr = s)
205 fixed (byte* bytePtr = bytes)
206 return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, byteCount);
// Pointer overload of GetBytes; marked non-CLS-compliant (it takes pointer parameters).
// It can throw ArgumentNullException ("bytes" / "chars") and
// ArgumentOutOfRangeException ("charCount" / "byteCount"), then delegates to
// GetBytesInternal with its arguments unchanged.
// NOTE(review): the conditions guarding the four throws are not visible in this listing.
210 [CLSCompliantAttribute (false)]
212 public unsafe override int GetBytes (char* chars, int charCount,
213 byte* bytes, int byteCount)
216 throw new ArgumentNullException ("bytes");
218 throw new ArgumentNullException ("chars");
220 throw new ArgumentOutOfRangeException ("charCount");
222 throw new ArgumentOutOfRangeException ("byteCount");
224 return GetBytesInternal (chars, charCount, bytes, byteCount);
// Shared encoding core used by the public GetBytes overloads.
// Throws ArgumentException when the output has fewer than charCount * 2 bytes available.
228 private unsafe int GetBytesInternal (char* chars, int charCount,
229 byte* bytes, int byteCount)
// Two output bytes are needed per input char.
// NOTE(review): no overflow guard on this multiplication is visible here.
231 int count = charCount * 2;
233 if (byteCount < count)
234 throw new ArgumentException (_("Arg_InsufficientSpace"));
// Copy the chars as raw bytes; CopyChars receives the bigEndian flag to pick the byte order.
236 CopyChars ((byte*) chars, bytes, count, bigEndian);
// NOTE(review): the return statement is not visible in this listing.
240 // Get the number of characters needed to decode a byte buffer.
// Argument validation visible here:
//   ArgumentNullException ("bytes");
//   ArgumentOutOfRangeException ("index") when index is negative or greater than bytes.Length;
//   ArgumentOutOfRangeException ("count") when count is negative or exceeds bytes.Length - index.
// NOTE(review): the condition guarding the first throw and the return statement
// are not visible in this listing.
241 public override int GetCharCount (byte[] bytes, int index, int count)
244 throw new ArgumentNullException ("bytes");
246 if (index < 0 || index > bytes.Length) {
247 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
249 if (count < 0 || count > (bytes.Length - index)) {
250 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
// Pointer overload of GetCharCount; marked non-CLS-compliant (it takes a pointer parameter).
// It can throw ArgumentNullException ("bytes") and ArgumentOutOfRangeException ("count").
// NOTE(review): the guard conditions and the return statement are not visible
// in this listing.
256 [CLSCompliantAttribute (false)]
258 public unsafe override int GetCharCount (byte* bytes, int count)
261 throw new ArgumentNullException ("bytes");
263 throw new ArgumentOutOfRangeException ("count");
269 // Get the characters that result from decoding a byte buffer.
// Validates both arrays and the three index/count arguments, then pins the
// arrays and hands the actual decoding to GetCharsInternal.
// NOTE(review): the conditions guarding the two ArgumentNullException throws
// are not visible in this listing.
270 public unsafe override int GetChars (byte [] bytes, int byteIndex, int byteCount,
271 char [] chars, int charIndex)
274 throw new ArgumentNullException ("bytes");
277 throw new ArgumentNullException ("chars");
279 if (byteIndex < 0 || byteIndex > bytes.Length) {
280 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
282 if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
283 throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
285 if (charIndex < 0 || charIndex > chars.Length) {
286 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
// Space left in the output starting at charIndex. It is computed before the
// array substitution below, so it stays 0 for an empty output array.
292 int charCount = chars.Length - charIndex;
// An empty output array is swapped for a one-element scratch array before
// pinning - presumably so the fixed statement yields a usable pointer; confirm.
293 if (chars.Length == 0)
294 chars = new char [1];
// Pin both buffers and pass pointers already offset by byteIndex / charIndex.
296 fixed (byte* bytePtr = bytes)
297 fixed (char* charPtr = chars)
298 return GetCharsInternal (bytePtr + byteIndex, byteCount, charPtr + charIndex, charCount);
// Pointer overload of GetChars; marked non-CLS-compliant (it takes pointer parameters).
// It can throw ArgumentNullException ("bytes" / "chars") and
// ArgumentOutOfRangeException ("charCount" / "byteCount"), then delegates to
// GetCharsInternal with its arguments unchanged.
// NOTE(review): the conditions guarding the four throws are not visible in this listing.
302 [CLSCompliantAttribute (false)]
304 public unsafe override int GetChars (byte* bytes, int byteCount,
305 char* chars, int charCount)
308 throw new ArgumentNullException ("bytes");
310 throw new ArgumentNullException ("chars");
312 throw new ArgumentOutOfRangeException ("charCount");
314 throw new ArgumentOutOfRangeException ("byteCount");
316 return GetCharsInternal (bytes, byteCount, chars, charCount);
320 // Decode a buffer of bytes into a string.
// Validates bytes/index/count, allocates a string of count / 2 characters and
// decodes directly into it through GetCharsInternal.
// NOTE(review): the condition guarding the ArgumentNullException and the
// return statement are not visible in this listing.
322 public unsafe override String GetString (byte [] bytes, int index, int count)
325 throw new ArgumentNullException ("bytes");
326 if (index < 0 || index > bytes.Length)
327 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
328 if (count < 0 || count > (bytes.Length - index))
329 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
334 // GetCharCountInternal
// Integer division: an odd trailing byte does not contribute a character.
335 int charCount = count / 2;
336 string s = string.InternalAllocateStr (charCount);
// Pin the input and the new string; the decoder writes into the string through charPtr.
338 fixed (byte* bytePtr = bytes)
339 fixed (char* charPtr = s)
340 GetCharsInternal (bytePtr + index, count, charPtr, charCount);
// Shared decoding core used by the public GetChars overloads and GetString.
// Throws ArgumentException when chars has room for fewer than byteCount / 2 characters.
345 private unsafe int GetCharsInternal (byte* bytes, int byteCount,
346 char* chars, int charCount)
// Number of whole characters in the input (integer division drops an odd trailing byte).
348 int count = byteCount / 2;
350 // Validate that we have sufficient space in "chars".
351 if (charCount < count)
352 throw new ArgumentException (_("Arg_InsufficientSpace"));
// The unrounded byteCount is passed on; CopyChars' memcpy path masks off the low bit itself.
354 CopyChars (bytes, (byte*) chars, byteCount, bigEndian);
// NOTE(review): the return statement is not visible in this listing.
358 // Get the maximum number of bytes needed to encode a
359 // specified number of characters.
// It can throw ArgumentOutOfRangeException ("charCount"); the guard condition
// is not visible in this listing (the message key suggests a negative-value check).
360 public override int GetMaxByteCount (int charCount)
363 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
// NOTE(review): no overflow guard is visible; for charCount above
// int.MaxValue / 2 this product does not fit in an int - confirm intended behaviour.
365 return charCount * 2;
368 // Get the maximum number of characters needed to decode a
369 // specified number of bytes.
// It can throw ArgumentOutOfRangeException ("byteCount"); the guard condition
// is not visible in this listing (the message key suggests a negative-value check).
370 public override int GetMaxCharCount (int byteCount)
373 throw new ArgumentOutOfRangeException
374 ("byteCount", _("ArgRange_NonNegative"));
// Integer division: an odd trailing byte does not count as a character.
376 return byteCount / 2;
379 // Get a Unicode-specific decoder that is attached to this instance.
// Each call returns a new UnicodeDecoder constructed with this encoding's bigEndian flag.
380 public override Decoder GetDecoder ()
382 return new UnicodeDecoder (bigEndian);
385 // Get the Unicode preamble.
// Builds a two-byte preamble array holding either FE FF or FF FE.
// NOTE(review): the conditions choosing between the two byte orders, any
// handling of byteOrderMark, and the return statement are not visible in this listing.
386 public override byte[] GetPreamble ()
389 byte[] preamble = new byte[2];
// FE FF is U+FEFF serialized big-endian.
391 preamble[0] = (byte)0xFE;
392 preamble[1] = (byte)0xFF;
// FF FE is U+FEFF serialized little-endian.
394 preamble[0] = (byte)0xFF;
395 preamble[1] = (byte)0xFE;
403 // Determine if this object is equal to another.
// Two encodings compare equal when codePage, bigEndian and byteOrderMark all match.
404 public override bool Equals (Object value)
// The "as" cast yields null when value is null or not a UnicodeEncoding.
// NOTE(review): a null check on enc before the dereference below is not
// visible in this listing - confirm it exists.
406 UnicodeEncoding enc = (value as UnicodeEncoding);
408 return (codePage == enc.codePage &&
409 bigEndian == enc.bigEndian &&
410 byteOrderMark == enc.byteOrderMark);
416 // Get the hash code for this object.
// Defers entirely to the base class implementation; bigEndian and
// byteOrderMark are not mixed in here.
417 public override int GetHashCode ()
419 return base.GetHashCode ();
// Copies count bytes of UTF-16 data from src to dest, either verbatim or with
// the two bytes of each 16-bit unit swapped.
// NOTE(review): most of the swapping loop is missing from this listing; the
// comments below describe only the lines that are visible.
422 private unsafe static void CopyChars (byte* src, byte* dest, int count, bool bigEndian)
// True when the host byte order already matches the requested one
// (little-endian host with bigEndian == false, or big-endian host with bigEndian == true).
424 if (BitConverter.IsLittleEndian != bigEndian) {
// Straight copy; the mask clears the low bit so only whole byte pairs are copied.
425 string.memcpy (dest, src, count & unchecked ((int) 0xFFFFFFFE));
// Pair-swapping assignments: each destination pair receives the source pair
// reversed. These look like the tail of a 16-bytes-per-iteration unrolled loop - confirm.
475 dest [10] = src [11];
476 dest [11] = src [10];
477 dest [12] = src [13];
478 dest [13] = src [12];
479 dest [14] = src [15];
480 dest [15] = src [14];
// Loops while any bit above the low four is set in count (i.e. count >= 16 when non-negative).
484 } while ((count & unchecked ((int) 0xFFFFFFF0)) != 0);
// Tests of individual low bits of count, presumably gating the handling of a
// remaining 4-byte and 2-byte tail - the guarded statements are not visible; confirm.
517 if ((count & 4) == 0)
528 if ((count & 2) == 0)
535 // Unicode decoder implementation.
// Decoder that can carry a single unpaired byte over from one GetChars call to the next.
// NOTE(review): several lines of this class (braces, guards, returns) are
// missing from this listing; comments describe only what is visible.
536 private sealed class UnicodeDecoder : Decoder
// Byte order to decode with; forwarded to CopyChars.
538 private bool bigEndian;
// Pending byte from a previous call; the checks below treat -1 as "none".
// NOTE(review): its initialisation is not visible in this listing.
539 private int leftOverByte;
542 public UnicodeDecoder (bool bigEndian)
544 this.bigEndian = bigEndian;
548 // Override inherited methods.
// Validates bytes/index/count like the encoding's GetCharCount. When a byte is
// pending it returns (count + 1) / 2, counting that byte towards the result.
// NOTE(review): the null-check condition and the no-pending-byte return are not visible.
549 public override int GetCharCount (byte[] bytes, int index, int count)
552 throw new ArgumentNullException ("bytes");
554 if (index < 0 || index > bytes.Length) {
555 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
557 if (count < 0 || count > (bytes.Length - index)) {
558 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
560 if (leftOverByte != -1) {
561 return (count + 1) / 2;
// Decodes bytes into chars, first combining any pending byte with the first
// new byte, then bulk-copying the remaining pairs via CopyChars.
// NOTE(review): the last parameter line of this signature is not visible in this listing.
567 public unsafe override int GetChars (byte [] bytes, int byteIndex,
568 int byteCount, char [] chars,
572 throw new ArgumentNullException ("bytes");
575 throw new ArgumentNullException ("chars");
577 if (byteIndex < 0 || byteIndex > bytes.Length) {
578 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
580 if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
581 throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
583 if (charIndex < 0 || charIndex > chars.Length) {
584 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
// Work on a local copy of the pending byte.
590 int leftOver = leftOverByte;
// Two alternative character counts: rounded up or rounded down.
// NOTE(review): the selecting condition is not visible - presumably the
// rounded-up form applies when a byte is pending; confirm.
594 count = (byteCount + 1) / 2;
596 count = byteCount / 2;
598 if (chars.Length - charIndex < count)
599 throw new ArgumentException (_("Arg_InsufficientSpace"));
601 if (leftOver != -1) {
// Pending byte becomes the high byte and the new byte the low byte
// (presumably the big-endian branch - confirm).
603 chars [charIndex] = unchecked ((char) ((leftOver << 8) | (int) bytes [byteIndex]));
// New byte becomes the high byte and the pending byte the low byte
// (presumably the little-endian branch - confirm).
605 chars [charIndex] = unchecked ((char) (((int) bytes [byteIndex] << 8) | leftOver));
// Bulk path: taken when byteCount has any bit other than the lowest set
// (at least 2 for non-negative values).
611 if ((byteCount & unchecked ((int) 0xFFFFFFFE)) != 0)
612 fixed (byte* bytePtr = bytes)
613 fixed (char* charPtr = chars)
614 CopyChars (bytePtr + byteIndex, (byte*) (charPtr + charIndex), byteCount, bigEndian);
// Even byteCount: the statement guarded by this test is not visible
// (presumably it clears the pending byte - confirm).
616 if ((byteCount & 1) == 0)
// Remember the final byte of the processed range for the next call.
619 leftOverByte = bytes [byteCount + byteIndex - 1];
624 } // class UnicodeDecoder
626 }; // class UnicodeEncoding
628 }; // namespace System.Text