2 * UnicodeEncoding.cs - Implementation of the
3 * "System.Text.UnicodeEncoding" class.
5 * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
6 * Copyright (C) 2003, 2004 Novell, Inc.
7 * Copyright (C) 2006 Kornél Pál <http://www.kornelpal.hu/>
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
24 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 * OTHER DEALINGS IN THE SOFTWARE.
// UTF-16 text encoding built on Encoding. The two-argument constructor picks
// one of the code-page constants below and stores its flags in the two
// private fields.
// NOTE(review): this listing is lossy -- the embedded line numbers skip
// (36, 40-41 and 44-46 are absent here), so the opening brace and anything
// else declared on those lines is not visible.
34 [MonoTODO ("Fix serialization compatibility with MS.NET")]
35 public class UnicodeEncoding : Encoding
37 // Magic numbers used by Windows for Unicode.
// 1200 is handed to the base constructor for little-endian instances and
// 1201 for big-endian ones; windows_code_page is set to 1200 in both cases.
38 internal const int UNICODE_CODE_PAGE = 1200;
39 internal const int BIG_UNICODE_CODE_PAGE = 1201;
42 // Size of characters in this encoding.
// Two bytes per char: the byte-count helpers in this class multiply char
// counts by 2 and the char-count helpers divide byte counts by 2.
43 public const int CharSize = 2;
// Byte order requested at construction (true = big-endian).
47 private bool bigEndian;
// Copied from the constructor argument and compared in Equals.
// NOTE(review): presumably also gates GetPreamble -- that condition is not
// visible in this listing; confirm against the full source.
48 private bool byteOrderMark;
// Default constructor: chains to UnicodeEncoding (bool, bool) with
// bigEndian = false and byteOrderMark = true.
// NOTE(review): the body (listing lines 52-54) is absent from this extract.
51 public UnicodeEncoding () : this (false, true)
// Main constructor. Passes BIG_UNICODE_CODE_PAGE or UNICODE_CODE_PAGE to the
// base constructor depending on bigEndian, stores both flags, then assigns
// the descriptive name fields.
// NOTE(review): the branch that separates the two groups of name assignments
// (listing lines 61-62 and 68) is not visible; presumably it tests bigEndian
// -- confirm. Listing lines 69 and 73 are also absent, so the body_name and
// web_name values of the second group cannot be read from here.
56 public UnicodeEncoding (bool bigEndian, bool byteOrderMark)
57 : base ((bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE))
59 this.bigEndian = bigEndian;
60 this.byteOrderMark = byteOrderMark;
// First group: the "unicodeFFFE" / "Unicode (Big-Endian)" names.
63 body_name = "unicodeFFFE";
64 encoding_name = "Unicode (Big-Endian)";
65 header_name = "unicodeFFFE";
66 is_browser_save = false;
67 web_name = "unicodeFFFE";
// Second group: the "Unicode" / "utf-16" names.
70 encoding_name = "Unicode";
71 header_name = "utf-16";
72 is_browser_save = true;
76 // Windows reports the same code page number for
77 // both the little-endian and big-endian forms.
78 windows_code_page = UNICODE_CODE_PAGE;
81 // Get the number of bytes needed to encode a character buffer.
// Validates the (chars, index, count) window before computing the result:
// index must lie in [0, chars.Length] and count in [0, chars.Length - index];
// a violation raises ArgumentOutOfRangeException naming the offending
// parameter.
// NOTE(review): listing lines 83-84 and 93 are absent, so the condition
// guarding the ArgumentNullException and the return expression are not
// visible (presumably chars == null and count * 2 -- confirm).
82 public override int GetByteCount (char[] chars, int index, int count)
85 throw new ArgumentNullException ("chars");
87 if (index < 0 || index > chars.Length) {
88 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
// Written as a subtraction so the check cannot overflow the way
// index + count could.
90 if (count < 0 || count > (chars.Length - index)) {
91 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
// String overload of GetByteCount. The only visible statement raises
// ArgumentNullException naming "s".
// NOTE(review): the guard and the return expression (listing lines 97-98 and
// 100-103) are absent; presumably s == null and s.Length * 2 -- confirm.
96 public override int GetByteCount (String s)
99 throw new ArgumentNullException ("s");
// Pointer overload of GetByteCount; marked non-CLS-compliant because of the
// char* parameter. Visible failure modes: ArgumentNullException ("chars")
// and ArgumentOutOfRangeException ("count").
// NOTE(review): both guard conditions and the return expression (listing
// lines 107-108, 110, 112-115) are absent; presumably chars == null,
// count < 0 and count * 2 -- confirm.
105 [CLSCompliantAttribute (false)]
106 public unsafe override int GetByteCount (char* chars, int count)
109 throw new ArgumentNullException ("chars");
111 throw new ArgumentOutOfRangeException ("count");
117 // Get the bytes that result from encoding a character buffer.
// Array overload: validates both buffers and their offsets, then pins the
// arrays and delegates to GetBytesInternal with pointers already advanced
// to charIndex / byteIndex. Returns whatever GetBytesInternal returns.
// NOTE(review): the conditions guarding the two ArgumentNullException throws
// (listing lines 121 and 124) and lines 136-139 are absent from this extract.
118 public unsafe override int GetBytes (char [] chars, int charIndex, int charCount,
119 byte [] bytes, int byteIndex)
122 throw new ArgumentNullException ("chars");
125 throw new ArgumentNullException ("bytes");
127 if (charIndex < 0 || charIndex > chars.Length) {
128 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
130 if (charCount < 0 || charCount > (chars.Length - charIndex)) {
131 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
133 if (byteIndex < 0 || byteIndex > bytes.Length) {
134 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
// Available output space is measured on the caller's array, before the
// local reference may be swapped for a dummy below.
140 int byteCount = bytes.Length - byteIndex;
// Replaces an empty destination with a one-element dummy array.
// NOTE(review): presumably because fixed on a zero-length array yields a
// null pointer -- confirm. byteCount is already 0 here, so the dummy is
// only written to if charCount is also 0 (nothing copied).
141 if (bytes.Length == 0)
142 bytes = new byte [1];
144 fixed (char* charPtr = chars)
145 fixed (byte* bytePtr = bytes)
146 return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, byteCount);
// Convenience overload: sizes a new array with GetByteCount (s) and fills it
// through the five-argument String overload, starting at offset 0 in both
// the string and the array.
// NOTE(review): the null guard condition and the return statement (listing
// lines 151-152, 159-161) are absent; presumably the filled array is
// returned -- confirm.
150 public override byte [] GetBytes (String s)
153 throw new ArgumentNullException ("s");
155 int byteCount = GetByteCount (s);
156 byte [] bytes = new byte [byteCount];
158 GetBytes (s, 0, s.Length, bytes, 0);
// String overload: same flow as the char [] overload, but the source is
// pinned directly from the string. Range errors on the string use the
// "ArgRange_StringIndex" / "ArgRange_StringRange" message keys; the byte
// offset uses "ArgRange_Array".
// NOTE(review): the conditions guarding the two ArgumentNullException throws
// (listing lines 167 and 170) and lines 182-186 are absent from this extract.
164 public unsafe override int GetBytes (String s, int charIndex, int charCount,
165 byte [] bytes, int byteIndex)
168 throw new ArgumentNullException ("s");
171 throw new ArgumentNullException ("bytes");
173 if (charIndex < 0 || charIndex > s.Length) {
174 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
176 if (charCount < 0 || charCount > (s.Length - charIndex)) {
177 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
179 if (byteIndex < 0 || byteIndex > bytes.Length) {
180 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
// Output space is measured on the caller's array before the possible swap.
187 int byteCount = bytes.Length - byteIndex;
// Empty destination is replaced by a one-element dummy array.
// NOTE(review): presumably so the fixed statement below does not produce a
// null pointer -- confirm.
188 if (bytes.Length == 0)
189 bytes = new byte [1];
191 fixed (char* charPtr = s)
192 fixed (byte* bytePtr = bytes)
193 return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, byteCount);
// Pointer overload; marked non-CLS-compliant because of the pointer
// parameters. After four argument checks it forwards everything unchanged
// to GetBytesInternal and returns its result.
// NOTE(review): all four guard conditions (listing lines 201, 203, 205, 207)
// are absent; presumably null tests on the pointers and "< 0" tests on the
// counts -- confirm.
197 [CLSCompliantAttribute (false)]
198 public unsafe override int GetBytes (char* chars, int charCount,
199 byte* bytes, int byteCount)
202 throw new ArgumentNullException ("bytes");
204 throw new ArgumentNullException ("chars");
206 throw new ArgumentOutOfRangeException ("charCount");
208 throw new ArgumentOutOfRangeException ("byteCount");
210 return GetBytesInternal (chars, charCount, bytes, byteCount);
// Shared encoder core used by every GetBytes overload. Needs charCount * 2
// bytes of output; throws ArgumentException ("Arg_InsufficientSpace") when
// byteCount is smaller. The copy itself is done by CopyChars, which is given
// this instance's bigEndian flag.
// NOTE(review): the return statement (listing lines 223-224) is absent;
// presumably count is returned -- confirm.
214 private unsafe int GetBytesInternal (char* chars, int charCount,
215 byte* bytes, int byteCount)
// Number of bytes to produce: two per input char.
217 int count = charCount * 2;
219 if (byteCount < count)
220 throw new ArgumentException (_("Arg_InsufficientSpace"));
222 CopyChars ((byte*) chars, bytes, count, bigEndian);
226 // Get the number of characters needed to decode a byte buffer.
// Validates the (bytes, index, count) window: index must lie in
// [0, bytes.Length] and count in [0, bytes.Length - index]; a violation
// raises ArgumentOutOfRangeException naming the offending parameter.
// NOTE(review): the null guard condition and the return expression (listing
// lines 228-229, 238-240) are absent; presumably bytes == null and
// count / 2 -- confirm.
227 public override int GetCharCount (byte[] bytes, int index, int count)
230 throw new ArgumentNullException ("bytes");
232 if (index < 0 || index > bytes.Length) {
233 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
235 if (count < 0 || count > (bytes.Length - index)) {
236 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
// Pointer overload of GetCharCount; marked non-CLS-compliant because of the
// byte* parameter. Visible failure modes: ArgumentNullException ("bytes")
// and ArgumentOutOfRangeException ("count").
// NOTE(review): both guard conditions and the return expression (listing
// lines 244-245, 247, 249-252) are absent; presumably bytes == null,
// count < 0 and count / 2 -- confirm.
242 [CLSCompliantAttribute (false)]
243 public unsafe override int GetCharCount (byte* bytes, int count)
246 throw new ArgumentNullException ("bytes");
248 throw new ArgumentOutOfRangeException ("count");
254 // Get the characters that result from decoding a byte buffer.
// Array overload: validates both buffers and their offsets, then pins the
// arrays and delegates to GetCharsInternal with pointers already advanced
// to byteIndex / charIndex. Returns whatever GetCharsInternal returns.
// NOTE(review): the conditions guarding the two ArgumentNullException throws
// (listing lines 258 and 261) and lines 273-276 are absent from this extract.
255 public unsafe override int GetChars (byte [] bytes, int byteIndex, int byteCount,
256 char [] chars, int charIndex)
259 throw new ArgumentNullException ("bytes");
262 throw new ArgumentNullException ("chars");
264 if (byteIndex < 0 || byteIndex > bytes.Length) {
265 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
267 if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
268 throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
270 if (charIndex < 0 || charIndex > chars.Length) {
271 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
// Available output space is measured on the caller's array, before the
// local reference may be swapped for a dummy below.
277 int charCount = chars.Length - charIndex;
// Empty destination is replaced by a one-element dummy array.
// NOTE(review): presumably so the fixed statement below does not produce a
// null pointer -- confirm.
278 if (chars.Length == 0)
279 chars = new char [1];
281 fixed (byte* bytePtr = bytes)
282 fixed (char* charPtr = chars)
283 return GetCharsInternal (bytePtr + byteIndex, byteCount, charPtr + charIndex, charCount);
// Pointer overload; marked non-CLS-compliant because of the pointer
// parameters. After four argument checks it forwards everything unchanged
// to GetCharsInternal and returns its result.
// NOTE(review): all four guard conditions (listing lines 291, 293, 295, 297)
// are absent; presumably null tests on the pointers and "< 0" tests on the
// counts -- confirm.
287 [CLSCompliantAttribute (false)]
288 public unsafe override int GetChars (byte* bytes, int byteCount,
289 char* chars, int charCount)
292 throw new ArgumentNullException ("bytes");
294 throw new ArgumentNullException ("chars");
296 throw new ArgumentOutOfRangeException ("charCount");
298 throw new ArgumentOutOfRangeException ("byteCount");
300 return GetCharsInternal (bytes, byteCount, chars, charCount);
304 // Decode a buffer of bytes into a string.
// Validates the (bytes, index, count) window, allocates a string of
// count / 2 chars with string.InternalAllocateStr, and has GetCharsInternal
// write the decoded chars straight into that string's storage.
// NOTE(review): the null guard condition, lines 313-316 and the return
// (listing lines 306-307, 324-326) are absent; presumably s is returned and
// an empty input is special-cased before the fixed block -- confirm.
305 public unsafe override String GetString (byte [] bytes, int index, int count)
308 throw new ArgumentNullException ("bytes");
309 if (index < 0 || index > bytes.Length)
310 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
311 if (count < 0 || count > (bytes.Length - index))
312 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
317 // GetCharCountInternal
// Integer division: a trailing odd byte does not get a char of its own.
318 int charCount = count / 2;
319 string s = string.InternalAllocateStr (charCount);
// The freshly allocated string is written through a pointer to its chars.
321 fixed (byte* bytePtr = bytes)
322 fixed (char* charPtr = s)
323 GetCharsInternal (bytePtr + index, count, charPtr, charCount);
// Shared decoder core used by GetChars and GetString. Inspects the first two
// input bytes for a byte-order mark (FE FF or FF FE) to choose the byte
// order, falls back to this instance's bigEndian flag otherwise, checks the
// output capacity and copies via CopyChars.
// NOTE(review): listing lines 335-336, 338, 340-341, 343 and 352-353 are
// absent, so the declaration of isBigEndian, the guard that makes reading
// bytes [0] and bytes [1] safe, the values assigned in the two mark branches
// (presumably true for FE FF and false for FF FE) and the return statement
// are not visible -- confirm all of these against the full source.
328 private unsafe int GetCharsInternal (byte* bytes, int byteCount,
329 char* chars, int charCount)
// Whole 16-bit units available; an odd trailing byte is dropped by the
// integer division.
331 int count = byteCount / 2;
334 // Determine the byte order in the incoming buffer.
337 if (bytes [0] == (byte) 0xFE && bytes [1] == (byte) 0xFF)
339 else if (bytes [0] == (byte) 0xFF && bytes [1] == (byte) 0xFE)
// Two fallbacks to the configured byte order are visible; which branch each
// belongs to cannot be determined from this extract.
342 isBigEndian = bigEndian;
344 isBigEndian = bigEndian;
347 // Validate that we have sufficient space in "chars".
348 if (charCount < count)
349 throw new ArgumentException (_("Arg_InsufficientSpace"));
// As far as the visible lines show, the copy starts at the first input byte
// with the full byteCount, i.e. a detected mark is not skipped here.
351 CopyChars (bytes, (byte*) chars, byteCount, isBigEndian);
355 // Get the maximum number of bytes needed to encode a
356 // specified number of characters.
// Returns charCount * 2. An ArgumentOutOfRangeException naming "charCount"
// with the "ArgRange_NonNegative" message key is thrown first.
// NOTE(review): its guard (listing line 359) is absent; presumably
// charCount < 0 -- confirm. No overflow check on the multiplication is
// visible in this extract.
357 public override int GetMaxByteCount (int charCount)
360 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
362 return charCount * 2;
365 // Get the maximum number of characters needed to decode a
366 // specified number of bytes.
// Returns byteCount / 2 (integer division, so an odd trailing byte is not
// counted). An ArgumentOutOfRangeException naming "byteCount" with the
// "ArgRange_NonNegative" message key is thrown first.
// NOTE(review): its guard (listing line 369) is absent; presumably
// byteCount < 0 -- confirm.
367 public override int GetMaxCharCount (int byteCount)
370 throw new ArgumentOutOfRangeException
371 ("byteCount", _("ArgRange_NonNegative"));
373 return byteCount / 2;
376 // Get a Unicode-specific decoder that is attached to this instance.
// Every call creates a new UnicodeDecoder (the private nested class) and
// passes it this instance's bigEndian flag; nothing else is handed over.
377 public override Decoder GetDecoder ()
379 return new UnicodeDecoder (bigEndian);
382 // Get the Unicode preamble.
// Builds a two-byte mark: either FE FF or FF FE.
// NOTE(review): listing lines 384-385, 387, 390 and 393-398 are absent, so
// the conditions choosing between the two byte sequences and the return
// statements are not visible. Presumably FE FF is used when bigEndian is
// set, and an empty array is returned when byteOrderMark is false -- confirm.
383 public override byte[] GetPreamble ()
386 byte[] preamble = new byte[2];
388 preamble[0] = (byte)0xFE;
389 preamble[1] = (byte)0xFF;
391 preamble[0] = (byte)0xFF;
392 preamble[1] = (byte)0xFE;
400 // Determine if this object is equal to another.
// Two instances compare equal when codePage, bigEndian and byteOrderMark
// all match.
// NOTE(review): "as" yields null for anything that is not a UnicodeEncoding,
// and the return below dereferences enc. The null guard and the fallback
// return (listing lines 404, 408-411) are absent; presumably
// "if (enc != null)" with "return false" otherwise -- confirm.
401 public override bool Equals (Object value)
403 UnicodeEncoding enc = (value as UnicodeEncoding);
405 return (codePage == enc.codePage &&
406 bigEndian == enc.bigEndian &&
407 byteOrderMark == enc.byteOrderMark);
413 // Get the hash code for this object.
// Defers entirely to the base class; bigEndian and byteOrderMark are not
// mixed in here.
// NOTE(review): whether that stays consistent with Equals in this class
// depends on what Encoding.GetHashCode hashes, which is not visible here.
414 public override int GetHashCode ()
416 return base.GetHashCode ();
// Copies count bytes of 16-bit units from src to dest, either verbatim or
// with the two bytes of each unit swapped.
// NOTE(review): most of this method is absent from the listing (lines
// 423-471, 478-480, 482-513, 515-524, 526-530). Only fragments of an
// unrolled byte-swap loop and two tail tests are visible; the pointer and
// count updates and the tail bodies must be confirmed in the full source.
419 private unsafe static void CopyChars (byte* src, byte* dest, int count, bool bigEndian)
// True when the machine byte order already matches the requested one
// (little-endian host with bigEndian == false, or the reverse): a straight
// memory copy is enough. The mask clears bit 0 so that only whole two-byte
// units are copied.
421 if (BitConverter.IsLittleEndian != bigEndian) {
422 string.memcpy (dest, src, count & unchecked ((int) 0xFFFFFFFE));
// Visible tail of an unrolled swap: each destination pair receives the
// source pair with its two bytes exchanged.
472 dest [10] = src [11];
473 dest [11] = src [10];
474 dest [12] = src [13];
475 dest [13] = src [12];
476 dest [14] = src [15];
477 dest [15] = src [14];
// Loop continues while any bit at or above 16 is set in count, i.e. while
// at least 16 bytes remain for a non-negative count.
481 } while ((count & unchecked ((int) 0xFFFFFFF0)) != 0);
// Remainder handling keyed on individual bits of count (4 and 2).
514 if ((count & 4) == 0)
525 if ((count & 2) == 0)
532 // Unicode decoder implementation.
// Stateful decoder returned by GetDecoder. Unlike the one-shot GetChars
// methods of the outer class, it remembers an odd trailing byte between
// calls so a 16-bit unit split across two buffers can be reassembled.
// NOTE(review): this listing is lossy -- the embedded line numbers skip, so
// braces, several conditions and the return statements are not visible.
533 private sealed class UnicodeDecoder : Decoder
// Byte order used when assembling chars (true = big-endian).
535 private bool bigEndian;
// Pending odd byte from the previous GetChars call; the code below treats
// -1 as "no byte pending".
// NOTE(review): its initial value is set on an absent line (listing 542),
// presumably -1 -- confirm.
536 private int leftOverByte;
// Stores the byte order supplied by UnicodeEncoding.GetDecoder.
539 public UnicodeDecoder (bool bigEndian)
541 this.bigEndian = bigEndian;
545 // Override inherited methods.
// Number of chars a GetChars call with the same arguments would produce.
// With a byte pending, the pending byte counts toward the total, hence
// (count + 1) / 2.
// NOTE(review): the null guard condition and the other return (listing
// lines 548, 559-562) are absent; presumably bytes == null and count / 2
// -- confirm.
546 public override int GetCharCount (byte[] bytes, int index, int count)
549 throw new ArgumentNullException ("bytes");
551 if (index < 0 || index > bytes.Length) {
552 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
554 if (count < 0 || count > (bytes.Length - index)) {
555 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
557 if (leftOverByte != -1) {
558 return (count + 1) / 2;
// Decodes bytes into chars, first completing any unit left unfinished by the
// previous call and finally recording a new odd trailing byte, if any.
// NOTE(review): the last parameter of the signature (listing line 566), the
// guards for the two ArgumentNullException throws, lines 583-586, 589-591,
// 593, 600, 602, 604-608, 615-616 and the return statement are all absent.
564 public unsafe override int GetChars (byte [] bytes, int byteIndex,
565 int byteCount, char [] chars,
569 throw new ArgumentNullException ("bytes");
572 throw new ArgumentNullException ("chars");
574 if (byteIndex < 0 || byteIndex > bytes.Length) {
575 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
577 if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
578 throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
580 if (charIndex < 0 || charIndex > chars.Length) {
581 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
// Work on local copies of the decoder state.
587 bool isBigEndian = bigEndian;
588 int leftOver = leftOverByte;
// Two candidate output sizes: the first counts a pending byte as part of the
// input, the second does not.
// NOTE(review): the selecting condition is absent, presumably
// leftOver != -1 -- confirm.
592 count = (byteCount + 1) / 2;
594 count = byteCount / 2;
596 if (chars.Length - charIndex < count)
597 throw new ArgumentException (_("Arg_InsufficientSpace"));
599 if (leftOver != -1) {
// Pending byte becomes the high byte of the first char ...
601 chars [charIndex] = unchecked ((char) ((leftOver << 8) | (int) bytes [byteIndex]));
// ... or the low byte.
// NOTE(review): the condition choosing between these two forms is absent,
// presumably isBigEndian for the first -- confirm. The index and count
// adjustments that should follow are also not visible.
603 chars [charIndex] = unchecked ((char) (((int) bytes [byteIndex] << 8) | leftOver));
// Bulk-copy the rest only when at least one whole two-byte unit remains
// (any bit other than bit 0 set in byteCount).
609 if ((byteCount & unchecked ((int) 0xFFFFFFFE)) != 0)
610 fixed (byte* bytePtr = bytes)
611 fixed (char* charPtr = chars)
612 CopyChars (bytePtr + byteIndex, (byte*) (charPtr + charIndex), byteCount, isBigEndian);
// Even remaining length: nothing is left over.
// NOTE(review): the assignment on the absent line 615 is presumably
// leftOverByte = -1 -- confirm. Odd length: keep the final byte for the
// next call.
614 if ((byteCount & 1) == 0)
617 leftOverByte = bytes [byteCount + byteIndex - 1];
622 } // class UnicodeDecoder
624 }; // class UnicodeEncoding
626 }; // namespace System.Text