2 * UnicodeEncoding.cs - Implementation of the
3 * "System.Text.UnicodeEncoding" class.
5 * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
6 * Copyright (C) 2003, 2004 Novell, Inc.
7 * Copyright (C) 2006 Kornél Pál <http://www.kornelpal.hu/>
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
24 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 * OTHER DEALINGS IN THE SOFTWARE.
// Encoding subclass declared here as UnicodeEncoding; it works on two-byte
// characters (see CharSize) in either little-endian or big-endian byte order.
34 [MonoTODO ("Fix serialization compatibility with MS.NET")]
35 public class UnicodeEncoding : Encoding
37 // Magic numbers used by Windows for Unicode.
// The constructor passes 1201 to the base class when bigEndian is true, 1200 otherwise.
38 internal const int UNICODE_CODE_PAGE = 1200;
39 internal const int BIG_UNICODE_CODE_PAGE = 1201;
42 // Size of characters in this encoding.
43 public const int CharSize = 2;
// Byte-order flag handed to CopyChars when encoding and used as the decoding default.
47 private bool bigEndian;
// Copied from the constructor argument of the same name and compared in Equals.
48 private bool byteOrderMark;
// Default constructor: delegates to the two-argument constructor with
// bigEndian = false and byteOrderMark = true.
51 public UnicodeEncoding () : this (false, true)
// Creates an encoding with the requested byte order and byte-order-mark setting.
// The base Encoding receives BIG_UNICODE_CODE_PAGE when bigEndian is true and
// UNICODE_CODE_PAGE otherwise.
56 public UnicodeEncoding (bool bigEndian, bool byteOrderMark)
57 : base ((bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE))
59 this.bigEndian = bigEndian;
60 this.byteOrderMark = byteOrderMark;
// Descriptive names for the big-endian form.
// NOTE(review): the condition that selects between the two groups of names is
// on lines absent from this listing - presumably it tests bigEndian; confirm.
63 body_name = "unicodeFFFE";
64 encoding_name = "Unicode (Big-Endian)";
65 header_name = "unicodeFFFE";
66 is_browser_save = false;
67 web_name = "unicodeFFFE";
// Descriptive names for the other (presumably little-endian) form.
70 encoding_name = "Unicode";
71 header_name = "utf-16";
72 is_browser_save = true;
76 // Windows reports the same code page number for
77 // both the little-endian and big-endian forms.
78 windows_code_page = UNICODE_CODE_PAGE;
81 // Get the number of bytes needed to encode a character buffer.
// Arguments are range-checked against chars.Length: index must lie in
// [0, chars.Length] and count in [0, chars.Length - index].
// NOTE(review): the test guarding the first throw (presumably chars == null)
// and the return statement are on lines absent from this listing.
82 public override int GetByteCount (char[] chars, int index, int count)
85 throw new ArgumentNullException ("chars");
87 if (index < 0 || index > chars.Length) {
88 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
90 if (count < 0 || count > (chars.Length - index)) {
91 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
// Byte count for a whole string. Throws ArgumentNullException naming "s".
// NOTE(review): the guard for that throw (presumably s == null) and the
// return statement are not visible in this listing.
96 public override int GetByteCount (String s)
99 throw new ArgumentNullException ("s");
// Pointer-based overload, marked as not CLS-compliant.
// Can throw ArgumentNullException for "chars" and ArgumentOutOfRangeException
// for "count".
// NOTE(review): the conditions guarding both throws and the return statement
// are not visible in this listing.
105 [CLSCompliantAttribute (false)]
106 public unsafe override int GetByteCount (char* chars, int count)
109 throw new ArgumentNullException ("chars");
111 throw new ArgumentOutOfRangeException ("count");
117 // Get the bytes that result from encoding a character buffer.
// Validates the index/count arguments against the array lengths, then pins
// both arrays and hands raw pointers to GetBytesInternal, whose result is returned.
// NOTE(review): the null tests guarding the two ArgumentNullException throws
// are on lines absent from this listing.
118 public unsafe override int GetBytes (char [] chars, int charIndex, int charCount,
119 byte [] bytes, int byteIndex)
122 throw new ArgumentNullException ("chars");
125 throw new ArgumentNullException ("bytes");
127 if (charIndex < 0 || charIndex > chars.Length) {
128 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
130 if (charCount < 0 || charCount > (chars.Length - charIndex)) {
131 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
133 if (byteIndex < 0 || byteIndex > bytes.Length) {
134 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
// Space available in "bytes" from byteIndex onward; computed before "bytes"
// may be swapped for a one-element array below.
140 int byteCount = bytes.Length - byteIndex;
// An empty destination is replaced by a one-element array - presumably because
// "fixed" on a zero-length array produces a null pointer; confirm.
141 if (bytes.Length == 0)
142 bytes = new byte [1];
144 fixed (char* charPtr = chars)
145 fixed (byte* bytePtr = bytes)
146 return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, byteCount);
// Encodes a whole string into a newly allocated byte array sized by GetByteCount (s).
// NOTE(review): the guard for the ArgumentNullException (presumably s == null),
// any handling between the allocation and the copy, and the return statement
// are not visible in this listing.
150 public unsafe override byte [] GetBytes (String s)
153 throw new ArgumentNullException ("s");
155 int byteCount = GetByteCount (s);
156 byte [] bytes = new byte [byteCount];
// Pin the string and the output array, then encode all s.Length characters.
159 fixed (char* charPtr = s)
160 fixed (byte* bytePtr = bytes)
161 GetBytesInternal (charPtr, s.Length, bytePtr, byteCount);
// Encodes charCount characters of "s", starting at charIndex, into "bytes"
// starting at byteIndex, and returns the result of GetBytesInternal.
// NOTE(review): the null tests guarding the two ArgumentNullException throws
// are on lines absent from this listing.
167 public unsafe override int GetBytes (String s, int charIndex, int charCount,
168 byte [] bytes, int byteIndex)
171 throw new ArgumentNullException ("s");
174 throw new ArgumentNullException ("bytes");
177 if (charIndex < 0 || charIndex > s.Length) {
178 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
180 if (charCount < 0 || charCount > (s.Length - charIndex)) {
181 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
183 if (byteIndex < 0 || byteIndex > bytes.Length) {
184 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
// Space available in "bytes" from byteIndex onward; computed before "bytes"
// may be swapped for a one-element array below.
186 int byteCount = bytes.Length - byteIndex;
// An empty destination is replaced by a one-element array - presumably because
// "fixed" on a zero-length array produces a null pointer; confirm.
187 if (bytes.Length == 0)
188 bytes = new byte [1];
190 fixed (char* charPtr = s)
191 fixed (byte* bytePtr = bytes)
192 return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, byteCount);
// Pointer-based overload, marked as not CLS-compliant. After argument checks
// it forwards everything unchanged to GetBytesInternal.
// NOTE(review): the conditions guarding the four throws are not visible in
// this listing; the exceptions name "bytes", "chars", "charCount" and
// "byteCount" in that order.
196 [CLSCompliantAttribute (false)]
197 public unsafe override int GetBytes (char* chars, int charCount,
198 byte* bytes, int byteCount)
201 throw new ArgumentNullException ("bytes");
203 throw new ArgumentNullException ("chars");
205 throw new ArgumentOutOfRangeException ("charCount");
207 throw new ArgumentOutOfRangeException ("byteCount");
209 return GetBytesInternal (chars, charCount, bytes, byteCount);
// Shared encoding routine used by the public GetBytes overloads.
// Throws ArgumentException when byteCount is smaller than charCount * 2.
// NOTE(review): the return statement is not visible in this listing.
213 private unsafe int GetBytesInternal (char* chars, int charCount,
214 byte* bytes, int byteCount)
// Two bytes are produced per character.
// NOTE(review): no overflow check on this multiplication is visible here.
216 int count = charCount * 2;
218 if (byteCount < count)
219 throw new ArgumentException (_("Arg_InsufficientSpace"));
// The characters are viewed as raw bytes; CopyChars applies this instance's byte order.
221 CopyChars ((byte*) chars, bytes, count, bigEndian);
225 // Get the number of characters needed to decode a byte buffer.
// Arguments are range-checked against bytes.Length: index must lie in
// [0, bytes.Length] and count in [0, bytes.Length - index].
// NOTE(review): the test guarding the first throw (presumably bytes == null)
// and the return statement are on lines absent from this listing.
226 public override int GetCharCount (byte[] bytes, int index, int count)
229 throw new ArgumentNullException ("bytes");
231 if (index < 0 || index > bytes.Length) {
232 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
234 if (count < 0 || count > (bytes.Length - index)) {
235 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
// Pointer-based overload, marked as not CLS-compliant.
// Can throw ArgumentNullException for "bytes" and ArgumentOutOfRangeException
// for "count".
// NOTE(review): the conditions guarding both throws and the return statement
// are not visible in this listing.
241 [CLSCompliantAttribute (false)]
242 public unsafe override int GetCharCount (byte* bytes, int count)
245 throw new ArgumentNullException ("bytes");
247 throw new ArgumentOutOfRangeException ("count");
253 // Get the characters that result from decoding a byte buffer.
// Validates the index/count arguments against the array lengths, then pins
// both arrays and hands raw pointers to GetCharsInternal, whose result is returned.
// NOTE(review): the null tests guarding the two ArgumentNullException throws
// are on lines absent from this listing.
254 public unsafe override int GetChars (byte [] bytes, int byteIndex, int byteCount,
255 char [] chars, int charIndex)
258 throw new ArgumentNullException ("bytes");
261 throw new ArgumentNullException ("chars");
263 if (byteIndex < 0 || byteIndex > bytes.Length) {
264 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
266 if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
267 throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
269 if (charIndex < 0 || charIndex > chars.Length) {
270 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
// Room available in "chars" from charIndex onward; computed before "chars"
// may be swapped for a one-element array below.
276 int charCount = chars.Length - charIndex;
// An empty destination is replaced by a one-element array - presumably because
// "fixed" on a zero-length array produces a null pointer; confirm.
277 if (chars.Length == 0)
278 chars = new char [1];
280 fixed (byte* bytePtr = bytes)
281 fixed (char* charPtr = chars)
282 return GetCharsInternal (bytePtr + byteIndex, byteCount, charPtr + charIndex, charCount);
// Pointer-based overload, marked as not CLS-compliant. After argument checks
// it forwards everything unchanged to GetCharsInternal.
// NOTE(review): the conditions guarding the four throws are not visible in
// this listing; the exceptions name "bytes", "chars", "charCount" and
// "byteCount" in that order.
286 [CLSCompliantAttribute (false)]
287 public unsafe override int GetChars (byte* bytes, int byteCount,
288 char* chars, int charCount)
291 throw new ArgumentNullException ("bytes");
293 throw new ArgumentNullException ("chars");
295 throw new ArgumentOutOfRangeException ("charCount");
297 throw new ArgumentOutOfRangeException ("byteCount");
299 return GetCharsInternal (bytes, byteCount, chars, charCount);
// Shared decoding routine used by the public GetChars overloads.
// Throws ArgumentException when charCount is smaller than byteCount / 2.
// NOTE(review): the return statement is not visible in this listing.
303 private unsafe int GetCharsInternal (byte* bytes, int byteCount,
304 char* chars, int charCount)
// Number of whole two-byte characters in the input.
306 int count = byteCount / 2;
309 // Determine the byte order in the incoming buffer.
// The first two bytes are compared against the signatures FE FF and FF FE.
// NOTE(review): what each signature branch does, the declaration of
// isBigEndian, and the conditions around the two assignments from bigEndian
// are on lines absent from this listing.
312 if (bytes [0] == (byte) 0xFE && bytes [1] == (byte) 0xFF)
314 else if (bytes [0] == (byte) 0xFF && bytes [1] == (byte) 0xFE)
317 isBigEndian = bigEndian;
319 isBigEndian = bigEndian;
322 // Validate that we have sufficient space in "chars".
323 if (charCount < count)
324 throw new ArgumentException (_("Arg_InsufficientSpace"));
// byteCount (not count) is passed: CopyChars takes its length in bytes.
326 CopyChars (bytes, (byte*) chars, byteCount, isBigEndian);
330 // Get the maximum number of bytes needed to encode a
331 // specified number of characters.
// Returns twice charCount.
// NOTE(review): the guard for the throw (presumably charCount < 0) is not
// visible in this listing, and no overflow check on charCount * 2 is visible.
332 public override int GetMaxByteCount (int charCount)
335 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
337 return charCount * 2;
340 // Get the maximum number of characters needed to decode a
341 // specified number of bytes.
// Returns byteCount / 2 using integer division, so an odd trailing byte is not counted.
// NOTE(review): the guard for the throw (presumably byteCount < 0) is not
// visible in this listing.
342 public override int GetMaxCharCount (int byteCount)
345 throw new ArgumentOutOfRangeException
346 ("byteCount", _("ArgRange_NonNegative"));
348 return byteCount / 2;
351 // Get a Unicode-specific decoder that is attached to this instance.
// A new UnicodeDecoder is created on every call; only the bigEndian flag is
// passed to it.
352 public override Decoder GetDecoder ()
354 return new UnicodeDecoder (bigEndian);
357 // Get the Unicode preamble.
// Two different two-byte marks can be written into the array: FE FF or FF FE.
// NOTE(review): the conditions choosing between them (presumably bigEndian,
// with byteOrderMark possibly deciding whether a mark is produced at all) and
// the return statements are on lines absent from this listing.
358 public override byte[] GetPreamble ()
361 byte[] preamble = new byte[2];
363 preamble[0] = (byte)0xFE;
364 preamble[1] = (byte)0xFF;
366 preamble[0] = (byte)0xFF;
367 preamble[1] = (byte)0xFE;
375 // Determine if this object is equal to another.
// The "as" cast yields null when value is not a UnicodeEncoding.
// NOTE(review): the null check on enc and the fallback return for that case
// are not visible in this listing.
376 public override bool Equals (Object value)
378 UnicodeEncoding enc = (value as UnicodeEncoding);
// Equal when code page, byte order and byte-order-mark setting all agree.
380 return (codePage == enc.codePage &&
381 bigEndian == enc.bigEndian &&
382 byteOrderMark == enc.byteOrderMark);
388 // Get the hash code for this object.
// Defers entirely to the base class; bigEndian and byteOrderMark, which
// Equals compares, are not mixed in here.
389 public override int GetHashCode ()
391 return base.GetHashCode ();
// Copies two-byte characters from src to dest, either verbatim or with the
// two bytes of each character exchanged. "count" is a length in bytes.
394 private unsafe static void CopyChars (byte* src, byte* dest, int count, bool bigEndian)
// When the host byte order (BitConverter.IsLittleEndian) already corresponds
// to the requested order, a plain memcpy is used; the mask clears bit 0 so a
// trailing odd byte is never copied.
// NOTE(review): what follows the memcpy (presumably a return) is not visible.
396 if (BitConverter.IsLittleEndian != bigEndian) {
397 string.memcpy (dest, src, count & unchecked ((int) 0xFFFFFFFE));
// Tail of an unrolled swap that exchanges adjacent bytes; indices run up to 15,
// i.e. 16 bytes per pass (only the last three pairs appear in this listing).
447 dest [10] = src [11];
448 dest [11] = src [10];
449 dest [12] = src [13];
450 dest [13] = src [12];
451 dest [14] = src [15];
452 dest [15] = src [14];
// Repeat while count still has any bit at or above 16 set.
// NOTE(review): the statements advancing the pointers and reducing count are
// not visible in this listing.
456 } while ((count & unchecked ((int) 0xFFFFFFF0)) != 0);
// Remainder handling keyed on single bits of count (the 4 and 2 bits here).
// NOTE(review): the bodies of these tests are not visible in this listing.
489 if ((count & 4) == 0)
500 if ((count & 2) == 0)
507 // Unicode decoder implementation.
// Stateful decoder: a byte that could not be paired in one call is kept in
// leftOverByte and combined with the first byte of the next call.
508 private sealed class UnicodeDecoder : Decoder
510 private bool bigEndian;
// Carried-over byte; the code below treats -1 as "nothing carried over".
511 private int leftOverByte;
514 public UnicodeDecoder (bool bigEndian)
516 this.bigEndian = bigEndian;
// NOTE(review): the initial assignment of leftOverByte is not visible in this listing.
520 // Override inherited methods.
// Number of characters a GetChars call with the same arguments would produce.
// NOTE(review): the guard for the ArgumentNullException and the return used
// when no byte is carried over are not visible in this listing.
521 public override int GetCharCount (byte[] bytes, int index, int count)
524 throw new ArgumentNullException ("bytes");
526 if (index < 0 || index > bytes.Length) {
527 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
529 if (count < 0 || count > (bytes.Length - index)) {
530 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
// With a carried-over byte the input is effectively one byte longer.
532 if (leftOverByte != -1) {
533 return (count + 1) / 2;
// Decodes bytes into chars, honouring a byte carried over from an earlier call.
// NOTE(review): the last parameter (presumably charIndex), the guards for the
// two ArgumentNullException throws, and the return statement are on lines
// absent from this listing.
539 public unsafe override int GetChars (byte [] bytes, int byteIndex,
540 int byteCount, char [] chars,
544 throw new ArgumentNullException ("bytes");
547 throw new ArgumentNullException ("chars");
549 if (byteIndex < 0 || byteIndex > bytes.Length) {
550 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
552 if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
553 throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
555 if (charIndex < 0 || charIndex > chars.Length) {
556 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
// Work on local copies of the decoder state.
562 bool isBigEndian = bigEndian;
563 int leftOver = leftOverByte;
// Two alternative character counts, one rounding up and one rounding down.
// NOTE(review): the condition selecting between them is not visible here -
// presumably leftOver != -1, mirroring GetCharCount; confirm.
567 count = (byteCount + 1) / 2;
569 count = byteCount / 2;
571 if (chars.Length - charIndex < count)
572 throw new ArgumentException (_("Arg_InsufficientSpace"));
// Complete the character that was split across calls: the first variant puts
// the carried byte in the high half, the second puts it in the low half.
// NOTE(review): the test choosing between the variants (presumably
// isBigEndian) and any index/count adjustments afterwards are not visible.
574 if (leftOver != -1) {
576 chars [charIndex] = unchecked ((char) ((leftOver << 8) | (int) bytes [byteIndex]));
578 chars [charIndex] = unchecked ((char) (((int) bytes [byteIndex] << 8) | leftOver));
// Bulk-copy only when at least one complete two-byte pair is present.
584 if ((byteCount & unchecked ((int) 0xFFFFFFFE)) != 0)
585 fixed (byte* bytePtr = bytes)
586 fixed (char* charPtr = chars)
587 CopyChars (bytePtr + byteIndex, (byte*) (charPtr + charIndex), byteCount, isBigEndian);
// Even/odd test on the byte count, followed by saving the final input byte
// for the next call.
// NOTE(review): the statement for the even case and the else keyword are not
// visible in this listing.
589 if ((byteCount & 1) == 0)
592 leftOverByte = bytes [byteCount + byteIndex - 1];
597 } // class UnicodeDecoder
599 }; // class UnicodeEncoding
601 }; // namespace System.Text