Merge pull request #347 from JamesB7/master
[mono.git] / mcs / class / corlib / System.Text / UnicodeEncoding.cs
1 /*
2  * UnicodeEncoding.cs - Implementation of the
3  *              "System.Text.UnicodeEncoding" class.
4  *
5  * Copyright (c) 2001, 2002  Southern Storm Software, Pty Ltd
6  * Copyright (C) 2003, 2004 Novell, Inc.
7  * Copyright (C) 2006 Kornél Pál <http://www.kornelpal.hu/>
8  *
9  * Permission is hereby granted, free of charge, to any person obtaining
10  * a copy of this software and associated documentation files (the "Software"),
11  * to deal in the Software without restriction, including without limitation
12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13  * and/or sell copies of the Software, and to permit persons to whom the
14  * Software is furnished to do so, subject to the following conditions:
15  *
16  * The above copyright notice and this permission notice shall be included
17  * in all copies or substantial portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
24  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25  * OTHER DEALINGS IN THE SOFTWARE.
26  */
27
28 namespace System.Text
29 {
30
31 using System;
32 using System.Runtime.InteropServices;
33
34 [Serializable]
35 [ComVisible (true)]
36 [MonoLimitation ("Serialization format not compatible with .NET")]
37 public class UnicodeEncoding : Encoding
38 {
39         // Magic numbers used by Windows for Unicode.
40         internal const int UNICODE_CODE_PAGE     = 1200;
41         internal const int BIG_UNICODE_CODE_PAGE = 1201;
42
43 #if !ECMA_COMPAT
44         // Size of characters in this encoding.
45         public const int CharSize = 2;
46 #endif
47
48         // Internal state.
49         private bool bigEndian;
50         private bool byteOrderMark;
51
52         // Constructors.
// Create a little-endian encoding that emits a byte order mark.
// The chained constructor stores both settings, so there is
// nothing left to do in the body.
public UnicodeEncoding ()
        : this (false, true)
{
}
// Create an encoding with the given byte order and byte order mark
// setting; "false" is passed on for throwOnInvalidBytes.
public UnicodeEncoding (bool bigEndian, bool byteOrderMark) : this (bigEndian, byteOrderMark, false)
{
}
62
// Create an encoding with the given byte order, byte order mark
// setting and behaviour on invalid input bytes.
public UnicodeEncoding (bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes)
        : base (bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE)
{
        // Decoder fallback: throw when asked to, otherwise substitute
        // U+FFFD.  No encoder fallback is supplied (null).
        DecoderFallback decoderFallback;
        if (throwOnInvalidBytes)
                decoderFallback = new DecoderExceptionFallback ();
        else
                decoderFallback = new DecoderReplacementFallback ("\uFFFD");
        SetFallbackInternal (null, decoderFallback);

        this.bigEndian = bigEndian;
        this.byteOrderMark = byteOrderMark;

        // The body, header and web names are always the same string.
        string charsetName = bigEndian ? "unicodeFFFE" : "utf-16";
        body_name = charsetName;
        header_name = charsetName;
        web_name = charsetName;
        encoding_name = bigEndian ? "Unicode (Big-Endian)" : "Unicode";
        is_browser_save = !bigEndian;

        // Windows reports the same code page number for
        // both the little-endian and big-endian forms.
        windows_code_page = UNICODE_CODE_PAGE;
}
92
// Number of bytes needed to encode "count" characters of "chars"
// starting at "index": two bytes per character.
public override int GetByteCount (char[] chars, int index, int count)
{
        if (chars == null)
                throw new ArgumentNullException ("chars");

        if (index < 0 || index > chars.Length)
                throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));

        int available = chars.Length - index;
        if (count < 0 || count > available)
                throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

        return 2 * count;
}
107
// Number of bytes needed to encode a whole string: two bytes per
// character.
public override int GetByteCount (String s)
{
        if (s == null)
                throw new ArgumentNullException ("s");

        int length = s.Length;
        return 2 * length;
}
115
// Number of bytes needed to encode "count" characters read through
// a raw pointer: two bytes per character.
// Throws ArgumentNullException when "chars" is null and
// ArgumentOutOfRangeException when "count" is negative or so large
// that the byte count does not fit in an int.
[CLSCompliantAttribute (false)]
[ComVisible (false)]
public unsafe override int GetByteCount (char* chars, int count)
{
        if (chars == null)
                throw new ArgumentNullException ("chars");
        if (count < 0)
                throw new ArgumentOutOfRangeException ("count");

        // count * 2 would silently wrap to a negative value above this
        // limit; report the argument instead of returning garbage.
        if (count > Int32.MaxValue / 2)
                throw new ArgumentOutOfRangeException ("count");

        return count * 2;
}
127
// Encode "charCount" characters of "chars", starting at "charIndex",
// into "bytes" starting at "byteIndex".  Returns the number of bytes
// written.
public unsafe override int GetBytes (char [] chars, int charIndex, int charCount,
                                     byte [] bytes, int byteIndex)
{
        if (chars == null)
                throw new ArgumentNullException ("chars");
        if (bytes == null)
                throw new ArgumentNullException ("bytes");
        if (charIndex < 0 || charIndex > chars.Length)
                throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
        if (charCount < 0 || charCount > (chars.Length - charIndex))
                throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
        if (byteIndex < 0 || byteIndex > bytes.Length)
                throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));

        // Nothing to encode.
        if (charCount == 0)
                return 0;

        // Room left in the caller's array; computed before "bytes" may
        // be swapped for a dummy array below.
        int room = bytes.Length - byteIndex;

        // An empty destination is replaced by a one-element array before
        // pinning (presumably so the pinned pointer is never null); the
        // worker still sees room == 0 and rejects the call.
        if (bytes.Length == 0)
                bytes = new byte [1];

        fixed (char* source = chars) {
                fixed (byte* target = bytes) {
                        return GetBytesInternal (source + charIndex, charCount, target + byteIndex, room);
                }
        }
}
159
// Encode "charCount" characters of the string "s", starting at
// "charIndex", into "bytes" starting at "byteIndex".  Returns the
// number of bytes written.
public unsafe override int GetBytes (String s, int charIndex, int charCount,
                                     byte [] bytes, int byteIndex)
{
        if (s == null)
                throw new ArgumentNullException ("s");
        if (bytes == null)
                throw new ArgumentNullException ("bytes");
        if (charIndex < 0 || charIndex > s.Length)
                throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
        if (charCount < 0 || charCount > (s.Length - charIndex))
                throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
        if (byteIndex < 0 || byteIndex > bytes.Length)
                throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));

        // Same short-circuit as the char[] overload.
        if (charCount == 0)
                return 0;

        // Room left in the caller's array; computed before "bytes" may
        // be swapped for a dummy array below.
        int room = bytes.Length - byteIndex;

        // An empty destination is replaced by a one-element array before
        // pinning (presumably so the pinned pointer is never null); the
        // worker still sees room == 0 and rejects the call.
        if (bytes.Length == 0)
                bytes = new byte [1];

        fixed (char* source = s) {
                fixed (byte* target = bytes) {
                        return GetBytesInternal (source + charIndex, charCount, target + byteIndex, room);
                }
        }
}
191
// Encode "charCount" characters from a raw character pointer into a
// raw byte buffer of "byteCount" bytes.  Returns the number of bytes
// written.
[CLSCompliantAttribute (false)]
[ComVisible (false)]
public unsafe override int GetBytes (char* chars, int charCount,
                                     byte* bytes, int byteCount)
{
        // "bytes" is checked before "chars", so a call with both null
        // reports "bytes".
        if (bytes == null) {
                throw new ArgumentNullException ("bytes");
        }
        if (chars == null) {
                throw new ArgumentNullException ("chars");
        }
        if (charCount < 0) {
                throw new ArgumentOutOfRangeException ("charCount");
        }
        if (byteCount < 0) {
                throw new ArgumentOutOfRangeException ("byteCount");
        }

        int written = GetBytesInternal (chars, charCount, bytes, byteCount);
        return written;
}
208
// Shared worker for the GetBytes overloads: writes "charCount"
// characters from "chars" to "bytes" in this encoding's byte order
// and returns the number of bytes written (charCount * 2).
// Throws ArgumentException when "byteCount" is too small.
// Callers have already validated that both counts are non-negative.
private unsafe int GetBytesInternal (char* chars, int charCount,
                                     byte* bytes, int byteCount)
{
        // Compare in units of characters rather than bytes: charCount * 2
        // wraps to a negative value for charCount > Int32.MaxValue / 2,
        // which would slip past a "byteCount < charCount * 2" check and
        // hand CopyChars a negative length.  For non-negative inputs
        // "charCount > byteCount / 2" is the same test without overflow.
        if (charCount > byteCount / 2)
                throw new ArgumentException (_("Arg_InsufficientSpace"));

        // Safe now: charCount * 2 <= byteCount.
        int count = charCount * 2;

        CopyChars ((byte*) chars, bytes, count, bigEndian);
        return count;
}
220
// Number of characters produced by decoding "count" bytes of "bytes"
// starting at "index": one character per complete byte pair.
public override int GetCharCount (byte[] bytes, int index, int count)
{
        if (bytes == null)
                throw new ArgumentNullException ("bytes");

        if (index < 0 || index > bytes.Length)
                throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));

        int available = bytes.Length - index;
        if (count < 0 || count > available)
                throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

        int pairs = count / 2;
        return pairs;
}
235
// Number of characters produced by decoding "count" bytes read
// through a raw pointer: one character per complete byte pair.
[CLSCompliantAttribute (false)]
[ComVisible (false)]
public unsafe override int GetCharCount (byte* bytes, int count)
{
        if (bytes == null) {
                throw new ArgumentNullException ("bytes");
        }
        if (count < 0) {
                throw new ArgumentOutOfRangeException ("count");
        }

        int pairs = count / 2;
        return pairs;
}
247
// Decode "byteCount" bytes of "bytes", starting at "byteIndex", into
// "chars" starting at "charIndex".  Returns the number of characters
// written.
public unsafe override int GetChars (byte [] bytes, int byteIndex, int byteCount,
                                     char [] chars, int charIndex)
{
        if (bytes == null)
                throw new ArgumentNullException ("bytes");
        if (chars == null)
                throw new ArgumentNullException ("chars");
        if (byteIndex < 0 || byteIndex > bytes.Length)
                throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
        if (byteCount < 0 || byteCount > (bytes.Length - byteIndex))
                throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
        if (charIndex < 0 || charIndex > chars.Length)
                throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));

        // Nothing to decode.
        if (byteCount == 0)
                return 0;

        // Room left in the caller's array; computed before "chars" may
        // be swapped for a dummy array below.
        int room = chars.Length - charIndex;

        // An empty destination is replaced by a one-element array before
        // pinning (presumably so the pinned pointer is never null); the
        // worker still sees room == 0.
        if (chars.Length == 0)
                chars = new char [1];

        fixed (byte* source = bytes) {
                fixed (char* target = chars) {
                        return GetCharsInternal (source + byteIndex, byteCount, target + charIndex, room);
                }
        }
}
279
// Decode "byteCount" bytes from a raw byte pointer into a raw
// character buffer with room for "charCount" characters.  Returns
// the number of characters written.
[CLSCompliantAttribute (false)]
[ComVisible (false)]
public unsafe override int GetChars (byte* bytes, int byteCount,
                                     char* chars, int charCount)
{
        if (bytes == null) {
                throw new ArgumentNullException ("bytes");
        }
        if (chars == null) {
                throw new ArgumentNullException ("chars");
        }
        // "charCount" is checked before "byteCount".
        if (charCount < 0) {
                throw new ArgumentOutOfRangeException ("charCount");
        }
        if (byteCount < 0) {
                throw new ArgumentOutOfRangeException ("byteCount");
        }

        int written = GetCharsInternal (bytes, byteCount, chars, charCount);
        return written;
}
296
// Decode "count" bytes of "bytes", starting at "index", straight into
// a newly allocated string.  An odd trailing byte is ignored.
[ComVisible (false)]
public unsafe override String GetString (byte [] bytes, int index, int count)
{
        if (bytes == null) {
                throw new ArgumentNullException ("bytes");
        }
        if (index < 0 || index > bytes.Length) {
                throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
        }
        if (count < 0 || count > (bytes.Length - index)) {
                throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
        }

        if (count == 0) {
                return string.Empty;
        }

        // One character per complete byte pair.
        int length = count / 2;
        string result = string.InternalAllocateStr (length);

        // Decode directly into the string's character storage.
        fixed (byte* source = bytes) {
                fixed (char* target = result) {
                        GetCharsInternal (source + index, count, target, length);
                }
        }

        return result;
}
321
// Shared worker for GetChars and GetString: decodes "byteCount" bytes
// into "chars" and returns the number of characters produced
// (byteCount / 2).  Throws ArgumentException when "charCount" is too
// small to hold them.
private unsafe int GetCharsInternal (byte* bytes, int byteCount,
                                     char* chars, int charCount)
{
        int needed = byteCount / 2;

        // The destination must have room for every complete byte pair.
        if (needed > charCount) {
                throw new ArgumentException (_("Arg_InsufficientSpace"));
        }

        // CopyChars receives the raw byte count and ignores an odd
        // trailing byte itself.
        CopyChars (bytes, (byte*) chars, byteCount, bigEndian);
        return needed;
}
334
// Get an encoder for this encoding; the base class implementation
// is used unchanged.
[ComVisible (false)]
public override Encoder GetEncoder ()
{
        Encoder encoder = base.GetEncoder ();
        return encoder;
}
340         
// Get the maximum number of bytes needed to encode a
// specified number of characters (two bytes per character).
// Throws ArgumentOutOfRangeException when "charCount" is negative
// or so large that the byte count does not fit in an int.
public override int GetMaxByteCount (int charCount)
{
        if (charCount < 0) {
                throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
        }

        // charCount * 2 would silently wrap to a negative value above
        // this limit; callers sizing buffers from the result must get an
        // error rather than a bogus size.
        if (charCount > Int32.MaxValue / 2) {
                throw new ArgumentOutOfRangeException ("charCount");
        }

        return charCount * 2;
}
350
// Get the maximum number of characters that decoding a specified
// number of bytes can produce: one per complete byte pair.
public override int GetMaxCharCount (int byteCount)
{
        if (byteCount < 0)
                throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));

        int pairs = byteCount / 2;
        return pairs;
}
361
// Create a new UnicodeDecoder that uses this encoding's byte order.
public override Decoder GetDecoder ()
{
        Decoder decoder = new UnicodeDecoder (bigEndian);
        return decoder;
}
367
// Get the byte order mark for this encoding: FE FF for big-endian,
// FF FE for little-endian, or the shared "empty" array when the
// encoding was created without a byte order mark.
public override byte[] GetPreamble ()
{
        if (!byteOrderMark)
                return empty;

        // A fresh array is handed out on every call.
        if (bigEndian)
                return new byte[] { (byte)0xFE, (byte)0xFF };

        return new byte[] { (byte)0xFF, (byte)0xFE };
}
385
// Two UnicodeEncoding instances are equal when they agree on code
// page, byte order and byte order mark setting.  Anything that is
// not a UnicodeEncoding (including null) is never equal.
public override bool Equals (Object value)
{
        UnicodeEncoding other = value as UnicodeEncoding;
        if (other == null)
                return false;

        if (codePage != other.codePage)
                return false;
        if (bigEndian != other.bigEndian)
                return false;
        return byteOrderMark == other.byteOrderMark;
}
398
// Hash code for this encoding; defers entirely to the base class.
public override int GetHashCode ()
{
        int hash = base.GetHashCode ();
        return hash;
}
404
// Copy "count" bytes of 16-bit code units from "src" to "dest",
// exchanging the two bytes of every unit when the requested byte
// order differs from the host's.  An odd trailing byte is never
// copied.  Expects a non-negative "count".
private unsafe static void CopyChars (byte* src, byte* dest, int count, bool bigEndian)
{
        // Requested order equals the host order: plain copy, with the
        // length rounded down to an even number of bytes.
        if (BitConverter.IsLittleEndian != bigEndian) {
                string.memcpy (dest, src, count & unchecked ((int) 0xFFFFFFFE));
                return;
        }

        // Main loop: swap eight code units (16 bytes) per iteration.
        while (count >= 16) {
                dest [0] = src [1];
                dest [1] = src [0];
                dest [2] = src [3];
                dest [3] = src [2];
                dest [4] = src [5];
                dest [5] = src [4];
                dest [6] = src [7];
                dest [7] = src [6];
                dest [8] = src [9];
                dest [9] = src [8];
                dest [10] = src [11];
                dest [11] = src [10];
                dest [12] = src [13];
                dest [13] = src [12];
                dest [14] = src [15];
                dest [15] = src [14];
                dest += 16;
                src += 16;
                count -= 16;
        }

        // Fewer than 16 bytes remain.  Bits 3, 2 and 1 of "count" tell
        // whether a group of four, two and one code unit is still left;
        // bit 0 (an odd trailing byte) is ignored.
        if ((count & 8) != 0) {
                dest [0] = src [1];
                dest [1] = src [0];
                dest [2] = src [3];
                dest [3] = src [2];
                dest [4] = src [5];
                dest [5] = src [4];
                dest [6] = src [7];
                dest [7] = src [6];
                dest += 8;
                src += 8;
        }

        if ((count & 4) != 0) {
                dest [0] = src [1];
                dest [1] = src [0];
                dest [2] = src [3];
                dest [3] = src [2];
                dest += 4;
                src += 4;
        }

        if ((count & 2) != 0) {
                dest [0] = src [1];
                dest [1] = src [0];
        }
}
517
// Stateful decoder for 16-bit Unicode byte streams.  When a call to
// GetChars ends in the middle of a code unit, the odd byte is kept
// and combined with the first byte of the next call.
private sealed class UnicodeDecoder : Decoder
{
        private bool bigEndian;

        // Odd trailing byte left over from the previous GetChars call,
        // or -1 when nothing is pending.
        private int leftOverByte;

        // Create a decoder for the given byte order with no pending byte.
        public UnicodeDecoder (bool bigEndian)
        {
                this.bigEndian = bigEndian;
                leftOverByte = -1;
        }

        // Discard any pending byte so that the next GetChars call starts
        // on a code unit boundary, then let the base class reset whatever
        // state it keeps.  Without this override a reset would leave the
        // stale byte in place and misalign the next decode.
        public override void Reset ()
        {
                leftOverByte = -1;
                base.Reset ();
        }

        // Number of characters the next GetChars call would produce for
        // "count" bytes, taking a pending byte into account.
        public override int GetCharCount (byte[] bytes, int index, int count)
        {
                if (bytes == null) {
                        throw new ArgumentNullException ("bytes");
                }
                if (index < 0 || index > bytes.Length) {
                        throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
                }
                if (count < 0 || count > (bytes.Length - index)) {
                        throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
                }

                // A pending byte counts as one extra input byte.
                if (leftOverByte != -1) {
                        return (count + 1) / 2;
                } else {
                        return count / 2;
                }
        }

        // Decode "byteCount" bytes of "bytes", starting at "byteIndex",
        // into "chars" starting at "charIndex".  Returns the number of
        // characters written and remembers an odd trailing byte for the
        // next call.
        public unsafe override int GetChars (byte [] bytes, int byteIndex,
                                             int byteCount, char [] chars,
                                             int charIndex)
        {
                if (bytes == null) {
                        throw new ArgumentNullException ("bytes");
                }
                if (chars == null) {
                        throw new ArgumentNullException ("chars");
                }
                if (byteIndex < 0 || byteIndex > bytes.Length) {
                        throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
                }
                if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
                        throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
                }
                if (charIndex < 0 || charIndex > chars.Length) {
                        throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
                }

                // No input: any pending byte stays pending.
                if (byteCount == 0)
                        return 0;

                int leftOver = leftOverByte;
                int count;

                // Characters this call will produce; a pending byte counts
                // as one extra input byte.
                if (leftOver != -1)
                        count = (byteCount + 1) / 2;
                else
                        count = byteCount / 2;

                // Checked before anything is written or any state changes.
                if (chars.Length - charIndex < count)
                        throw new ArgumentException (_("Arg_InsufficientSpace"));

                // Complete the split code unit: the pending byte came first
                // in the stream, so it is the high byte for big-endian data
                // and the low byte for little-endian data.
                if (leftOver != -1) {
                        if (bigEndian)
                                chars [charIndex] = unchecked ((char) ((leftOver << 8) | (int) bytes [byteIndex]));
                        else
                                chars [charIndex] = unchecked ((char) (((int) bytes [byteIndex] << 8) | leftOver));
                        charIndex++;
                        byteIndex++;
                        byteCount--;
                }

                // Only pin the arrays when at least one whole code unit is
                // left to copy.
                if ((byteCount & unchecked ((int) 0xFFFFFFFE)) != 0)
                        fixed (byte* bytePtr = bytes)
                                fixed (char* charPtr = chars)
                                        CopyChars (bytePtr + byteIndex, (byte*) (charPtr + charIndex), byteCount, bigEndian);

                // Remember an odd trailing byte for the next call.
                if ((byteCount & 1) == 0)
                        leftOverByte = -1;
                else
                        leftOverByte = bytes [byteCount + byteIndex - 1];

                return count;
        }

} // class UnicodeDecoder
608
609 }; // class UnicodeEncoding
610
611 }; // namespace System.Text