merge -r 60439:60440
[mono.git] / mcs / class / corlib / System.Text / UnicodeEncoding.cs
1 /*
2  * UnicodeEncoding.cs - Implementation of the
3  *              "System.Text.UnicodeEncoding" class.
4  *
5  * Copyright (c) 2001, 2002  Southern Storm Software, Pty Ltd
6  * Copyright (C) 2003, 2004 Novell, Inc.
7  * Copyright (C) 2006 Kornél Pál <http://www.kornelpal.hu/>
8  *
9  * Permission is hereby granted, free of charge, to any person obtaining
10  * a copy of this software and associated documentation files (the "Software"),
11  * to deal in the Software without restriction, including without limitation
12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13  * and/or sell copies of the Software, and to permit persons to whom the
14  * Software is furnished to do so, subject to the following conditions:
15  *
16  * The above copyright notice and this permission notice shall be included
17  * in all copies or substantial portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
24  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25  * OTHER DEALINGS IN THE SOFTWARE.
26  */
27
28 namespace System.Text
29 {
30
31 using System;
32
33 [Serializable]
34 [MonoTODO ("Fix serialization compatibility with MS.NET")]
35 public class UnicodeEncoding : Encoding
36 {
        // Magic numbers used by Windows for Unicode.
        // The two-argument constructor passes 1200 to the base class for the
        // little-endian form and 1201 for the big-endian form.
        internal const int UNICODE_CODE_PAGE     = 1200;
        internal const int BIG_UNICODE_CODE_PAGE = 1201;

#if !ECMA_COMPAT
        // Size of characters in this encoding.
        // (Every byte/character count computed in this class uses the same
        // factor of two.)
        public const int CharSize = 2;
#endif

        // Internal state.
        // bigEndian: byte order used when copying between characters and
        // bytes; also selects the code page and the encoding names.
        private bool bigEndian;
        // byteOrderMark: when true, GetPreamble returns a two-byte byte
        // order mark; when false it returns an empty array.
        private bool byteOrderMark;
49
50         // Constructors.
51         public UnicodeEncoding () : this (false, true)
52         {
53                 bigEndian = false;
54                 byteOrderMark = true;
55         }
56         public UnicodeEncoding (bool bigEndian, bool byteOrderMark)
57                 : base ((bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE))
58         {
59                 this.bigEndian = bigEndian;
60                 this.byteOrderMark = byteOrderMark;
61
62                 if (bigEndian){
63                         body_name = "unicodeFFFE";
64                         encoding_name = "Unicode (Big-Endian)";
65                         header_name = "unicodeFFFE";
66                         is_browser_save = false;
67                         web_name = "unicodeFFFE";
68                 } else {
69                         body_name = "utf-16";
70                         encoding_name = "Unicode";
71                         header_name = "utf-16";
72                         is_browser_save = true;
73                         web_name = "utf-16";
74                 }
75                 
76                 // Windows reports the same code page number for
77                 // both the little-endian and big-endian forms.
78                 windows_code_page = UNICODE_CODE_PAGE;
79         }
80
81         // Get the number of bytes needed to encode a character buffer.
82         public override int GetByteCount (char[] chars, int index, int count)
83         {
84                 if (chars == null) {
85                         throw new ArgumentNullException ("chars");
86                 }
87                 if (index < 0 || index > chars.Length) {
88                         throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
89                 }
90                 if (count < 0 || count > (chars.Length - index)) {
91                         throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
92                 }
93                 return count * 2;
94         }
95
96         public override int GetByteCount (String s)
97         {
98                 if (s == null) {
99                         throw new ArgumentNullException ("s");
100                 }
101                 return s.Length * 2;
102         }
103
104 #if NET_2_0
105         [CLSCompliantAttribute (false)]
106         public unsafe override int GetByteCount (char* chars, int count)
107         {
108                 if (chars == null)
109                         throw new ArgumentNullException ("chars");
110                 if (count < 0)
111                         throw new ArgumentOutOfRangeException ("count");
112
113                 return count * 2;
114         }
115 #endif
116
117         // Get the bytes that result from encoding a character buffer.
118         public unsafe override int GetBytes (char [] chars, int charIndex, int charCount,
119                                                                                 byte [] bytes, int byteIndex)
120         {
121                 if (chars == null) {
122                         throw new ArgumentNullException ("chars");
123                 }
124                 if (bytes == null) {
125                         throw new ArgumentNullException ("bytes");
126                 }
127                 if (charIndex < 0 || charIndex > chars.Length) {
128                         throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
129                 }
130                 if (charCount < 0 || charCount > (chars.Length - charIndex)) {
131                         throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
132                 }
133                 if (byteIndex < 0 || byteIndex > bytes.Length) {
134                         throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
135                 }
136
137                 if (charCount == 0)
138                         return 0;
139
140                 int byteCount = bytes.Length - byteIndex;
141                 if (bytes.Length == 0)
142                         bytes = new byte [1];
143
144                 fixed (char* charPtr = chars)
145                         fixed (byte* bytePtr = bytes)
146                                 return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, byteCount);
147         }
148
149 #if !NET_2_0
150         public override byte [] GetBytes (String s)
151         {
152                 if (s == null)
153                         throw new ArgumentNullException ("s");
154
155                 int byteCount = GetByteCount (s);
156                 byte [] bytes = new byte [byteCount];
157
158                 GetBytes (s, 0, s.Length, bytes, 0);
159
160                 return bytes;
161         }
162 #endif
163
164         public unsafe override int GetBytes (String s, int charIndex, int charCount,
165                                                                                 byte [] bytes, int byteIndex)
166         {
167                 if (s == null) {
168                         throw new ArgumentNullException ("s");
169                 }
170                 if (bytes == null) {
171                         throw new ArgumentNullException ("bytes");
172                 }
173                 if (charIndex < 0 || charIndex > s.Length) {
174                         throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
175                 }
176                 if (charCount < 0 || charCount > (s.Length - charIndex)) {
177                         throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
178                 }
179                 if (byteIndex < 0 || byteIndex > bytes.Length) {
180                         throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
181                 }
182
183                 // For consistency
184                 if (charCount == 0)
185                         return 0;
186
187                 int byteCount = bytes.Length - byteIndex;
188                 if (bytes.Length == 0)
189                         bytes = new byte [1];
190
191                 fixed (char* charPtr = s)
192                         fixed (byte* bytePtr = bytes)
193                                 return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, byteCount);
194         }
195
196 #if NET_2_0
197         [CLSCompliantAttribute (false)]
198         public unsafe override int GetBytes (char* chars, int charCount,
199                                                                                 byte* bytes, int byteCount)
200         {
201                 if (bytes == null)
202                         throw new ArgumentNullException ("bytes");
203                 if (chars == null)
204                         throw new ArgumentNullException ("chars");
205                 if (charCount < 0)
206                         throw new ArgumentOutOfRangeException ("charCount");
207                 if (byteCount < 0)
208                         throw new ArgumentOutOfRangeException ("byteCount");
209
210                 return GetBytesInternal (chars, charCount, bytes, byteCount);
211         }
212 #endif
213
214         private unsafe int GetBytesInternal (char* chars, int charCount,
215                                                                                 byte* bytes, int byteCount)
216         {
217                 int count = charCount * 2;
218
219                 if (byteCount < count)
220                         throw new ArgumentException (_("Arg_InsufficientSpace"));
221
222                 CopyChars ((byte*) chars, bytes, count, bigEndian);
223                 return count;
224         }
225
226         // Get the number of characters needed to decode a byte buffer.
227         public override int GetCharCount (byte[] bytes, int index, int count)
228         {
229                 if (bytes == null) {
230                         throw new ArgumentNullException ("bytes");
231                 }
232                 if (index < 0 || index > bytes.Length) {
233                         throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
234                 }
235                 if (count < 0 || count > (bytes.Length - index)) {
236                         throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
237                 }
238                 return count / 2;
239         }
240
241 #if NET_2_0
242         [CLSCompliantAttribute (false)]
243         public unsafe override int GetCharCount (byte* bytes, int count)
244         {
245                 if (bytes == null)
246                         throw new ArgumentNullException ("bytes");
247                 if (count < 0)
248                         throw new ArgumentOutOfRangeException ("count");
249
250                 return count / 2;
251         }
252 #endif
253
254         // Get the characters that result from decoding a byte buffer.
255         public unsafe override int GetChars (byte [] bytes, int byteIndex, int byteCount,
256                                                                                 char [] chars, int charIndex)
257         {
258                 if (bytes == null) {
259                         throw new ArgumentNullException ("bytes");
260                 }
261                 if (chars == null) {
262                         throw new ArgumentNullException ("chars");
263                 }
264                 if (byteIndex < 0 || byteIndex > bytes.Length) {
265                         throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
266                 }
267                 if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
268                         throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
269                 }
270                 if (charIndex < 0 || charIndex > chars.Length) {
271                         throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
272                 }
273
274                 if (byteCount == 0)
275                         return 0;
276
277                 int charCount = chars.Length - charIndex;
278                 if (chars.Length == 0)
279                         chars = new char [1];
280
281                 fixed (byte* bytePtr = bytes)
282                         fixed (char* charPtr = chars)
283                                 return GetCharsInternal (bytePtr + byteIndex, byteCount, charPtr + charIndex, charCount);
284 }
285
286 #if NET_2_0
287         [CLSCompliantAttribute (false)]
288         public unsafe override int GetChars (byte* bytes, int byteCount,
289                                                                                 char* chars, int charCount)
290         {
291                 if (bytes == null)
292                         throw new ArgumentNullException ("bytes");
293                 if (chars == null)
294                         throw new ArgumentNullException ("chars");
295                 if (charCount < 0)
296                         throw new ArgumentOutOfRangeException ("charCount");
297                 if (byteCount < 0)
298                         throw new ArgumentOutOfRangeException ("byteCount");
299
300                 return GetCharsInternal (bytes, byteCount, chars, charCount);
301         }
302 #endif
303
304         // Decode a buffer of bytes into a string.
305         public unsafe override String GetString (byte [] bytes, int index, int count)
306         {
307                 if (bytes == null)
308                         throw new ArgumentNullException ("bytes");
309                 if (index < 0 || index > bytes.Length)
310                         throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
311                 if (count < 0 || count > (bytes.Length - index))
312                         throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
313
314                 if (count == 0)
315                         return string.Empty;
316
317                 // GetCharCountInternal
318                 int charCount = count / 2;
319                 string s = string.InternalAllocateStr (charCount);
320
321                 fixed (byte* bytePtr = bytes)
322                         fixed (char* charPtr = s)
323                                 GetCharsInternal (bytePtr + index, count, charPtr, charCount);
324
325                 return s;
326         }
327
328         private unsafe int GetCharsInternal (byte* bytes, int byteCount,
329                                                                                 char* chars, int charCount)
330         {
331                 int count = byteCount / 2;
332                 bool isBigEndian;
333
334                 // Determine the byte order in the incoming buffer.
335                 if (byteCount >= 2)
336                 {
337                         if (bytes [0] == (byte) 0xFE && bytes [1] == (byte) 0xFF)
338                                 isBigEndian = true;
339                         else if (bytes [0] == (byte) 0xFF && bytes [1] == (byte) 0xFE)
340                                 isBigEndian = false;
341                         else
342                                 isBigEndian = bigEndian;
343                 } else {
344                         isBigEndian = bigEndian;
345                 }
346
347                 // Validate that we have sufficient space in "chars".
348                 if (charCount < count)
349                         throw new ArgumentException (_("Arg_InsufficientSpace"));
350
351                 CopyChars (bytes, (byte*) chars, byteCount, isBigEndian);
352                 return count;
353         }
354
355         // Get the maximum number of bytes needed to encode a
356         // specified number of characters.
357         public override int GetMaxByteCount (int charCount)
358         {
359                 if (charCount < 0) {
360                         throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
361                 }
362                 return charCount * 2;
363         }
364
365         // Get the maximum number of characters needed to decode a
366         // specified number of bytes.
367         public override int GetMaxCharCount (int byteCount)
368         {
369                 if (byteCount < 0) {
370                         throw new ArgumentOutOfRangeException
371                                 ("byteCount", _("ArgRange_NonNegative"));
372                 }
373                 return byteCount / 2;
374         }
375
        // Get a Unicode-specific decoder that is attached to this instance.
        // Each call creates a new UnicodeDecoder configured with this
        // encoding's byte order. Unlike the GetChars methods of this class,
        // that decoder keeps an odd trailing byte between calls.
        public override Decoder GetDecoder ()
        {
                return new UnicodeDecoder (bigEndian);
        }
381
382         // Get the Unicode preamble.
383         public override byte[] GetPreamble ()
384         {
385                 if (byteOrderMark) {
386                         byte[] preamble = new byte[2];
387                         if (bigEndian) {
388                                 preamble[0] = (byte)0xFE;
389                                 preamble[1] = (byte)0xFF;
390                         } else {
391                                 preamble[0] = (byte)0xFF;
392                                 preamble[1] = (byte)0xFE;
393                         }
394                         return preamble;
395                 } else {
396                         return new byte [0];
397                 }
398         }
399
400         // Determine if this object is equal to another.
401         public override bool Equals (Object value)
402         {
403                 UnicodeEncoding enc = (value as UnicodeEncoding);
404                 if (enc != null) {
405                         return (codePage == enc.codePage &&
406                                         bigEndian == enc.bigEndian &&
407                                         byteOrderMark == enc.byteOrderMark);
408                 } else {
409                         return false;
410                 }
411         }
412
        // Get the hash code for this object.
        // Overridden alongside Equals; the value itself comes unchanged
        // from the base class implementation.
        public override int GetHashCode ()
        {
                return base.GetHashCode ();
        }
418
        // Copy "count" bytes of 16-bit character data from "src" to "dest",
        // converting between the byte order of the running platform and the
        // byte order selected by "bigEndian".
        //
        //   src, dest - raw source and destination buffers.
        //   count     - number of bytes to process; only complete pairs are
        //               copied, so a trailing odd byte is ignored.
        //               NOTE(review): the code assumes count is not
        //               negative - confirm all callers guarantee that.
        //   bigEndian - byte order of the encoded (byte) side of the copy.
        //
        // When the platform order already matches, this is a plain memcpy.
        // Otherwise the two bytes of every pair are exchanged by a
        // hand-unrolled routine: 16 bytes per loop iteration, then a tail
        // of 8, 4 and 2 bytes chosen from the bits of the remaining count.
        private unsafe static void CopyChars (byte* src, byte* dest, int count, bool bigEndian)
        {
                // Little-endian platform with little-endian data, or
                // big-endian platform with big-endian data: no swapping is
                // needed. The mask clears bit 0 to drop an odd byte.
                if (BitConverter.IsLittleEndian != bigEndian) {
                        string.memcpy (dest, src, count & unchecked ((int) 0xFFFFFFFE));
                        return;
                }

                // Fewer than 16 bytes: skip the main loop and jump straight
                // to the tail section that handles the largest power of two
                // not exceeding count. Counts of 0 and 1 hold no full pair.
                switch (count) {
                case 0:
                        return;
                case 1:
                        return;
                case 2:
                        goto Count2;
                case 3:
                        goto Count2;
                case 4:
                        goto Count4;
                case 5:
                        goto Count4;
                case 6:
                        goto Count4;
                case 7:
                        goto Count4;
                case 8:
                        goto Count8;
                case 9:
                        goto Count8;
                case 10:
                        goto Count8;
                case 11:
                        goto Count8;
                case 12:
                        goto Count8;
                case 13:
                        goto Count8;
                case 14:
                        goto Count8;
                case 15:
                        goto Count8;
                }

                // Main loop: swap eight pairs (16 bytes) per iteration. The
                // loop condition is true while any bit above the low four is
                // set, i.e. while at least 16 bytes remain.
                do {
                        dest [0] = src [1];
                        dest [1] = src [0];
                        dest [2] = src [3];
                        dest [3] = src [2];
                        dest [4] = src [5];
                        dest [5] = src [4];
                        dest [6] = src [7];
                        dest [7] = src [6];
                        dest [8] = src [9];
                        dest [9] = src [8];
                        dest [10] = src [11];
                        dest [11] = src [10];
                        dest [12] = src [13];
                        dest [13] = src [12];
                        dest [14] = src [15];
                        dest [15] = src [14];
                        dest += 16;
                        src += 16;
                        count -= 16;
                } while ((count & unchecked ((int) 0xFFFFFFF0)) != 0);

                // 0..15 bytes remain. Values 8..15 match no case below and
                // fall through to Count8.
                switch (count) {
                case 0:
                        return;
                case 1:
                        return;
                case 2:
                        goto Count2;
                case 3:
                        goto Count2;
                case 4:
                        goto Count4;
                case 5:
                        goto Count4;
                case 6:
                        goto Count4;
                case 7:
                        goto Count4;
                }

                // Tail: swap four pairs (8 bytes).
                Count8:;
                dest [0] = src [1];
                dest [1] = src [0];
                dest [2] = src [3];
                dest [3] = src [2];
                dest [4] = src [5];
                dest [5] = src [4];
                dest [6] = src [7];
                dest [7] = src [6];
                dest += 8;
                src += 8;

                // Bit 2 of count tells whether another 4 bytes remain.
                if ((count & 4) == 0)
                        goto TestCount2;
                // Tail: swap two pairs (4 bytes).
                Count4:;
                dest [0] = src [1];
                dest [1] = src [0];
                dest [2] = src [3];
                dest [3] = src [2];
                dest += 4;
                src += 4;

                // Bit 1 of count tells whether one last pair remains.
                TestCount2:;
                if ((count & 2) == 0)
                        return;
                // Tail: swap the final pair (2 bytes).
                Count2:;
                dest [0] = src [1];
                dest [1] = src [0];
        }
531
532         // Unicode decoder implementation.
533         private sealed class UnicodeDecoder : Decoder
534         {
535                 private bool bigEndian;
536                 private int leftOverByte;
537
538                 // Constructor.
539                 public UnicodeDecoder (bool bigEndian)
540                 {
541                         this.bigEndian = bigEndian;
542                         leftOverByte = -1;
543                 }
544
545                 // Override inherited methods.
546                 public override int GetCharCount (byte[] bytes, int index, int count)
547                 {
548                         if (bytes == null) {
549                                 throw new ArgumentNullException ("bytes");
550                         }
551                         if (index < 0 || index > bytes.Length) {
552                                 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
553                         }
554                         if (count < 0 || count > (bytes.Length - index)) {
555                                 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
556                         }
557                         if (leftOverByte != -1) {
558                                 return (count + 1) / 2;
559                         } else {
560                                 return count / 2;
561                         }
562                 }
563                 
564                 public unsafe override int GetChars (byte [] bytes, int byteIndex,
565                                                                                         int byteCount, char [] chars,
566                                                                                         int charIndex)
567                 {
568                         if (bytes == null) {
569                                 throw new ArgumentNullException ("bytes");
570                         }
571                         if (chars == null) {
572                                 throw new ArgumentNullException ("chars");
573                         }
574                         if (byteIndex < 0 || byteIndex > bytes.Length) {
575                                 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
576                         }
577                         if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
578                                 throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
579                         }
580                         if (charIndex < 0 || charIndex > chars.Length) {
581                                 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
582                         }
583
584                         if (byteCount == 0)
585                                 return 0;
586
587                         bool isBigEndian = bigEndian;
588                         int leftOver = leftOverByte;
589                         int count;
590
591                         if (leftOver != -1)
592                                 count = (byteCount + 1) / 2;
593                         else
594                                 count = byteCount / 2;
595
596                         if (chars.Length - charIndex < count)
597                                 throw new ArgumentException (_("Arg_InsufficientSpace"));
598
599                         if (leftOver != -1) {
600                                 if (isBigEndian)
601                                         chars [charIndex] = unchecked ((char) ((leftOver << 8) | (int) bytes [byteIndex]));
602                                 else
603                                         chars [charIndex] = unchecked ((char) (((int) bytes [byteIndex] << 8) | leftOver));
604                                 charIndex++;
605                                 byteIndex++;
606                                 byteCount--;
607                         }
608
609                         if ((byteCount & unchecked ((int) 0xFFFFFFFE)) != 0)
610                                 fixed (byte* bytePtr = bytes)
611                                         fixed (char* charPtr = chars)
612                                                 CopyChars (bytePtr + byteIndex, (byte*) (charPtr + charIndex), byteCount, isBigEndian);
613
614                         if ((byteCount & 1) == 0)
615                                 leftOverByte = -1;
616                         else
617                                 leftOverByte = bytes [byteCount + byteIndex - 1];
618
619                         return count;
620                 }
621
622         } // class UnicodeDecoder
623
624 }; // class UnicodeEncoding
625
626 }; // namespace System.Text