New test.
[mono.git] / mcs / class / corlib / System.Text / UnicodeEncoding.cs
1 /*
2  * UnicodeEncoding.cs - Implementation of the
3  *              "System.Text.UnicodeEncoding" class.
4  *
5  * Copyright (c) 2001, 2002  Southern Storm Software, Pty Ltd
6  * Copyright (C) 2003, 2004 Novell, Inc.
7  * Copyright (C) 2006 Kornél Pál <http://www.kornelpal.hu/>
8  *
9  * Permission is hereby granted, free of charge, to any person obtaining
10  * a copy of this software and associated documentation files (the "Software"),
11  * to deal in the Software without restriction, including without limitation
12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13  * and/or sell copies of the Software, and to permit persons to whom the
14  * Software is furnished to do so, subject to the following conditions:
15  *
16  * The above copyright notice and this permission notice shall be included
17  * in all copies or substantial portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
24  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25  * OTHER DEALINGS IN THE SOFTWARE.
26  */
27
28 namespace System.Text
29 {
30
31 using System;
32 using System.Runtime.InteropServices;
33
34 [Serializable]
35 #if NET_2_0
36 [ComVisible (true)]
37 #endif
38 [MonoTODO ("Serialization format not compatible with .NET")]
39 public class UnicodeEncoding : Encoding
40 {
// Code page numbers used by Windows for Unicode.  The constructor
// passes 1200 to the base class for the little-endian form and 1201
// for the big-endian form.
internal const int UNICODE_CODE_PAGE     = 1200;
internal const int BIG_UNICODE_CODE_PAGE = 1201;

#if !ECMA_COMPAT
// Size of characters in this encoding.
public const int CharSize = 2;
#endif

// Internal state.  Both flags are assigned only by the constructors,
// so they are declared read-only.
//
// bigEndian     - selects the byte order used when copying characters.
// byteOrderMark - selects whether GetPreamble () returns a two-byte
//                 mark or an empty array.
private readonly bool bigEndian;
private readonly bool byteOrderMark;
53
54         // Constructors.
// Constructs a little-endian encoding whose preamble is a byte order
// mark.  The chained two-argument constructor already stores both
// flags, so nothing is left to do in the body.
public UnicodeEncoding () : this (false, true)
{
}
// Constructs an encoding with an explicit byte order.
//
// bigEndian     - true selects the big-endian form (code page
//                 BIG_UNICODE_CODE_PAGE is handed to the base class),
//                 false the little-endian form (UNICODE_CODE_PAGE).
// byteOrderMark - true makes GetPreamble () return a byte order mark.
public UnicodeEncoding (bool bigEndian, bool byteOrderMark)
        : base (bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE)
{
        this.bigEndian = bigEndian;
        this.byteOrderMark = byteOrderMark;

        // The body, header and web names are always the same string
        // for a given byte order.
        string name = bigEndian ? "unicodeFFFE" : "utf-16";

        body_name = name;
        encoding_name = bigEndian ? "Unicode (Big-Endian)" : "Unicode";
        header_name = name;
        is_browser_save = !bigEndian;
        web_name = name;

        // Windows reports the same code page number for
        // both the little-endian and big-endian forms.
        windows_code_page = UNICODE_CODE_PAGE;
}
84
85 #if NET_2_0
// Constructs an encoding with an explicit byte order.  The
// throwOnInvalidBytes argument is accepted but not used yet (see the
// MonoTODO); construction is delegated unchanged to the two-argument
// overload.
[MonoTODO ("Implement throwOnInvalidBytes")]
public UnicodeEncoding (bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes)
        : this (bigEndian, byteOrderMark)
{
}
91 #endif
92
// Returns the number of bytes produced by encoding "count" characters
// of "chars" starting at "index": two bytes per character.
//
// Throws ArgumentNullException if chars is null, and
// ArgumentOutOfRangeException if index or count does not describe a
// range inside the array.
public override int GetByteCount (char[] chars, int index, int count)
{
        if (chars == null)
                throw new ArgumentNullException ("chars");

        if (index < 0 || index > chars.Length)
                throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));

        int remaining = chars.Length - index;
        if (count < 0 || count > remaining)
                throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

        return 2 * count;
}
107
// Returns the number of bytes produced by encoding every character of
// "s": two bytes per character.  Throws ArgumentNullException if s is
// null.
public override int GetByteCount (String s)
{
        if (s != null)
                return 2 * s.Length;

        throw new ArgumentNullException ("s");
}
115
116 #if NET_2_0
// Returns the number of bytes produced by encoding "count" characters
// starting at "chars": two bytes per character.
//
// Throws ArgumentNullException if chars is null, and
// ArgumentOutOfRangeException if count is negative or so large that
// the resulting byte count does not fit in an int.
[CLSCompliantAttribute (false)]
[ComVisible (false)]
public unsafe override int GetByteCount (char* chars, int count)
{
        if (chars == null)
                throw new ArgumentNullException ("chars");

        // Unlike the array overloads, count is not bounded by an array
        // length here, so count * 2 could silently wrap around to a
        // negative value; reject such counts instead.
        if (count < 0 || count > Int32.MaxValue / 2)
                throw new ArgumentOutOfRangeException ("count");

        return count * 2;
}
128 #endif
129
// Encodes "charCount" characters of "chars", starting at "charIndex",
// into "bytes" starting at "byteIndex", and returns the number of
// bytes written.
//
// Throws ArgumentNullException if chars or bytes is null and
// ArgumentOutOfRangeException if an index or count lies outside its
// array.  GetBytesInternal throws ArgumentException when the space
// after byteIndex is too small for the result.
public unsafe override int GetBytes (char [] chars, int charIndex, int charCount,
                                     byte [] bytes, int byteIndex)
{
        if (chars == null)
                throw new ArgumentNullException ("chars");
        if (bytes == null)
                throw new ArgumentNullException ("bytes");
        if (charIndex < 0 || charIndex > chars.Length)
                throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
        if (charCount < 0 || charCount > (chars.Length - charIndex))
                throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
        if (byteIndex < 0 || byteIndex > bytes.Length)
                throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));

        // Nothing to encode.
        if (charCount == 0) {
                return 0;
        }

        // The available space is measured on the caller's array before
        // an empty array is swapped for a one-element dummy, so it is
        // still zero in that case and GetBytesInternal rejects the
        // call.  The dummy presumably exists so that the fixed
        // statement never has to pin a zero-length array.
        int available = bytes.Length - byteIndex;
        if (bytes.Length == 0) {
                bytes = new byte [1];
        }

        fixed (char* source = chars) {
                fixed (byte* target = bytes) {
                        return GetBytesInternal (source + charIndex, charCount, target + byteIndex, available);
                }
        }
}
161
162 #if !NET_2_0
// Encodes every character of "s" into a newly allocated byte array of
// exactly GetByteCount (s) bytes and returns that array.  Throws
// ArgumentNullException if s is null.
public override byte [] GetBytes (String s)
{
        if (s == null) {
                throw new ArgumentNullException ("s");
        }

        byte [] result = new byte [GetByteCount (s)];
        GetBytes (s, 0, s.Length, result, 0);
        return result;
}
175 #endif
176
// Encodes "charCount" characters of "s", starting at "charIndex", into
// "bytes" starting at "byteIndex", and returns the number of bytes
// written.
//
// Throws ArgumentNullException if s or bytes is null and
// ArgumentOutOfRangeException if an index or count lies outside the
// string or the array.  GetBytesInternal throws ArgumentException when
// the space after byteIndex is too small for the result.
public unsafe override int GetBytes (String s, int charIndex, int charCount,
                                     byte [] bytes, int byteIndex)
{
        if (s == null)
                throw new ArgumentNullException ("s");
        if (bytes == null)
                throw new ArgumentNullException ("bytes");
        if (charIndex < 0 || charIndex > s.Length)
                throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
        if (charCount < 0 || charCount > (s.Length - charIndex))
                throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
        if (byteIndex < 0 || byteIndex > bytes.Length)
                throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));

        // Same early exit as the char [] overload.
        if (charCount == 0) {
                return 0;
        }

        // Measure the space on the caller's array before an empty array
        // is replaced by a one-element dummy; the measured space is
        // then still zero and GetBytesInternal rejects the call.
        int available = bytes.Length - byteIndex;
        if (bytes.Length == 0) {
                bytes = new byte [1];
        }

        fixed (char* source = s) {
                fixed (byte* target = bytes) {
                        return GetBytesInternal (source + charIndex, charCount, target + byteIndex, available);
                }
        }
}
208
209 #if NET_2_0
// Encodes "charCount" characters at "chars" into the "byteCount" bytes
// available at "bytes" and returns the number of bytes written.
//
// Throws ArgumentNullException if either pointer is null and
// ArgumentOutOfRangeException if either count is negative.  The
// remaining work, including the space check, is done by
// GetBytesInternal.
[CLSCompliantAttribute (false)]
[ComVisible (false)]
public unsafe override int GetBytes (char* chars, int charCount,
                                     byte* bytes, int byteCount)
{
        // Pointers are validated first, destination before source.
        if (bytes == null) {
                throw new ArgumentNullException ("bytes");
        }
        if (chars == null) {
                throw new ArgumentNullException ("chars");
        }

        if (charCount < 0) {
                throw new ArgumentOutOfRangeException ("charCount");
        }
        if (byteCount < 0) {
                throw new ArgumentOutOfRangeException ("byteCount");
        }

        int written = GetBytesInternal (chars, charCount, bytes, byteCount);
        return written;
}
226 #endif
227
// Shared implementation of the GetBytes overloads: copies the
// "charCount" characters at "chars" to "bytes" in this encoding's byte
// order and returns the number of bytes written (two per character).
// The public overloads validate the pointers and reject negative
// counts before calling this method.
//
// Throws ArgumentException if "byteCount" bytes cannot hold the result.
private unsafe int GetBytesInternal (char* chars, int charCount,
                                     byte* bytes, int byteCount)
{
        // Compare in units of characters.  charCount * 2 can overflow
        // for counts above Int32.MaxValue / 2 (the pointer overload of
        // GetBytes only rejects negative counts), and a wrapped-around
        // product would let an oversized request slip past a
        // "byteCount < charCount * 2" test and reach CopyChars.
        if (charCount > byteCount / 2)
                throw new ArgumentException (_("Arg_InsufficientSpace"));

        // Safe now: charCount <= byteCount / 2 <= Int32.MaxValue / 2.
        int count = charCount * 2;

        CopyChars ((byte*) chars, bytes, count, bigEndian);
        return count;
}
239
// Returns the number of characters produced by decoding "count" bytes
// of "bytes" starting at "index": one character per complete pair of
// bytes (count / 2).
//
// Throws ArgumentNullException if bytes is null, and
// ArgumentOutOfRangeException if index or count does not describe a
// range inside the array.
public override int GetCharCount (byte[] bytes, int index, int count)
{
        if (bytes == null)
                throw new ArgumentNullException ("bytes");

        if (index < 0 || index > bytes.Length)
                throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));

        int remaining = bytes.Length - index;
        if (count < 0 || count > remaining)
                throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

        int pairs = count / 2;
        return pairs;
}
254
255 #if NET_2_0
// Returns the number of characters produced by decoding "count" bytes
// starting at "bytes": one character per complete pair of bytes.
//
// Throws ArgumentNullException if bytes is null and
// ArgumentOutOfRangeException if count is negative.
[CLSCompliantAttribute (false)]
[ComVisible (false)]
public unsafe override int GetCharCount (byte* bytes, int count)
{
        if (bytes == null) {
                throw new ArgumentNullException ("bytes");
        }
        if (count < 0) {
                throw new ArgumentOutOfRangeException ("count");
        }

        // count is non-negative here, so the shift equals count / 2.
        return count >> 1;
}
267 #endif
268
// Decodes "byteCount" bytes of "bytes", starting at "byteIndex", into
// "chars" starting at "charIndex", and returns the number of
// characters written.
//
// Throws ArgumentNullException if bytes or chars is null and
// ArgumentOutOfRangeException if an index or count lies outside its
// array.  GetCharsInternal throws ArgumentException when the space
// after charIndex is too small for the result.
public unsafe override int GetChars (byte [] bytes, int byteIndex, int byteCount,
                                     char [] chars, int charIndex)
{
        if (bytes == null)
                throw new ArgumentNullException ("bytes");
        if (chars == null)
                throw new ArgumentNullException ("chars");
        if (byteIndex < 0 || byteIndex > bytes.Length)
                throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
        if (byteCount < 0 || byteCount > (bytes.Length - byteIndex))
                throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
        if (charIndex < 0 || charIndex > chars.Length)
                throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));

        // Nothing to decode.
        if (byteCount == 0) {
                return 0;
        }

        // Measure the space on the caller's array before an empty array
        // is replaced by a one-element dummy; the measured space is
        // then still zero.
        int available = chars.Length - charIndex;
        if (chars.Length == 0) {
                chars = new char [1];
        }

        fixed (byte* source = bytes) {
                fixed (char* target = chars) {
                        return GetCharsInternal (source + byteIndex, byteCount, target + charIndex, available);
                }
        }
}
300
301 #if NET_2_0
// Decodes "byteCount" bytes at "bytes" into the "charCount" characters
// available at "chars" and returns the number of characters written.
//
// Throws ArgumentNullException if either pointer is null and
// ArgumentOutOfRangeException if either count is negative.  The
// remaining work, including the space check, is done by
// GetCharsInternal.
[CLSCompliantAttribute (false)]
[ComVisible (false)]
public unsafe override int GetChars (byte* bytes, int byteCount,
                                     char* chars, int charCount)
{
        // Pointers are validated first, source before destination.
        if (bytes == null) {
                throw new ArgumentNullException ("bytes");
        }
        if (chars == null) {
                throw new ArgumentNullException ("chars");
        }

        if (charCount < 0) {
                throw new ArgumentOutOfRangeException ("charCount");
        }
        if (byteCount < 0) {
                throw new ArgumentOutOfRangeException ("byteCount");
        }

        int written = GetCharsInternal (bytes, byteCount, chars, charCount);
        return written;
}
318 #endif
319
// Decodes "count" bytes of "bytes", starting at "index", into a new
// string of count / 2 characters.  An empty range yields string.Empty.
//
// Throws ArgumentNullException if bytes is null, and
// ArgumentOutOfRangeException if index or count does not describe a
// range inside the array.
[ComVisible (false)]
public unsafe override String GetString (byte [] bytes, int index, int count)
{
        if (bytes == null) {
                throw new ArgumentNullException ("bytes");
        }
        if (index < 0 || index > bytes.Length) {
                throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
        }
        if (count < 0 || count > (bytes.Length - index)) {
                throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
        }

        if (count == 0) {
                return string.Empty;
        }

        // Allocate the result first, then decode straight into its
        // character storage.
        int length = count / 2;
        string result = string.InternalAllocateStr (length);

        fixed (byte* source = bytes) {
                fixed (char* target = result) {
                        GetCharsInternal (source + index, count, target, length);
                }
        }

        return result;
}
344
// Shared implementation of GetChars and GetString: copies "byteCount"
// bytes at "bytes" to "chars" in this encoding's byte order and
// returns the number of characters produced (byteCount / 2).
//
// Throws ArgumentException if "charCount" characters cannot hold the
// result.
private unsafe int GetCharsInternal (byte* bytes, int byteCount,
                                     char* chars, int charCount)
{
        int produced = byteCount / 2;

        // The destination must have room for every decoded character.
        if (produced > charCount) {
                throw new ArgumentException (_("Arg_InsufficientSpace"));
        }

        CopyChars (bytes, (byte*) chars, byteCount, bigEndian);
        return produced;
}
357
// Get the maximum number of bytes needed to encode a
// specified number of characters: two bytes per character.
//
// Throws ArgumentOutOfRangeException if charCount is negative or so
// large that the byte count does not fit in an int.
public override int GetMaxByteCount (int charCount)
{
        if (charCount < 0) {
                throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
        }

        // charCount * 2 would wrap around to a negative "maximum" for
        // counts above Int32.MaxValue / 2; report the problem instead of
        // returning a value no caller can allocate.
        if (charCount > Int32.MaxValue / 2) {
                throw new ArgumentOutOfRangeException ("charCount");
        }

        return charCount * 2;
}
367
// Get the maximum number of characters needed to decode a
// specified number of bytes.
//
// The result is byteCount / 2 rounded up.  The decoder returned by
// GetDecoder () can carry one byte over from a previous call, in which
// case its GetCharCount reports (count + 1) / 2 characters, so rounding
// down would under-report the maximum for odd byte counts.  Even byte
// counts give the same result as plain byteCount / 2.
//
// Throws ArgumentOutOfRangeException if byteCount is negative.
public override int GetMaxCharCount (int byteCount)
{
        if (byteCount < 0) {
                throw new ArgumentOutOfRangeException
                        ("byteCount", _("ArgRange_NonNegative"));
        }

        // Written as a sum so that byteCount == Int32.MaxValue cannot
        // overflow the way (byteCount + 1) / 2 would.
        return byteCount / 2 + (byteCount & 1);
}
378
// Creates a new UnicodeDecoder for this encoding.  The decoder is
// given this instance's byte order and nothing else.
public override Decoder GetDecoder ()
{
        Decoder decoder = new UnicodeDecoder (bigEndian);
        return decoder;
}
384
// Returns the preamble for this encoding: a new two-byte array holding
// the byte order mark (FE FF for big-endian, FF FE for little-endian)
// when the instance was created with byteOrderMark set, otherwise a
// new empty array.
public override byte[] GetPreamble ()
{
        if (!byteOrderMark)
                return new byte [0];

        if (bigEndian)
                return new byte [] { 0xFE, 0xFF };

        return new byte [] { 0xFF, 0xFE };
}
402
// Returns true when "value" is a UnicodeEncoding with the same code
// page, byte order and byte-order-mark setting as this instance, and
// false for null or any other kind of object.
public override bool Equals (Object value)
{
        UnicodeEncoding other = value as UnicodeEncoding;
        if (other == null)
                return false;

        return codePage == other.codePage
                && bigEndian == other.bigEndian
                && byteOrderMark == other.byteOrderMark;
}
415
// Returns the hash code computed by the base class.  The override
// accompanies the Equals override in this class.
public override int GetHashCode ()
{
        int hash = base.GetHashCode ();
        return hash;
}
421
// Copies "count" bytes of two-byte characters from "src" to "dest".
// When the requested byte order matches the machine's, the data is
// copied unchanged; otherwise the two bytes of every character are
// swapped.  A trailing odd byte is never copied.
//
// src       - first byte to read.
// dest      - first byte to write.
// count     - number of bytes to process; callers pass a non-negative
//             value.
// bigEndian - byte order of the encoded side of the copy.
private unsafe static void CopyChars (byte* src, byte* dest, int count, bool bigEndian)
{
        // Same byte order as the machine: block-copy the even-length
        // prefix and stop.
        if (BitConverter.IsLittleEndian != bigEndian) {
                string.memcpy (dest, src, count & unchecked ((int) 0xFFFFFFFE));
                return;
        }

        // Swapping path.  Handle 16 bytes (eight characters) per
        // iteration for as long as any bit above the low four is set
        // in count, i.e. while at least 16 bytes remain.
        while ((count & unchecked ((int) 0xFFFFFFF0)) != 0) {
                dest [0] = src [1];
                dest [1] = src [0];
                dest [2] = src [3];
                dest [3] = src [2];
                dest [4] = src [5];
                dest [5] = src [4];
                dest [6] = src [7];
                dest [7] = src [6];
                dest [8] = src [9];
                dest [9] = src [8];
                dest [10] = src [11];
                dest [11] = src [10];
                dest [12] = src [13];
                dest [13] = src [12];
                dest [14] = src [15];
                dest [15] = src [14];
                dest += 16;
                src += 16;
                count -= 16;
        }

        // Fewer than 16 bytes remain.  Bits 3, 2 and 1 of count tell
        // whether a group of 8, 4 and 2 bytes is still pending; bit 0
        // (an unpaired byte) is ignored.
        if ((count & 8) != 0) {
                dest [0] = src [1];
                dest [1] = src [0];
                dest [2] = src [3];
                dest [3] = src [2];
                dest [4] = src [5];
                dest [5] = src [4];
                dest [6] = src [7];
                dest [7] = src [6];
                dest += 8;
                src += 8;
        }

        if ((count & 4) != 0) {
                dest [0] = src [1];
                dest [1] = src [0];
                dest [2] = src [3];
                dest [3] = src [2];
                dest += 4;
                src += 4;
        }

        if ((count & 2) != 0) {
                dest [0] = src [1];
                dest [1] = src [0];
        }
}
534
// Decoder for two-byte Unicode data that keeps state between calls: a
// byte left unpaired at the end of one GetChars call is remembered and
// combined with the first byte of the next call.
private sealed class UnicodeDecoder : Decoder
{
        // Byte order of the incoming data.
        private bool bigEndian;

        // Unpaired byte carried over from the previous GetChars call,
        // or -1 when there is none.
        private int leftOverByte;

        // Creates a decoder for the given byte order with no byte
        // carried over.
        public UnicodeDecoder (bool bigEndian)
        {
                leftOverByte = -1;
                this.bigEndian = bigEndian;
        }

        // Returns the number of characters that decoding "count" bytes
        // of "bytes" from "index" would produce, counting a carried-over
        // byte as one extra input byte.
        //
        // Throws ArgumentNullException if bytes is null, and
        // ArgumentOutOfRangeException if index or count does not
        // describe a range inside the array.
        public override int GetCharCount (byte[] bytes, int index, int count)
        {
                if (bytes == null)
                        throw new ArgumentNullException ("bytes");
                if (index < 0 || index > bytes.Length)
                        throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
                if (count < 0 || count > (bytes.Length - index))
                        throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

                int pending = (leftOverByte != -1) ? 1 : 0;
                return (count + pending) / 2;
        }

        // Decodes "byteCount" bytes of "bytes" from "byteIndex" into
        // "chars" from "charIndex" and returns the number of characters
        // written.  A carried-over byte is paired with the first new
        // byte; an unpaired final byte is remembered for the next call.
        // A call with byteCount == 0 returns 0 and leaves the state
        // untouched.
        //
        // Throws ArgumentNullException if bytes or chars is null,
        // ArgumentOutOfRangeException if an index or count lies outside
        // its array, and ArgumentException if chars has too little room
        // after charIndex.
        public unsafe override int GetChars (byte [] bytes, int byteIndex,
                                             int byteCount, char [] chars,
                                             int charIndex)
        {
                if (bytes == null)
                        throw new ArgumentNullException ("bytes");
                if (chars == null)
                        throw new ArgumentNullException ("chars");
                if (byteIndex < 0 || byteIndex > bytes.Length)
                        throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
                if (byteCount < 0 || byteCount > (bytes.Length - byteIndex))
                        throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
                if (charIndex < 0 || charIndex > chars.Length)
                        throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));

                if (byteCount == 0) {
                        return 0;
                }

                int carried = leftOverByte;
                bool haveCarried = (carried != -1);

                // A carried-over byte counts as one more input byte.
                int produced = haveCarried ? (byteCount + 1) / 2 : byteCount / 2;

                if (chars.Length - charIndex < produced) {
                        throw new ArgumentException (_("Arg_InsufficientSpace"));
                }

                if (haveCarried) {
                        // Build the first character from the carried byte
                        // and the first new byte, in the stream's order.
                        int next = (int) bytes [byteIndex];
                        int unit = bigEndian ? ((carried << 8) | next) : ((next << 8) | carried);
                        chars [charIndex] = unchecked ((char) unit);
                        charIndex++;
                        byteIndex++;
                        byteCount--;
                }

                // byteCount cannot be negative here, so "more than one"
                // means at least one complete pair remains.
                if (byteCount > 1) {
                        fixed (byte* bytePtr = bytes) {
                                fixed (char* charPtr = chars) {
                                        CopyChars (bytePtr + byteIndex, (byte*) (charPtr + charIndex), byteCount, bigEndian);
                                }
                        }
                }

                // Remember an unpaired final byte, or clear the state.
                leftOverByte = ((byteCount & 1) != 0) ? bytes [byteIndex + byteCount - 1] : -1;

                return produced;
        }

} // class UnicodeDecoder
625
626 }; // class UnicodeEncoding
627
628 }; // namespace System.Text