80373bb3618253e8ed5c5c6664232a458331894a
[mono.git] / mcs / class / corlib / System.Text / UnicodeEncoding.cs
1 /*
2  * UnicodeEncoding.cs - Implementation of the
3  *              "System.Text.UnicodeEncoding" class.
4  *
5  * Copyright (c) 2001, 2002  Southern Storm Software, Pty Ltd
6  * Copyright (C) 2003, 2004 Novell, Inc.
7  * Copyright (C) 2006 Kornél Pál <http://www.kornelpal.hu/>
8  *
9  * Permission is hereby granted, free of charge, to any person obtaining
10  * a copy of this software and associated documentation files (the "Software"),
11  * to deal in the Software without restriction, including without limitation
12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13  * and/or sell copies of the Software, and to permit persons to whom the
14  * Software is furnished to do so, subject to the following conditions:
15  *
16  * The above copyright notice and this permission notice shall be included
17  * in all copies or substantial portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
24  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25  * OTHER DEALINGS IN THE SOFTWARE.
26  */
27
28 namespace System.Text
29 {
30
31 using System;
32
[Serializable]
[MonoTODO ("Fix serialization compatibility with MS.NET")]
public class UnicodeEncoding : Encoding
{
        // Magic numbers used by Windows for Unicode.
        internal const int UNICODE_CODE_PAGE     = 1200;
        internal const int BIG_UNICODE_CODE_PAGE = 1201;

#if !ECMA_COMPAT
        // Size of characters in this encoding.
        public const int CharSize = 2;
#endif

        // Internal state: the byte order this instance encodes/decodes with,
        // and whether GetPreamble () hands out a byte order mark.
        private bool bigEndian;
        private bool byteOrderMark;

        // Create a little-endian encoding that emits a byte order mark.
        // All state is set up by the two-argument constructor.
        public UnicodeEncoding () : this (false, true)
        {
        }

        // Create an encoding with an explicit byte order and preamble policy.
        //   bigEndian     - true for big-endian (code page 1201), false for
        //                   little-endian (code page 1200).
        //   byteOrderMark - true if GetPreamble () should return a BOM.
        public UnicodeEncoding (bool bigEndian, bool byteOrderMark)
                : base ((bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE))
        {
                this.bigEndian = bigEndian;
                this.byteOrderMark = byteOrderMark;

                if (bigEndian) {
                        body_name = "unicodeFFFE";
                        encoding_name = "Unicode (Big-Endian)";
                        header_name = "unicodeFFFE";
                        is_browser_save = false;
                        web_name = "unicodeFFFE";
                } else {
                        body_name = "utf-16";
                        encoding_name = "Unicode";
                        header_name = "utf-16";
                        is_browser_save = true;
                        web_name = "utf-16";
                }

                // Windows reports the same code page number for
                // both the little-endian and big-endian forms.
                windows_code_page = UNICODE_CODE_PAGE;
        }

        // Convert a character count into a byte count (two bytes per char),
        // throwing ArgumentOutOfRangeException naming "paramName" instead of
        // letting the multiplication wrap around to a negative value.
        private static int CheckedByteCount (int charCount, string paramName)
        {
                if (charCount > Int32.MaxValue / 2)
                        throw new ArgumentOutOfRangeException (paramName);
                return charCount * 2;
        }

        // Get the number of bytes needed to encode a character buffer.
        // Throws ArgumentNullException if "chars" is null and
        // ArgumentOutOfRangeException if "index"/"count" do not describe a
        // range inside "chars" or the byte count does not fit in an int.
        public override int GetByteCount (char[] chars, int index, int count)
        {
                if (chars == null) {
                        throw new ArgumentNullException ("chars");
                }
                if (index < 0 || index > chars.Length) {
                        throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
                }
                if (count < 0 || count > (chars.Length - index)) {
                        throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
                }
                return CheckedByteCount (count, "count");
        }

        // Get the number of bytes needed to encode a string.
        // Throws ArgumentNullException if "s" is null.
        public override int GetByteCount (String s)
        {
                if (s == null) {
                        throw new ArgumentNullException ("s");
                }
                return s.Length * 2;
        }

#if NET_2_0
        // Get the number of bytes needed to encode "count" characters
        // starting at "chars".  Throws ArgumentNullException for a null
        // pointer and ArgumentOutOfRangeException for a negative count or a
        // count whose byte size does not fit in an int.
        [CLSCompliantAttribute (false)]
        public unsafe override int GetByteCount (char* chars, int count)
        {
                if (chars == null)
                        throw new ArgumentNullException ("chars");
                if (count < 0)
                        throw new ArgumentOutOfRangeException ("count");

                return CheckedByteCount (count, "count");
        }
#endif

        // Get the bytes that result from encoding a character buffer.
        // Returns the number of bytes written into "bytes" at "byteIndex".
        // Throws ArgumentNullException / ArgumentOutOfRangeException for bad
        // arguments and ArgumentException if "bytes" is too small.
        public unsafe override int GetBytes (char [] chars, int charIndex, int charCount,
                                             byte [] bytes, int byteIndex)
        {
                if (chars == null) {
                        throw new ArgumentNullException ("chars");
                }
                if (bytes == null) {
                        throw new ArgumentNullException ("bytes");
                }
                if (charIndex < 0 || charIndex > chars.Length) {
                        throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
                }
                if (charCount < 0 || charCount > (chars.Length - charIndex)) {
                        throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
                }
                if (byteIndex < 0 || byteIndex > bytes.Length) {
                        throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
                }

                if (charCount == 0)
                        return 0;

                // Capture the available space before "bytes" may be swapped
                // for a dummy array: an empty array is replaced by a
                // one-element one before being pinned, while the size check
                // in GetBytesInternal still sees the real capacity.
                int byteCount = bytes.Length - byteIndex;
                if (bytes.Length == 0)
                        bytes = new byte [1];

                fixed (char* charPtr = chars)
                        fixed (byte* bytePtr = bytes)
                                return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, byteCount);
        }

#if !NET_2_0
        // Encode a whole string into a freshly allocated byte array.
        // Throws ArgumentNullException if "s" is null.
        public unsafe override byte [] GetBytes (String s)
        {
                if (s == null)
                        throw new ArgumentNullException ("s");

                int byteCount = GetByteCount (s);
                byte [] bytes = new byte [byteCount];

                if (byteCount != 0)
                        fixed (char* charPtr = s)
                                fixed (byte* bytePtr = bytes)
                                        GetBytesInternal (charPtr, s.Length, bytePtr, byteCount);

                return bytes;
        }
#endif

        // Encode a range of a string into "bytes" starting at "byteIndex".
        // Returns the number of bytes written.  Throws
        // ArgumentNullException / ArgumentOutOfRangeException for bad
        // arguments and ArgumentException if "bytes" is too small.
        public unsafe override int GetBytes (String s, int charIndex, int charCount,
                                             byte [] bytes, int byteIndex)
        {
                if (s == null) {
                        throw new ArgumentNullException ("s");
                }
                if (bytes == null) {
                        throw new ArgumentNullException ("bytes");
                }
                if (charIndex < 0 || charIndex > s.Length) {
                        throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
                }
                if (charCount < 0 || charCount > (s.Length - charIndex)) {
                        throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
                }
                if (byteIndex < 0 || byteIndex > bytes.Length) {
                        throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
                }

                // See the char [] overload: the real capacity is captured
                // before an empty destination is replaced by a dummy array.
                int byteCount = bytes.Length - byteIndex;
                if (bytes.Length == 0)
                        bytes = new byte [1];

                fixed (char* charPtr = s)
                        fixed (byte* bytePtr = bytes)
                                return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, byteCount);
        }

#if NET_2_0
        // Encode "charCount" characters from "chars" into the buffer
        // "bytes" of capacity "byteCount".  Returns the number of bytes
        // written.  Throws ArgumentNullException for null pointers,
        // ArgumentOutOfRangeException for negative counts and
        // ArgumentException if the destination is too small.
        [CLSCompliantAttribute (false)]
        public unsafe override int GetBytes (char* chars, int charCount,
                                             byte* bytes, int byteCount)
        {
                if (bytes == null)
                        throw new ArgumentNullException ("bytes");
                if (chars == null)
                        throw new ArgumentNullException ("chars");
                if (charCount < 0)
                        throw new ArgumentOutOfRangeException ("charCount");
                if (byteCount < 0)
                        throw new ArgumentOutOfRangeException ("byteCount");

                return GetBytesInternal (chars, charCount, bytes, byteCount);
        }
#endif

        // Shared encoding worker: copies "charCount" characters to "bytes"
        // in this instance's byte order and returns the bytes written.
        private unsafe int GetBytesInternal (char* chars, int charCount,
                                             byte* bytes, int byteCount)
        {
                // Compare in 64-bit arithmetic so that a huge charCount cannot
                // wrap around to a small or negative value and slip past the
                // space check.
                if (byteCount < (long) charCount * 2)
                        throw new ArgumentException (_("Arg_InsufficientSpace"));

                // Safe now: charCount * 2 <= byteCount <= Int32.MaxValue.
                int count = charCount * 2;

                CopyChars ((byte*) chars, bytes, count, bigEndian);
                return count;
        }

        // Get the number of characters needed to decode a byte buffer.
        // A trailing odd byte does not produce a character.
        public override int GetCharCount (byte[] bytes, int index, int count)
        {
                if (bytes == null) {
                        throw new ArgumentNullException ("bytes");
                }
                if (index < 0 || index > bytes.Length) {
                        throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
                }
                if (count < 0 || count > (bytes.Length - index)) {
                        throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
                }
                return count / 2;
        }

#if NET_2_0
        // Get the number of characters needed to decode "count" bytes
        // starting at "bytes".
        [CLSCompliantAttribute (false)]
        public unsafe override int GetCharCount (byte* bytes, int count)
        {
                if (bytes == null)
                        throw new ArgumentNullException ("bytes");
                if (count < 0)
                        throw new ArgumentOutOfRangeException ("count");

                return count / 2;
        }
#endif

        // Get the characters that result from decoding a byte buffer.
        // Returns the number of characters written into "chars" at
        // "charIndex".  Throws ArgumentNullException /
        // ArgumentOutOfRangeException for bad arguments and
        // ArgumentException if "chars" is too small.
        public unsafe override int GetChars (byte [] bytes, int byteIndex, int byteCount,
                                             char [] chars, int charIndex)
        {
                if (bytes == null) {
                        throw new ArgumentNullException ("bytes");
                }
                if (chars == null) {
                        throw new ArgumentNullException ("chars");
                }
                if (byteIndex < 0 || byteIndex > bytes.Length) {
                        throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
                }
                if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
                        throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
                }
                if (charIndex < 0 || charIndex > chars.Length) {
                        throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
                }

                if (byteCount == 0)
                        return 0;

                // Capture the real capacity before an empty destination is
                // replaced by a dummy one-element array for pinning.
                int charCount = chars.Length - charIndex;
                if (chars.Length == 0)
                        chars = new char [1];

                fixed (byte* bytePtr = bytes)
                        fixed (char* charPtr = chars)
                                return GetCharsInternal (bytePtr + byteIndex, byteCount, charPtr + charIndex, charCount);
        }

#if NET_2_0
        // Decode "byteCount" bytes from "bytes" into the buffer "chars" of
        // capacity "charCount".  Returns the number of characters written.
        [CLSCompliantAttribute (false)]
        public unsafe override int GetChars (byte* bytes, int byteCount,
                                             char* chars, int charCount)
        {
                if (bytes == null)
                        throw new ArgumentNullException ("bytes");
                if (chars == null)
                        throw new ArgumentNullException ("chars");
                if (charCount < 0)
                        throw new ArgumentOutOfRangeException ("charCount");
                if (byteCount < 0)
                        throw new ArgumentOutOfRangeException ("byteCount");

                return GetCharsInternal (bytes, byteCount, chars, charCount);
        }
#endif

        // Shared decoding worker: converts "byteCount" bytes to characters
        // using this instance's configured byte order and returns the number
        // of characters written.
        //
        // The input is deliberately NOT inspected for a byte order mark.
        // Sniffing FE FF / FF FE at the start of every buffer would flip the
        // byte order of the entire call whenever the data merely begins with
        // U+FEFF or U+FFFE, which breaks GetChars (GetBytes (x)) round trips
        // and disagrees with UnicodeDecoder, which always honours the
        // configured order.
        private unsafe int GetCharsInternal (byte* bytes, int byteCount,
                                             char* chars, int charCount)
        {
                int count = byteCount / 2;

                // Validate that we have sufficient space in "chars".
                if (charCount < count)
                        throw new ArgumentException (_("Arg_InsufficientSpace"));

                CopyChars (bytes, (byte*) chars, byteCount, bigEndian);
                return count;
        }

        // Get the maximum number of bytes needed to encode a
        // specified number of characters.  Throws
        // ArgumentOutOfRangeException if "charCount" is negative or the
        // result would not fit in an int.
        public override int GetMaxByteCount (int charCount)
        {
                if (charCount < 0) {
                        throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
                }
                return CheckedByteCount (charCount, "charCount");
        }

        // Get the maximum number of characters needed to decode a
        // specified number of bytes.
        public override int GetMaxCharCount (int byteCount)
        {
                if (byteCount < 0) {
                        throw new ArgumentOutOfRangeException
                                ("byteCount", _("ArgRange_NonNegative"));
                }
                return byteCount / 2;
        }

        // Get a Unicode-specific decoder that is attached to this instance.
        // The decoder carries an odd trailing byte over to the next call.
        public override Decoder GetDecoder ()
        {
                return new UnicodeDecoder (bigEndian);
        }

        // Get the Unicode preamble: the byte order mark in this instance's
        // byte order, or an empty array if no mark was requested.
        public override byte[] GetPreamble ()
        {
                if (!byteOrderMark)
                        return new byte [0];

                if (bigEndian)
                        return new byte [] { (byte) 0xFE, (byte) 0xFF };

                return new byte [] { (byte) 0xFF, (byte) 0xFE };
        }

        // Determine if this object is equal to another: true only for a
        // UnicodeEncoding with the same code page, byte order and
        // byte-order-mark setting.
        public override bool Equals (Object value)
        {
                UnicodeEncoding enc = (value as UnicodeEncoding);
                if (enc == null)
                        return false;

                return (codePage == enc.codePage &&
                        bigEndian == enc.bigEndian &&
                        byteOrderMark == enc.byteOrderMark);
        }

        // Get the hash code for this object.
        public override int GetHashCode ()
        {
                return base.GetHashCode ();
        }

        // Copy "count" bytes of 16-bit units from "src" to "dest".
        // "bigEndian" is the byte order on the encoded side; when it differs
        // from the host's order the two bytes of every unit are swapped,
        // otherwise the data is copied verbatim.  Only whole units are
        // processed: a trailing odd byte is ignored.
        private unsafe static void CopyChars (byte* src, byte* dest, int count, bool bigEndian)
        {
                // Nothing to do without at least one complete unit (this also
                // rejects a negative count defensively).
                if (count < 2)
                        return;

                // Host order matches the requested order: plain copy.
                if (BitConverter.IsLittleEndian != bigEndian) {
                        string.memcpy (dest, src, count & unchecked ((int) 0xFFFFFFFE));
                        return;
                }

                // Swap eight units (16 bytes) per iteration while possible.
                while (count >= 16) {
                        dest [0] = src [1];
                        dest [1] = src [0];
                        dest [2] = src [3];
                        dest [3] = src [2];
                        dest [4] = src [5];
                        dest [5] = src [4];
                        dest [6] = src [7];
                        dest [7] = src [6];
                        dest [8] = src [9];
                        dest [9] = src [8];
                        dest [10] = src [11];
                        dest [11] = src [10];
                        dest [12] = src [13];
                        dest [13] = src [12];
                        dest [14] = src [15];
                        dest [15] = src [14];
                        dest += 16;
                        src += 16;
                        count -= 16;
                }

                // Fewer than 16 bytes remain; bits 3, 2 and 1 of "count" say
                // whether a group of 8, 4 and 2 bytes is still pending.
                if ((count & 8) != 0) {
                        dest [0] = src [1];
                        dest [1] = src [0];
                        dest [2] = src [3];
                        dest [3] = src [2];
                        dest [4] = src [5];
                        dest [5] = src [4];
                        dest [6] = src [7];
                        dest [7] = src [6];
                        dest += 8;
                        src += 8;
                }

                if ((count & 4) != 0) {
                        dest [0] = src [1];
                        dest [1] = src [0];
                        dest [2] = src [3];
                        dest [3] = src [2];
                        dest += 4;
                        src += 4;
                }

                if ((count & 2) != 0) {
                        dest [0] = src [1];
                        dest [1] = src [0];
                }
        }

        // Unicode decoder implementation.  Unlike the stateless
        // UnicodeEncoding.GetChars, it remembers an odd trailing byte and
        // combines it with the first byte of the next call.
        private sealed class UnicodeDecoder : Decoder
        {
                private bool bigEndian;

                // Byte left over from the previous call, or -1 if none.
                private int leftOverByte;

                // Constructor.
                public UnicodeDecoder (bool bigEndian)
                {
                        this.bigEndian = bigEndian;
                        leftOverByte = -1;
                }

                // Get the number of characters the next GetChars call would
                // produce for this range, counting a pending left-over byte.
                public override int GetCharCount (byte[] bytes, int index, int count)
                {
                        if (bytes == null) {
                                throw new ArgumentNullException ("bytes");
                        }
                        if (index < 0 || index > bytes.Length) {
                                throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
                        }
                        if (count < 0 || count > (bytes.Length - index)) {
                                throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
                        }
                        if (leftOverByte != -1) {
                                return (count + 1) / 2;
                        } else {
                                return count / 2;
                        }
                }

                // Decode a range of bytes into "chars" at "charIndex" and
                // return the number of characters written.  Throws
                // ArgumentNullException / ArgumentOutOfRangeException for bad
                // arguments and ArgumentException if "chars" is too small.
                public unsafe override int GetChars (byte [] bytes, int byteIndex,
                                                     int byteCount, char [] chars,
                                                     int charIndex)
                {
                        if (bytes == null) {
                                throw new ArgumentNullException ("bytes");
                        }
                        if (chars == null) {
                                throw new ArgumentNullException ("chars");
                        }
                        if (byteIndex < 0 || byteIndex > bytes.Length) {
                                throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
                        }
                        if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
                                throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
                        }
                        if (charIndex < 0 || charIndex > chars.Length) {
                                throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
                        }

                        // An empty input leaves any pending byte untouched.
                        if (byteCount == 0)
                                return 0;

                        int leftOver = leftOverByte;
                        int count;

                        // A pending byte pairs up with the first new byte.
                        if (leftOver != -1)
                                count = (byteCount + 1) / 2;
                        else
                                count = byteCount / 2;

                        if (chars.Length - charIndex < count)
                                throw new ArgumentException (_("Arg_InsufficientSpace"));

                        if (leftOver != -1) {
                                // The pending byte is the first byte of the
                                // unit: the high half in big-endian order,
                                // the low half in little-endian order.
                                int next = bytes [byteIndex];
                                if (bigEndian)
                                        chars [charIndex] = unchecked ((char) ((leftOver << 8) | next));
                                else
                                        chars [charIndex] = unchecked ((char) ((next << 8) | leftOver));
                                charIndex++;
                                byteIndex++;
                                byteCount--;
                        }

                        // Bulk-convert the remaining complete units, if any.
                        if (byteCount >= 2)
                                fixed (byte* bytePtr = bytes)
                                        fixed (char* charPtr = chars)
                                                CopyChars (bytePtr + byteIndex, (byte*) (charPtr + charIndex), byteCount, bigEndian);

                        // Remember an odd trailing byte for the next call.
                        if ((byteCount & 1) == 0)
                                leftOverByte = -1;
                        else
                                leftOverByte = bytes [byteCount + byteIndex - 1];

                        return count;
                }

        } // class UnicodeDecoder

}; // class UnicodeEncoding
600
601 }; // namespace System.Text