2005-04-12 Dick Porter <dick@ximian.com>
[mono.git] / mcs / class / corlib / System.Text / UnicodeEncoding.cs
1 /*
2  * UnicodeEncoding.cs - Implementation of the
3  *              "System.Text.UnicodeEncoding" class.
4  *
5  * Copyright (c) 2001, 2002  Southern Storm Software, Pty Ltd
6  * Copyright (C) 2003, 2004 Novell, Inc.
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining
9  * a copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice shall be included
16  * in all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
22  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24  * OTHER DEALINGS IN THE SOFTWARE.
25  */
26
27 namespace System.Text
28 {
29
30 using System;
31
32 [Serializable]
33 [MonoTODO ("Fix serialization compatibility with MS.NET")]
34 public class UnicodeEncoding : Encoding
35 {
36         // Magic numbers used by Windows for Unicode.
37         internal const int UNICODE_CODE_PAGE     = 1200;
38         internal const int BIG_UNICODE_CODE_PAGE = 1201;
39
40 #if !ECMA_COMPAT
41         // Size of characters in this encoding.
42         public const int CharSize = 2;
43 #endif
44
45         // Internal state.
46         private bool bigEndian;
47         private bool byteOrderMark;
48
49         // Constructors.
50         public UnicodeEncoding () : this (false, true)
51         {
52                 bigEndian = false;
53                 byteOrderMark = true;
54         }
55         public UnicodeEncoding (bool bigEndian, bool byteOrderMark)
56                 : base ((bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE))
57         {
58                 this.bigEndian = bigEndian;
59                 this.byteOrderMark = byteOrderMark;
60
61                 if (bigEndian){
62                         body_name = "unicodeFFFE";
63                         encoding_name = "Unicode (Big-Endian)";
64                         header_name = "unicodeFFFE";
65                         is_browser_save = false;
66                         web_name = "unicodeFFFE";
67                 } else {
68                         body_name = "utf-16";
69                         encoding_name = "Unicode";
70                         header_name = "utf-16";
71                         is_browser_save = true;
72                         web_name = "utf-16";
73                 }
74                 
75                 // Windows reports the same code page number for
76                 // both the little-endian and big-endian forms.
77                 windows_code_page = UNICODE_CODE_PAGE;
78         }
79
80         // Get the number of bytes needed to encode a character buffer.
81         public override int GetByteCount (char[] chars, int index, int count)
82         {
83                 if (chars == null) {
84                         throw new ArgumentNullException ("chars");
85                 }
86                 if (index < 0 || index > chars.Length) {
87                         throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
88                 }
89                 if (count < 0 || count > (chars.Length - index)) {
90                         throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
91                 }
92                 return count * 2;
93         }
94
95         // Convenience wrappers for "GetByteCount".
96         public override int GetByteCount (String s)
97         {
98                 if (s == null) {
99                         throw new ArgumentNullException ("s");
100                 }
101                 return s.Length * 2;
102         }
103
104         // Get the bytes that result from encoding a character buffer.
105         public override int GetBytes (char[] chars, int charIndex, int charCount,
106                                                                  byte[] bytes, int byteIndex)
107         {
108                 if (chars == null) {
109                         throw new ArgumentNullException ("chars");
110                 }
111                 if (bytes == null) {
112                         throw new ArgumentNullException ("bytes");
113                 }
114                 if (charIndex < 0 || charIndex > chars.Length) {
115                         throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
116                 }
117                 if (charCount < 0 || charCount > (chars.Length - charIndex)) {
118                         throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
119                 }
120                 if (byteIndex < 0 || byteIndex > bytes.Length) {
121                         throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
122                 }
123                 if ((bytes.Length - byteIndex) < (charCount * 2)) {
124                         throw new ArgumentException (_("Arg_InsufficientSpace"));
125                 }
126                 int posn = byteIndex;
127                 char ch;
128                 if (bigEndian) {
129                         while (charCount-- > 0) {
130                                 ch = chars[charIndex++];
131                                 bytes[posn++] = (byte)(ch >> 8);
132                                 bytes[posn++] = (byte)ch;
133                         }
134                 } else {
135                         while (charCount-- > 0) {
136                                 ch = chars[charIndex++];
137                                 bytes[posn++] = (byte)ch;
138                                 bytes[posn++] = (byte)(ch >> 8);
139                         }
140                 }
141                 return posn - byteIndex;
142         }
143
144         // Convenience wrappers for "GetBytes".
145         public override int GetBytes (String s, int charIndex, int charCount,
146                                                                  byte[] bytes, int byteIndex)
147         {
148                 if (s == null) {
149                         throw new ArgumentNullException ("s");
150                 }
151                 if (bytes == null) {
152                         throw new ArgumentNullException ("bytes");
153                 }
154                 if (charIndex < 0 || charIndex > s.Length) {
155                         throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
156                 }
157                 if (charCount < 0 || charCount > (s.Length - charIndex)) {
158                         throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
159                 }
160                 if (byteIndex < 0 || byteIndex > bytes.Length) {
161                         throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
162                 }
163                 if ((bytes.Length - byteIndex) < (charCount * 2)) {
164                         throw new ArgumentException (_("Arg_InsufficientSpace"));
165                 }
166                 int posn = byteIndex;
167                 char ch;
168                 if (bigEndian) {
169                         while (charCount-- > 0) {
170                                 ch = s[charIndex++];
171                                 bytes[posn++] = (byte)(ch >> 8);
172                                 bytes[posn++] = (byte)ch;
173                         }
174                 } else {
175                         while (charCount-- > 0) {
176                                 ch = s[charIndex++];
177                                 bytes[posn++] = (byte)ch;
178                                 bytes[posn++] = (byte)(ch >> 8);
179                         }
180                 }
181                 return posn - byteIndex;
182         }
183
184         public override byte [] GetBytes (String s)
185         {
186                 return base.GetBytes (s);
187         }
188         
189         // Get the number of characters needed to decode a byte buffer.
190         public override int GetCharCount (byte[] bytes, int index, int count)
191         {
192                 if (bytes == null) {
193                         throw new ArgumentNullException ("bytes");
194                 }
195                 if (index < 0 || index > bytes.Length) {
196                         throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
197                 }
198                 if (count < 0 || count > (bytes.Length - index)) {
199                         throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
200                 }
201                 return count / 2;
202         }
203
204         // Get the characters that result from decoding a byte buffer.
205         public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
206                                                                  char[] chars, int charIndex)
207         {
208                 if (bytes == null) {
209                         throw new ArgumentNullException ("bytes");
210                 }
211                 if (chars == null) {
212                         throw new ArgumentNullException ("chars");
213                 }
214                 if (byteIndex < 0 || byteIndex > bytes.Length) {
215                         throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
216                 }
217                 if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
218                         throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
219                 }
220                 if (charIndex < 0 || charIndex > chars.Length) {
221                         throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
222                 }
223
224                 // Determine the byte order in the incoming buffer.
225                 bool isBigEndian;
226                 if (byteCount >= 2) {
227                         if (bytes[byteIndex] == (byte)0xFE && bytes[byteIndex + 1] == (byte)0xFF) {
228                                 isBigEndian = true;
229                         } else if (bytes[byteIndex] == (byte)0xFF && bytes[byteIndex + 1] == (byte)0xFE) {
230                                 isBigEndian = false;
231                         } else {
232                                 isBigEndian = bigEndian;
233                         }
234                 } else {
235                         isBigEndian = bigEndian;
236                 }
237
238                 // Validate that we have sufficient space in "chars".
239                 if ((chars.Length - charIndex) < (byteCount / 2)) {
240                         throw new ArgumentException (_("Arg_InsufficientSpace"));
241                 }
242
243                 // Convert the characters.
244                 int posn = charIndex;
245                 if (isBigEndian) {
246                         while (byteCount >= 2) {
247                                 chars[posn++] =
248                                         ((char)((((int)(bytes[byteIndex])) << 8) |
249                                                          ((int)(bytes[byteIndex + 1]))));
250                                 byteIndex += 2;
251                                 byteCount -= 2;
252                         }
253                 } else {
254                         while (byteCount >= 2) {
255                                 chars[posn++] =
256                                         ((char)((((int)(bytes[byteIndex + 1])) << 8) |
257                                                          ((int)(bytes[byteIndex]))));
258                                 byteIndex += 2;
259                                 byteCount -= 2;
260                         }
261                 }
262                 return posn - charIndex;
263         }
264
265         // Get the maximum number of bytes needed to encode a
266         // specified number of characters.
267         public override int GetMaxByteCount (int charCount)
268         {
269                 if (charCount < 0) {
270                         throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
271                 }
272                 return charCount * 2;
273         }
274
275         // Get the maximum number of characters needed to decode a
276         // specified number of bytes.
277         public override int GetMaxCharCount (int byteCount)
278         {
279                 if (byteCount < 0) {
280                         throw new ArgumentOutOfRangeException
281                                 ("byteCount", _("ArgRange_NonNegative"));
282                 }
283                 return byteCount / 2;
284         }
285
286         // Get a Unicode-specific decoder that is attached to this instance.
287         public override Decoder GetDecoder ()
288         {
289                 return new UnicodeDecoder (bigEndian);
290         }
291
292         // Get the Unicode preamble.
293         public override byte[] GetPreamble ()
294         {
295                 if (byteOrderMark) {
296                         byte[] preamble = new byte[2];
297                         if (bigEndian) {
298                                 preamble[0] = (byte)0xFE;
299                                 preamble[1] = (byte)0xFF;
300                         } else {
301                                 preamble[0] = (byte)0xFF;
302                                 preamble[1] = (byte)0xFE;
303                         }
304                         return preamble;
305                 } else {
306                         return new byte [0];
307                 }
308         }
309
310         // Determine if this object is equal to another.
311         public override bool Equals (Object value)
312         {
313                 UnicodeEncoding enc = (value as UnicodeEncoding);
314                 if (enc != null) {
315                         return (codePage == enc.codePage &&
316                                         bigEndian == enc.bigEndian &&
317                                         byteOrderMark == enc.byteOrderMark);
318                 } else {
319                         return false;
320                 }
321         }
322
323         // Get the hash code for this object.
324         public override int GetHashCode ()
325         {
326                 return base.GetHashCode ();
327         }
328
329         // Unicode decoder implementation.
330         private sealed class UnicodeDecoder : Decoder
331         {
332                 private bool bigEndian;
333                 private int leftOverByte;
334
335                 // Constructor.
336                 public UnicodeDecoder (bool bigEndian)
337                 {
338                         this.bigEndian = bigEndian;
339                         leftOverByte = -1;
340                 }
341
342                 // Override inherited methods.
343                 public override int GetCharCount (byte[] bytes, int index, int count)
344                 {
345                         if (bytes == null) {
346                                 throw new ArgumentNullException ("bytes");
347                         }
348                         if (index < 0 || index > bytes.Length) {
349                                 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
350                         }
351                         if (count < 0 || count > (bytes.Length - index)) {
352                                 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
353                         }
354                         if (leftOverByte != -1) {
355                                 return (count + 1) / 2;
356                         } else {
357                                 return count / 2;
358                         }
359                 }
360                 public override int GetChars (byte[] bytes, int byteIndex,
361                                                                          int byteCount, char[] chars,
362                                                                          int charIndex)
363                 {
364                         if (bytes == null) {
365                                 throw new ArgumentNullException ("bytes");
366                         }
367                         if (chars == null) {
368                                 throw new ArgumentNullException ("chars");
369                         }
370                         if (byteIndex < 0 || byteIndex > bytes.Length) {
371                                 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
372                         }
373                         if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
374                                 throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
375                         }
376                         if (charIndex < 0 || charIndex > chars.Length) {
377                                 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
378                         }
379
380                         // Convert the characters.
381                         int posn = charIndex;
382                         bool isBigEndian = bigEndian;
383                         int leftOver = leftOverByte;
384                         int length = chars.Length;
385                         char ch;
386                         while (byteCount > 0) {
387                                 if (leftOver != -1) {
388                                         if (isBigEndian) {
389                                                 ch = ((char)((leftOver << 8) | ((int)(bytes[byteIndex]))));
390                                         } else {
391                                                 ch = ((char)(leftOver |
392                                                                  (((int)(bytes[byteIndex])) << 8)));
393                                         }
394                                         leftOver = -1;
395                                         ++byteIndex;
396                                         --byteCount;
397                                 } else if (byteCount > 1) {
398                                         if (isBigEndian) {
399                                                 ch = ((char)((((int)(bytes[byteIndex])) << 8) |
400                                                                           ((int)(bytes[byteIndex + 1]))));
401                                         } else {
402                                                 ch = ((char)((((int)(bytes[byteIndex + 1])) << 8) |
403                                                                       ((int)(bytes[byteIndex]))));
404                                         }
405                                         byteIndex += 2;
406                                         byteCount -= 2;
407                                 } else {
408                                         leftOver = (int)(bytes[byteIndex]);
409                                         break;
410                                 }
411
412                                 if (posn < length) {
413                                         chars[posn++] = ch;
414                                 } else {
415                                         throw new ArgumentException (_("Arg_InsufficientSpace"));
416                                 }
417                         }
418                         leftOverByte = leftOver;
419                         bigEndian = isBigEndian;
420
421                         // Finished - return the converted length.
422                         return posn - charIndex;
423                 }
424
425         } // class UnicodeDecoder
426
427 }; // class UnicodeEncoding
428
429 }; // namespace System.Text