2 * UnicodeEncoding.cs - Implementation of the
3 * "System.Text.UnicodeEncoding" class.
5 * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
6 * Copyright (C) 2003, 2004 Novell, Inc.
8 * Permission is hereby granted, free of charge, to any person obtaining
9 * a copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
22 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 * OTHER DEALINGS IN THE SOFTWARE.
// Text encoding derived from Encoding that stores each char as CharSize (2) bytes,
// with the byte order chosen per instance.
// NOTE(review): this listing is sparse - the embedded original line numbers skip
// values, so braces and some statements of the real file are not shown here.
33 [MonoTODO ("Fix serialization compatibility with MS.NET")]
34 public class UnicodeEncoding : Encoding
36 // Magic numbers used by Windows for Unicode.
// The constructor hands 1201 to the base class when bigEndian is true and 1200 otherwise.
37 internal const int UNICODE_CODE_PAGE = 1200;
38 internal const int BIG_UNICODE_CODE_PAGE = 1201;
41 // Size of characters in this encoding.
42 public const int CharSize = 2;
// Byte-order flag supplied to the constructor; also forwarded to the decoder
// created by GetDecoder.
46 private bool bigEndian;
// Byte-order-mark flag supplied to the constructor; it takes part in Equals.
47 private bool byteOrderMark;
// Parameterless constructor: chains to the two-argument constructor with
// bigEndian = false and byteOrderMark = true.
// NOTE(review): the constructor body is not shown in this listing.
50 public UnicodeEncoding () : this (false, true)
// Creates an encoding with an explicit byte order and byte-order-mark setting.
//   bigEndian     - selects the code page passed to the base class:
//                   BIG_UNICODE_CODE_PAGE when true, UNICODE_CODE_PAGE when false.
//   byteOrderMark - stored unchanged in the field of the same name.
55 public UnicodeEncoding (bool bigEndian, bool byteOrderMark)
56 : base ((bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE))
58 this.bigEndian = bigEndian;
59 this.byteOrderMark = byteOrderMark;
// First group of names ("unicodeFFFE" / "Unicode (Big-Endian)").
// NOTE(review): the condition guarding this group is not shown in this listing -
// presumably it is the bigEndian case; confirm against the complete file.
62 body_name = "unicodeFFFE";
63 encoding_name = "Unicode (Big-Endian)";
64 header_name = "unicodeFFFE";
65 is_browser_save = false;
66 web_name = "unicodeFFFE";
// Second group of names ("Unicode" / "utf-16").
// NOTE(review): no body_name or web_name assignment is visible for this group -
// verify in the complete file that both are set here as well.
69 encoding_name = "Unicode";
70 header_name = "utf-16";
71 is_browser_save = true;
75 // Windows reports the same code page number for
76 // both the little-endian and big-endian forms.
77 windows_code_page = UNICODE_CODE_PAGE;
80 // Get the number of bytes needed to encode a character buffer.
//   chars - source buffer; an ArgumentNullException naming "chars" is thrown for it.
//   index - start of the range; must lie in [0, chars.Length].
//   count - length of the range; must lie in [0, chars.Length - index].
// NOTE(review): the null test and the statement returning the result are not shown
// in this listing - presumably the result is count * 2, matching the space check
// in GetBytes; confirm against the complete file.
81 public override int GetByteCount (char[] chars, int index, int count)
84 throw new ArgumentNullException ("chars");
// index == chars.Length is accepted (an empty range at the very end).
86 if (index < 0 || index > chars.Length) {
87 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
// Compared against chars.Length - index, so index + count is never computed.
89 if (count < 0 || count > (chars.Length - index)) {
90 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
95 // Convenience wrappers for "GetByteCount".
// String overload; throws ArgumentNullException naming "s".
// NOTE(review): the null test and the return statement are not shown in this
// listing, so the returned value cannot be confirmed from here.
96 public override int GetByteCount (String s)
99 throw new ArgumentNullException ("s");
104 // Get the bytes that result from encoding a character buffer.
// Writes two bytes per char from chars[charIndex..] into bytes[byteIndex..] and
// returns the number of bytes written (posn - byteIndex).
// Throws:
//   ArgumentNullException       - naming "chars" or "bytes".
//   ArgumentOutOfRangeException - charIndex, charCount or byteIndex outside its array.
//   ArgumentException           - fewer than charCount * 2 bytes remain after byteIndex.
105 public override int GetBytes (char[] chars, int charIndex, int charCount,
106 byte[] bytes, int byteIndex)
109 throw new ArgumentNullException ("chars");
112 throw new ArgumentNullException ("bytes");
114 if (charIndex < 0 || charIndex > chars.Length) {
115 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
117 if (charCount < 0 || charCount > (chars.Length - charIndex)) {
118 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
120 if (byteIndex < 0 || byteIndex > bytes.Length) {
121 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
// Space check happens before any byte is written.
// NOTE(review): no guard against overflow of charCount * 2 is visible here.
123 if ((bytes.Length - byteIndex) < (charCount * 2)) {
124 throw new ArgumentException (_("Arg_InsufficientSpace"));
126 int posn = byteIndex;
// First loop: high-order byte (ch >> 8) is stored before the low-order byte.
// NOTE(review): the line choosing between the two loops is not shown in this
// listing - presumably this one runs when bigEndian is true; confirm.
129 while (charCount-- > 0) {
130 ch = chars[charIndex++];
131 bytes[posn++] = (byte)(ch >> 8);
132 bytes[posn++] = (byte)ch;
// Second loop: low-order byte is stored before the high-order byte.
135 while (charCount-- > 0) {
136 ch = chars[charIndex++];
137 bytes[posn++] = (byte)ch;
138 bytes[posn++] = (byte)(ch >> 8);
141 return posn - byteIndex;
144 // Convenience wrappers for "GetBytes".
// String-source variant: validates the range against s.Length, requires
// charCount * 2 free bytes after byteIndex, and returns posn - byteIndex.
// Throws:
//   ArgumentNullException       - naming "s" or "bytes".
//   ArgumentOutOfRangeException - charIndex, charCount or byteIndex out of range.
//   ArgumentException           - not enough room in bytes.
145 public override int GetBytes (String s, int charIndex, int charCount,
146 byte[] bytes, int byteIndex)
149 throw new ArgumentNullException ("s");
152 throw new ArgumentNullException ("bytes");
154 if (charIndex < 0 || charIndex > s.Length) {
155 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
157 if (charCount < 0 || charCount > (s.Length - charIndex)) {
158 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
160 if (byteIndex < 0 || byteIndex > bytes.Length) {
161 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
// Space check happens before any byte is written.
163 if ((bytes.Length - byteIndex) < (charCount * 2)) {
164 throw new ArgumentException (_("Arg_InsufficientSpace"));
166 int posn = byteIndex;
// First loop: high-order byte first.
// NOTE(review): neither the line choosing between the two loops nor the statement
// that loads ch from s is shown in this listing - confirm both in the complete file.
169 while (charCount-- > 0) {
171 bytes[posn++] = (byte)(ch >> 8);
172 bytes[posn++] = (byte)ch;
// Second loop: low-order byte first.
175 while (charCount-- > 0) {
177 bytes[posn++] = (byte)ch;
178 bytes[posn++] = (byte)(ch >> 8);
181 return posn - byteIndex;
// Single-argument overload: forwards s unchanged to the base-class GetBytes and
// returns whatever array that call produces.
184 public override byte [] GetBytes (String s)
186 return base.GetBytes (s);
189 // Get the number of characters needed to decode a byte buffer.
//   bytes - source buffer; an ArgumentNullException naming "bytes" is thrown for it.
//   index - start of the range; must lie in [0, bytes.Length].
//   count - length of the range; must lie in [0, bytes.Length - index].
// NOTE(review): the null test and the return statement are not shown in this
// listing - presumably the result is count / 2, matching the space check in
// GetChars; confirm against the complete file.
190 public override int GetCharCount (byte[] bytes, int index, int count)
193 throw new ArgumentNullException ("bytes");
195 if (index < 0 || index > bytes.Length) {
196 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
198 if (count < 0 || count > (bytes.Length - index)) {
199 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
204 // Get the characters that result from decoding a byte buffer.
// Combines byte pairs from bytes[byteIndex..] into chars starting at
// chars[charIndex] and returns posn - charIndex.
// Throws:
//   ArgumentNullException       - naming "bytes" or "chars".
//   ArgumentOutOfRangeException - byteIndex, byteCount or charIndex out of range.
//   ArgumentException           - chars has fewer than byteCount / 2 free slots.
205 public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
206 char[] chars, int charIndex)
209 throw new ArgumentNullException ("bytes");
212 throw new ArgumentNullException ("chars");
214 if (byteIndex < 0 || byteIndex > bytes.Length) {
215 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
217 if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
218 throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
220 if (charIndex < 0 || charIndex > chars.Length) {
221 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
224 // Determine the byte order in the incoming buffer.
// The first two bytes are compared with FE FF and FF FE; when neither matches, or
// when fewer than two bytes are supplied, the instance's bigEndian setting is used.
// NOTE(review): the assignments made when a mark IS found are not shown in this
// listing, and nothing visible skips the mark itself - confirm both in the full file.
226 if (byteCount >= 2) {
227 if (bytes[byteIndex] == (byte)0xFE && bytes[byteIndex + 1] == (byte)0xFF) {
229 } else if (bytes[byteIndex] == (byte)0xFF && bytes[byteIndex + 1] == (byte)0xFE) {
232 isBigEndian = bigEndian;
235 isBigEndian = bigEndian;
238 // Validate that we have sufficient space in "chars".
// Integer division: an odd trailing byte does not require an output slot.
239 if ((chars.Length - charIndex) < (byteCount / 2)) {
240 throw new ArgumentException (_("Arg_InsufficientSpace"));
243 // Convert the characters.
244 int posn = charIndex;
// First loop: the byte at byteIndex becomes the high-order half of the char.
// Both loops need at least two remaining bytes per iteration.
// NOTE(review): the loop selector, the store into chars and the index updates are
// not shown in this listing.
246 while (byteCount >= 2) {
248 ((char)((((int)(bytes[byteIndex])) << 8) |
249 ((int)(bytes[byteIndex + 1]))));
// Second loop: the byte at byteIndex + 1 becomes the high-order half of the char.
254 while (byteCount >= 2) {
256 ((char)((((int)(bytes[byteIndex + 1])) << 8) |
257 ((int)(bytes[byteIndex]))));
262 return posn - charIndex;
265 // Get the maximum number of bytes needed to encode a
266 // specified number of characters.
// Returns charCount * 2. An ArgumentOutOfRangeException naming "charCount" is
// thrown for rejected values.
// NOTE(review): the rejecting condition is not shown in this listing (the message
// key suggests negative values), and no guard against overflow of charCount * 2
// is visible.
267 public override int GetMaxByteCount (int charCount)
270 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
272 return charCount * 2;
275 // Get the maximum number of characters needed to decode a
276 // specified number of bytes.
// Returns byteCount / 2 using integer division, so an odd final byte is not counted.
// An ArgumentOutOfRangeException naming "byteCount" is thrown for rejected values.
// NOTE(review): the rejecting condition is not shown in this listing (the message
// key suggests negative values).
277 public override int GetMaxCharCount (int byteCount)
280 throw new ArgumentOutOfRangeException
281 ("byteCount", _("ArgRange_NonNegative"));
283 return byteCount / 2;
286 // Get a Unicode-specific decoder that is attached to this instance.
// A new UnicodeDecoder is constructed on every call and receives only this
// instance's bigEndian flag.
287 public override Decoder GetDecoder ()
289 return new UnicodeDecoder (bigEndian);
292 // Get the Unicode preamble.
// Builds a two-byte array holding either FE FF or FF FE.
// NOTE(review): the conditions selecting between the two byte pairs and the return
// statement(s) are not shown in this listing - presumably bigEndian picks FE FF
// and byteOrderMark decides whether a mark is returned at all; confirm against the
// complete file.
293 public override byte[] GetPreamble ()
296 byte[] preamble = new byte[2];
// Byte pair FE FF.
298 preamble[0] = (byte)0xFE;
299 preamble[1] = (byte)0xFF;
// Byte pair FF FE.
301 preamble[0] = (byte)0xFF;
302 preamble[1] = (byte)0xFE;
310 // Determine if this object is equal to another.
// value is converted with "as", which yields null for anything that is not a
// UnicodeEncoding; the visible comparison requires codePage, bigEndian and
// byteOrderMark to all match.
// NOTE(review): the handling of a null conversion result is not shown in this
// listing - confirm that enc is tested before it is dereferenced.
311 public override bool Equals (Object value)
313 UnicodeEncoding enc = (value as UnicodeEncoding);
315 return (codePage == enc.codePage &&
316 bigEndian == enc.bigEndian &&
317 byteOrderMark == enc.byteOrderMark);
323 // Get the hash code for this object.
// Returns the base-class hash unchanged; the bigEndian and byteOrderMark fields
// that Equals compares add nothing to the value in this method.
324 public override int GetHashCode ()
326 return base.GetHashCode ();
329 // Unicode decoder implementation.
// Decoder that carries state between calls: a byte order flag and one pending byte
// (leftOverByte). The value -1 is tested throughout as "no pending byte".
// NOTE(review): this listing is sparse - braces, the local declaration of ch, the
// stores into chars and the index/count updates are not shown here.
330 private sealed class UnicodeDecoder : Decoder
332 private bool bigEndian;
333 private int leftOverByte;
// Stores the byte order to start with.
// NOTE(review): the initial value given to leftOverByte is not shown - presumably -1.
336 public UnicodeDecoder (bool bigEndian)
338 this.bigEndian = bigEndian;
342 // Override inherited methods.
// Validates the range, then reports how many chars it would yield. With a pending
// byte the result is (count + 1) / 2, i.e. the pending byte is counted as input.
// NOTE(review): the return used when no byte is pending is not shown in this listing.
343 public override int GetCharCount (byte[] bytes, int index, int count)
346 throw new ArgumentNullException ("bytes");
348 if (index < 0 || index > bytes.Length) {
349 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
351 if (count < 0 || count > (bytes.Length - index)) {
352 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
354 if (leftOverByte != -1) {
355 return (count + 1) / 2;
// Decodes bytes into chars, resuming from any pending byte, and returns
// posn - charIndex. Throws ArgumentNullException, ArgumentOutOfRangeException and
// ArgumentException (insufficient space) as shown below.
// NOTE(review): the final parameter line of the signature (charIndex, judging by
// its use below) is not shown in this listing.
360 public override int GetChars (byte[] bytes, int byteIndex,
361 int byteCount, char[] chars,
365 throw new ArgumentNullException ("bytes");
368 throw new ArgumentNullException ("chars");
370 if (byteIndex < 0 || byteIndex > bytes.Length) {
371 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
373 if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
374 throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
376 if (charIndex < 0 || charIndex > chars.Length) {
377 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
380 // Convert the characters.
// State is copied into locals for the loop and written back after it.
381 int posn = charIndex;
382 bool isBigEndian = bigEndian;
383 int leftOver = leftOverByte;
384 int length = chars.Length;
386 while (byteCount > 0) {
// Case 1: a byte is pending - pair it with the current byte.
387 if (leftOver != -1) {
// Pending byte used as the high-order half.
389 ch = ((char)((leftOver << 8) | ((int)(bytes[byteIndex]))));
// Pending byte used as the low-order half.
391 ch = ((char)(leftOver |
392 (((int)(bytes[byteIndex])) << 8)));
// Case 2: nothing pending and at least two bytes remain - combine them directly.
397 } else if (byteCount > 1) {
// Byte at byteIndex is the high-order half.
399 ch = ((char)((((int)(bytes[byteIndex])) << 8) |
400 ((int)(bytes[byteIndex + 1]))));
// Byte at byteIndex + 1 is the high-order half.
402 ch = ((char)((((int)(bytes[byteIndex + 1])) << 8) |
403 ((int)(bytes[byteIndex]))));
// Case 3: a single byte remains - keep it as the pending byte for the next call.
408 leftOver = (int)(bytes[byteIndex]);
// NOTE(review): the condition guarding this throw is not shown - presumably posn
// is compared with length before each store; confirm against the complete file.
415 throw new ArgumentException (_("Arg_InsufficientSpace"));
// Persist the pending byte and byte order for the next call.
418 leftOverByte = leftOver;
419 bigEndian = isBigEndian;
421 // Finished - return the converted length.
422 return posn - charIndex;
425 } // class UnicodeDecoder
427 }; // class UnicodeEncoding
429 }; // namespace System.Text