for TARGET_J2EE only:
[mono.git] / mcs / class / Mono.Posix / Mono.Unix / UnixEncoding.cs
1 /*
2  * Mono.Unix/UnixEncoding.cs
3  *
4  * Authors:
5  *   Jonathan Pryor (jonpryor@vt.edu)
6  *
7  * Copyright (c) 2001, 2002  Southern Storm Software, Pty Ltd
8  * Copyright (C) 2004 Novell, Inc (http://www.novell.com)
9  * Copyright (C) 2005 Jonathan Pryor
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining
12  * a copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be included
19  * in all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
25  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
26  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27  * OTHER DEALINGS IN THE SOFTWARE.
28  */
29
30 namespace Mono.Unix
31 {
32
33 using System;
34 using System.Text;
35
36 [Serializable]
37 public class UnixEncoding : Encoding
38 {
39         public static readonly Encoding Instance = new UnixEncoding ();
40
41         public static readonly char EscapeByte = '\u0000';
42
43         // Constructors.
// Creates a new encoding object.  The constructor does no work of its
// own beyond running the base Encoding constructor.
public UnixEncoding ()
        : base ()
{
}
47
48         // Internal version of "GetByteCount" which can handle a rolling
49         // state between multiple calls to this method.
// Returns how many bytes are needed to encode "count" characters of
// "chars" starting at "index".
//
//   leftOver - non-zero when a high surrogate from a previous call is
//              still waiting for its low surrogate.
//   flush    - when true, a high surrogate still pending at the end of
//              the input is counted as a 3-byte sequence.
//
// Throws ArgumentNullException when "chars" is null and
// ArgumentOutOfRangeException when index/count fall outside the array.
private static int InternalGetByteCount (char[] chars, int index, int count, uint leftOver, bool flush)
{
        if (chars == null)
                throw new ArgumentNullException ("chars");
        if (index < 0 || index > chars.Length)
                throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
        if (count < 0 || count > (chars.Length - index))
                throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

        int total = 0;
        int stop = index + count;
        bool highPending = (leftOver != 0);

        for (int i = index; i < stop; ) {
                char c = chars[i];

                if (highPending) {
                        highPending = false;
                        if (c >= '\uDC00' && c <= '\uDFFF') {
                                // High + low surrogate: one 4-byte sequence.
                                total += 4;
                                ++i;
                        } else {
                                // Unpaired high surrogate: count it as a 3-byte
                                // sequence and look at "c" again on the next
                                // iteration (hence no increment of i).
                                total += 3;
                        }
                        continue;
                }

                if (c == EscapeByte && (stop - i) > 1) {
                        // Escape marker plus the char it escapes: two chars
                        // in, one byte out.
                        total += 1;
                        i += 2;
                        continue;
                }

                if (c < '\u0080') {
                        total += 1;
                } else if (c < '\u0800') {
                        total += 2;
                } else if (c >= '\uD800' && c <= '\uDBFF') {
                        // Start of a surrogate pair; counted once we see what follows.
                        highPending = true;
                } else {
                        total += 3;
                }
                ++i;
        }

        if (flush && highPending) {
                // Left-over surrogate start at the end of the input.
                total += 3;
        }

        return total;
}
109
110         // Get the number of bytes needed to encode a character buffer.
// One-shot count: starts with no pending surrogate and flushes at the end.
public override int GetByteCount (char[] chars, int index, int count)
{
        const uint noPendingSurrogate = 0;
        const bool flushAtEnd = true;
        return InternalGetByteCount (chars, index, count, noPendingSurrogate, flushAtEnd);
}
115
116         // Convenience wrappers for "GetByteCount".
// Returns the number of bytes needed to encode the whole string "s".
// Throws ArgumentNullException when "s" is null.
public override int GetByteCount (String s)
{
        if (s == null)
                throw new ArgumentNullException ("s");

        int total = 0;
        int n = s.Length;
        int i = 0;

        while (i < n) {
                char c = s[i];
                bool hasNext = (i + 1 < n);

                if (c == EscapeByte && hasNext) {
                        // Escape marker plus escaped char: one output byte.
                        total += 1;
                        i += 2;
                        continue;
                }

                if (c < '\u0080') {
                        total += 1;
                } else if (c < '\u0800') {
                        total += 2;
                } else if (c >= '\uD800' && c <= '\uDBFF' && hasNext) {
                        // Possible surrogate pair: peek at the following char.
                        uint next = (uint)(s[i + 1]);
                        if (next >= (uint)0xDC00 && next <= (uint)0xDFFF) {
                                total += 4;
                                i += 2;
                                continue;
                        }
                        // Unpaired high surrogate.
                        total += 3;
                } else {
                        total += 3;
                }
                ++i;
        }

        return total;
}
159
160         // Internal version of "GetBytes" which can handle a rolling
161         // state between multiple calls to this method.
// Encodes "charCount" characters of "chars", starting at "charIndex",
// into "bytes" starting at "byteIndex".  Returns the number of bytes
// written.
//
// Encoding rules:
//   * EscapeByte followed by another char emits that char truncated to
//     one raw byte; both chars are consumed.
//   * EscapeByte as the very last char emits a single byte holding
//     EscapeByte itself.  This matches InternalGetByteCount, which
//     counts one byte for that input, and the String overload of
//     GetBytes.  (Previously the char was dropped after the space
//     check, so the byte count and the bytes written disagreed.)
//   * A high surrogate followed by a low surrogate becomes one 4-byte
//     sequence; an unpaired high surrogate is written as a 3-byte
//     sequence.
//   * Everything else is written as a 1-, 2- or 3-byte sequence
//     depending on its value.
//
//   leftOver - in: high surrogate carried over from a previous call
//              (0 if none); out: high surrogate still pending.
//   flush    - when true, a pending high surrogate is written out as a
//              3-byte sequence and leftOver is reset to 0.
//
// Throws ArgumentNullException for null arrays,
// ArgumentOutOfRangeException for bad indices/counts, and
// ArgumentException when "bytes" is too small.
private static int InternalGetBytes (char[] chars, int charIndex,
                                     int charCount, byte[] bytes,
                                     int byteIndex, ref uint leftOver,
                                     bool flush)
{
        // Validate the parameters.
        if (chars == null) {
                throw new ArgumentNullException ("chars");
        }
        if (bytes == null) {
                throw new ArgumentNullException ("bytes");
        }
        if (charIndex < 0 || charIndex > chars.Length) {
                throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
        }
        if (charCount < 0 || charCount > (chars.Length - charIndex)) {
                throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
        }
        if (byteIndex < 0 || byteIndex > bytes.Length) {
                throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
        }

        // Convert the characters into bytes.
        char ch;
        int length = bytes.Length;
        uint pair;
        uint left = leftOver;
        int posn = byteIndex;
        while (charCount > 0) {
                // Fetch the next UTF-16 character pair value.
                ch = chars[charIndex++];
                --charCount;
                if (left == 0) {
                        if (ch >= '\uD800' && ch <= '\uDBFF') {
                                // This is the start of a surrogate pair.
                                left = (uint)ch;
                                continue;
                        } else if (ch == EscapeByte) {
                                // Either branch below writes exactly one byte.
                                if (posn >= length) {
                                        throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                                }
                                if (charCount > 0) {
                                        // Escaped raw byte: copy it through unchanged.
                                        bytes[posn++] = (byte) chars [charIndex++];
                                        --charCount;
                                } else {
                                        // Trailing escape marker with nothing to
                                        // escape: emit it as an ordinary char so
                                        // the result agrees with the byte count.
                                        bytes[posn++] = (byte) ch;
                                }
                                continue;
                        } else {
                                // This is a regular character.
                                pair = (uint)ch;
                        }
                } else if (ch >= '\uDC00' && ch <= '\uDFFF') {
                        // We have a surrogate pair.
                        pair = ((left - (uint)0xD800) << 10) +
                                   (((uint)ch) - (uint)0xDC00) +
                                   (uint)0x10000;
                        left = 0;
                } else {
                        // We have a surrogate start followed by a
                        // regular character.  Technically, this is
                        // invalid, but we have to do something.
                        // We write out the surrogate start and then
                        // re-visit the current character again.
                        pair = (uint)left;
                        left = 0;
                        --charIndex;
                        ++charCount;
                }

                // Encode the character pair value.
                if (pair < (uint)0x0080) {
                        if (posn >= length) {
                                throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                        }
                        bytes[posn++] = (byte)pair;
                } else if (pair < (uint)0x0800) {
                        if ((posn + 2) > length) {
                                throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                        }
                        bytes[posn++] = (byte)(0xC0 | (pair >> 6));
                        bytes[posn++] = (byte)(0x80 | (pair & 0x3F));
                } else if (pair < (uint)0x10000) {
                        if ((posn + 3) > length) {
                                throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                        }
                        bytes[posn++] = (byte)(0xE0 | (pair >> 12));
                        bytes[posn++] = (byte)(0x80 | ((pair >> 6) & 0x3F));
                        bytes[posn++] = (byte)(0x80 | (pair & 0x3F));
                } else {
                        if ((posn + 4) > length) {
                                throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                        }
                        bytes[posn++] = (byte)(0xF0 | (pair >> 18));
                        bytes[posn++] = (byte)(0x80 | ((pair >> 12) & 0x3F));
                        bytes[posn++] = (byte)(0x80 | ((pair >> 6) & 0x3F));
                        bytes[posn++] = (byte)(0x80 | (pair & 0x3F));
                }
        }
        if (flush && left != 0) {
                // Flush the left-over surrogate pair start.
                if ((posn + 3) > length) {
                        throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                }
                bytes[posn++] = (byte)(0xE0 | (left >> 12));
                bytes[posn++] = (byte)(0x80 | ((left >> 6) & 0x3F));
                bytes[posn++] = (byte)(0x80 | (left & 0x3F));
                left = 0;
        }
        leftOver = left;

        // Return the final count to the caller.
        return posn - byteIndex;
}
273
274         // Get the bytes that result from encoding a character buffer.
// One-shot encode of a char buffer: starts with no pending surrogate and
// flushes at the end.  Returns the number of bytes written.
public override int GetBytes (char[] chars, int charIndex, int charCount,
                              byte[] bytes, int byteIndex)
{
        uint pendingSurrogate = 0;
        int written = InternalGetBytes (chars, charIndex, charCount,
                                        bytes, byteIndex,
                                        ref pendingSurrogate, true);
        return written;
}
281
282         // Convenience wrappers for "GetBytes".
// Encodes "charCount" characters of the string "s", starting at
// "charIndex", into "bytes" starting at "byteIndex", and returns the
// number of bytes written.
//
// EscapeByte followed by another char inside the range emits that char
// as one raw byte.  Surrogate pairs become 4-byte sequences.  Every
// other char (including an unpaired surrogate or a trailing EscapeByte)
// is written as a 1-, 2- or 3-byte sequence depending on its value.
//
// Throws ArgumentNullException for a null string/array,
// ArgumentOutOfRangeException for bad indices/counts, and
// ArgumentException when "bytes" is too small.
public override int GetBytes (String s, int charIndex, int charCount,
                              byte[] bytes, int byteIndex)
{
        if (s == null)
                throw new ArgumentNullException ("s");
        if (bytes == null)
                throw new ArgumentNullException ("bytes");
        if (charIndex < 0 || charIndex > s.Length)
                throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
        if (charCount < 0 || charCount > (s.Length - charIndex))
                throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
        if (byteIndex < 0 || byteIndex > bytes.Length)
                throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));

        int limit = bytes.Length;
        int outPos = byteIndex;
        int stop = charIndex + charCount;
        int i = charIndex;

        while (i < stop) {
                char c = s[i++];
                bool hasNext = (i < stop);
                uint code;

                if (c >= '\uD800' && c <= '\uDBFF' && hasNext) {
                        // Possible surrogate pair: peek at the next char.
                        uint low = (uint)(s[i]);
                        if (low >= (uint)0xDC00 && low <= (uint)0xDFFF) {
                                code = (low - (uint)0xDC00) +
                                           ((((uint)c) - (uint)0xD800) << 10) +
                                           (uint)0x10000;
                                ++i;
                        } else {
                                code = (uint)c;
                        }
                } else if (c == EscapeByte && hasNext) {
                        // Escaped raw byte: copy the following char through
                        // as a single byte.
                        if (outPos >= limit)
                                throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                        bytes[outPos++] = (byte) s [i++];
                        continue;
                } else {
                        code = (uint)c;
                }

                // Work out the sequence length, check for room, then write.
                int need;
                if (code < (uint)0x0080)
                        need = 1;
                else if (code < (uint)0x0800)
                        need = 2;
                else if (code < (uint)0x10000)
                        need = 3;
                else
                        need = 4;

                if ((outPos + need) > limit)
                        throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");

                switch (need) {
                case 1:
                        bytes[outPos++] = (byte)code;
                        break;
                case 2:
                        bytes[outPos++] = (byte)(0xC0 | (code >> 6));
                        bytes[outPos++] = (byte)(0x80 | (code & 0x3F));
                        break;
                case 3:
                        bytes[outPos++] = (byte)(0xE0 | (code >> 12));
                        bytes[outPos++] = (byte)(0x80 | ((code >> 6) & 0x3F));
                        bytes[outPos++] = (byte)(0x80 | (code & 0x3F));
                        break;
                default:
                        bytes[outPos++] = (byte)(0xF0 | (code >> 18));
                        bytes[outPos++] = (byte)(0x80 | ((code >> 12) & 0x3F));
                        bytes[outPos++] = (byte)(0x80 | ((code >> 6) & 0x3F));
                        bytes[outPos++] = (byte)(0x80 | (code & 0x3F));
                        break;
                }
        }

        return outPos - byteIndex;
}
370
371         // Internal version of "GetCharCount" which can handle a rolling
372         // state between multiple calls to this method.
// Returns the number of chars that the "count" bytes of "bytes" starting
// at "index" contribute.
//
//   leftOverBits   - payload bits already accumulated for a multi-byte
//                    sequence begun in an earlier call.
//   leftOverCount  - packed state: the low 4 bits hold how many bytes of
//                    that sequence have been seen, the next 4 bits hold
//                    the sequence's total size (0 = no sequence pending).
//   throwOnInvalid - NOTE: every throw this flag guarded is commented out.
//                    In the code below it only decides whether bytes are
//                    counted for an out-of-range code point and for an
//                    incomplete sequence at flush time.
//   flush          - when true, an incomplete trailing sequence is
//                    accounted for (subject to throwOnInvalid).
//
// Bytes that cannot be decoded are counted as two chars each
// (length += next_raw*2) -- presumably an escape char plus the raw byte;
// confirm against InternalGetChars.
//
// Throws ArgumentNullException when "bytes" is null and
// ArgumentOutOfRangeException when index/count fall outside the array.
373         private static int InternalGetCharCount (byte[] bytes, int index, int count,
374                                                                                    uint leftOverBits,
375                                                                                    uint leftOverCount,
376                                                                                    bool throwOnInvalid, bool flush)
377         {
378                 // Validate the parameters.
379                 if (bytes == null) {
380                         throw new ArgumentNullException ("bytes");
381                 }
382                 if (index < 0 || index > bytes.Length) {
383                         throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
384                 }
385                 if (count < 0 || count > (bytes.Length - index)) {
386                         throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
387                 }
388
389                 // Determine the number of characters that we have.
                    // next_raw counts the bytes consumed in this call since the
                    // last point at which it was reset to 0 below.
390                 int next_raw = 0;
391                 uint ch;
392                 int length = 0;
393                 uint leftBits = leftOverBits;
                    // Unpack the rolling state: bytes seen so far / expected size.
394                 uint leftSoFar = (leftOverCount & (uint)0x0F);
395                 uint leftSize = ((leftOverCount >> 4) & (uint)0x0F);
396                 while (count > 0) {
397                         ch = (uint)(bytes[index++]);
398                         ++next_raw;
399                         --count;
400                         if (leftSize == 0) {
401                                 // Process a UTF-8 start character.
402                                 if (ch < (uint)0x0080) {
403                                         // Single-byte UTF-8 character.
404                                         ++length;
405                                         next_raw = 0;
406                                 } else if ((ch & (uint)0xE0) == (uint)0xC0) {
407                                         // Double-byte UTF-8 character.
408                                         leftBits = (ch & (uint)0x1F);
409                                         leftSoFar = 1;
410                                         leftSize = 2;
411                                 } else if ((ch & (uint)0xF0) == (uint)0xE0) {
412                                         // Three-byte UTF-8 character.
413                                         leftBits = (ch & (uint)0x0F);
414                                         leftSoFar = 1;
415                                         leftSize = 3;
416                                 } else if ((ch & (uint)0xF8) == (uint)0xF0) {
417                                         // Four-byte UTF-8 character.
418                                         leftBits = (ch & (uint)0x07);
419                                         leftSoFar = 1;
420                                         leftSize = 4;
421                                 } else if ((ch & (uint)0xFC) == (uint)0xF8) {
422                                         // Five-byte UTF-8 character.
423                                         leftBits = (ch & (uint)0x03);
424                                         leftSoFar = 1;
425                                         leftSize = 5;
426                                 } else if ((ch & (uint)0xFE) == (uint)0xFC) {
427                                         // Six-byte UTF-8 character.
                                            // The start byte here is 0xFC or 0xFD, so masking
                                            // with 0x03 keeps only its lowest bit.
428                                         leftBits = (ch & (uint)0x03);
429                                         leftSoFar = 1;
430                                         leftSize = 6;
431                                 } else {
432                                         // Invalid UTF-8 start character.
433                                         if (throwOnInvalid) {
434                                                 // throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
435                                         }
                                            // Counted as two chars per raw byte, regardless
                                            // of throwOnInvalid.
436                                         length += next_raw*2;
437                                         next_raw = 0;
438                                 }
439                         } else {
440                                 // Process an extra byte in a multi-byte sequence.
441                                 if ((ch & (uint)0xC0) == (uint)0x80) {
442                                         leftBits = ((leftBits << 6) | (ch & (uint)0x3F));
443                                         if (++leftSoFar >= leftSize) {
444                                                 // We have a complete character now.
445                                                 if (leftBits < (uint)0x10000) {
446                                                         // is it an overlong ?
                                                            // A value is overlong when it would have fit
                                                            // in a shorter sequence than leftSize bytes.
447                                                         bool overlong = false;
448                                                         switch (leftSize) {
449                                                         case 2:
450                                                                 overlong = (leftBits <= 0x7F);
451                                                                 break;
452                                                         case 3:
453                                                                 overlong = (leftBits <= 0x07FF);
454                                                                 break;
455                                                         case 4:
456                                                                 overlong = (leftBits <= 0xFFFF);
457                                                                 break;
458                                                         case 5:
459                                                                 overlong = (leftBits <= 0x1FFFFF);
460                                                                 break;
461                                                         case 6:
462                                                                 overlong = (leftBits <= 0x03FFFFFF);
463                                                                 break;
464                                                         }
465                                                         if (overlong) {
466                                                                 // if (throwOnInvalid)
467                                                                 //      throw new ArgumentException (_("Overlong"), leftBits.ToString ());
                                                                    // Overlong form: count its raw bytes, two
                                                                    // chars each, instead of one decoded char.
468                                                                 length += next_raw*2;
469                                                         }
470                                                         else
471                                                                 ++length;
472                                                 } else if (leftBits < (uint)0x110000) {
                                                            // 0x10000..0x10FFFF needs a surrogate pair: two chars.
473                                                         length += 2;
474                                                 } else if (throwOnInvalid) {
475                                                         // ???
476                                                         // throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
                                                            // Value of 0x110000 or more: raw bytes are counted
                                                            // only when throwOnInvalid is set; otherwise the
                                                            // sequence adds nothing to length.
477                                                         length += next_raw*2;
478                                                 }
479                                                 leftSize = 0;
480                                                 next_raw = 0;
481                                         }
482                                 } else {
483                                         // Invalid UTF-8 sequence: clear and restart.
484                                         if (throwOnInvalid) {
485                                                 // throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
486                                         }
487                                         // don't escape the current byte, process it normally
                                            // Only a byte below 0x80 is pushed back and re-read
                                            // as a start byte; any other interrupting byte stays
                                            // in next_raw and is counted with the broken sequence.
488                                         if (ch < (uint)0x0080) {
489                                                 --index;
490                                                 ++count;
491                                                 --next_raw;
492                                         }
493                                         length += next_raw*2;
494                                         leftSize = 0;
495                                         next_raw = 0;
496                                 }
497                         }
498                 }
499                 if (flush && leftSize != 0 && throwOnInvalid) {
500                         // We had left-over bytes that didn't make up
501                         // a complete UTF-8 character sequence.
502                         // throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
                            // next_raw only covers bytes read during this call.
503                         length += next_raw * 2;
504                 }
505
506                 // Return the final length to the caller.
507                 return length;
508         }
509
510         // Get the number of characters needed to decode a byte buffer.
// One-shot count for a byte buffer: starts with no pending sequence
// state and passes true for both throwOnInvalid and flush.
public override int GetCharCount (byte[] bytes, int index, int count)
{
        const uint noPendingBits = 0;
        const uint noPendingCount = 0;
        return InternalGetCharCount (bytes, index, count,
                                     noPendingBits, noPendingCount,
                                     true, true);
}
515
516         // Get the characters that result from decoding a byte buffer.
517         private static int InternalGetChars (byte[] bytes, int byteIndex,
518                                                                            int byteCount, char[] chars,
519                                                                            int charIndex, ref uint leftOverBits,
520                                                                            ref uint leftOverCount,
521                                                                            bool throwOnInvalid, bool flush)
522         {
523                 // Validate the parameters.
524                 if (bytes == null) {
525                         throw new ArgumentNullException ("bytes");
526                 }
527                 if (chars == null) {
528                         throw new ArgumentNullException ("chars");
529                 }
530                 if (byteIndex < 0 || byteIndex > bytes.Length) {
531                         throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
532                 }
533                 if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
534                         throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
535                 }
536                 if (charIndex < 0 || charIndex > chars.Length) {
537                         throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
538                 }
539
540                 if (charIndex == chars.Length)
541                         return 0;
542
543                 // Convert the bytes into the output buffer.
544                 byte[] raw = new byte[6];
545                 int next_raw = 0;
546                 uint ch;
547                 int length = chars.Length;
548                 int posn = charIndex;
549                 uint leftBits = leftOverBits;
550                 uint leftSoFar = (leftOverCount & (uint)0x0F);
551                 uint leftSize = ((leftOverCount >> 4) & (uint)0x0F);
552                 while (byteCount > 0) {
553                         // Fetch the next character from the byte buffer.
554                         ch = (uint)(bytes[byteIndex++]);
555                         raw [next_raw++] = (byte) ch;
556                         --byteCount;
557                         if (leftSize == 0) {
558                                 // Process a UTF-8 start character.
559                                 if (ch < (uint)0x0080) {
560                                         // Single-byte UTF-8 character.
561                                         if (posn >= length) {
562                                                 throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
563                                         }
564                                         next_raw = 0;
565                                         chars[posn++] = (char)ch;
566                                 } else if ((ch & (uint)0xE0) == (uint)0xC0) {
567                                         // Double-byte UTF-8 character.
568                                         leftBits = (ch & (uint)0x1F);
569                                         leftSoFar = 1;
570                                         leftSize = 2;
571                                 } else if ((ch & (uint)0xF0) == (uint)0xE0) {
572                                         // Three-byte UTF-8 character.
573                                         leftBits = (ch & (uint)0x0F);
574                                         leftSoFar = 1;
575                                         leftSize = 3;
576                                 } else if ((ch & (uint)0xF8) == (uint)0xF0) {
577                                         // Four-byte UTF-8 character.
578                                         leftBits = (ch & (uint)0x07);
579                                         leftSoFar = 1;
580                                         leftSize = 4;
581                                 } else if ((ch & (uint)0xFC) == (uint)0xF8) {
582                                         // Five-byte UTF-8 character.
583                                         leftBits = (ch & (uint)0x03);
584                                         leftSoFar = 1;
585                                         leftSize = 5;
586                                 } else if ((ch & (uint)0xFE) == (uint)0xFC) {
587                                         // Six-byte UTF-8 character.
588                                         leftBits = (ch & (uint)0x03);
589                                         leftSoFar = 1;
590                                         leftSize = 6;
591                                 } else {
592                                         // Invalid UTF-8 start character.
593                                         if (throwOnInvalid) {
594                                                 // throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
595                                         }
596                                         next_raw = 0;
597                                         chars[posn++] = EscapeByte;
598                                         chars[posn++] = (char) ch;
599                                 }
600                         } else {
601                                 // Process an extra byte in a multi-byte sequence.
602                                 if ((ch & (uint)0xC0) == (uint)0x80) {
603                                         leftBits = ((leftBits << 6) | (ch & (uint)0x3F));
604                                         if (++leftSoFar >= leftSize) {
605                                                 // We have a complete character now.
606                                                 if (leftBits < (uint)0x10000) {
607                                                         // is it an overlong ?
608                                                         bool overlong = false;
609                                                         switch (leftSize) {
610                                                         case 2:
611                                                                 overlong = (leftBits <= 0x7F);
612                                                                 break;
613                                                         case 3:
614                                                                 overlong = (leftBits <= 0x07FF);
615                                                                 break;
616                                                         case 4:
617                                                                 overlong = (leftBits <= 0xFFFF);
618                                                                 break;
619                                                         case 5:
620                                                                 overlong = (leftBits <= 0x1FFFFF);
621                                                                 break;
622                                                         case 6:
623                                                                 overlong = (leftBits <= 0x03FFFFFF);
624                                                                 break;
625                                                         }
626                                                         if (overlong) {
627                                                                 // if (throwOnInvalid)
628                                                                 //      throw new ArgumentException (_("Overlong"), leftBits.ToString ());
629                                                                 CopyRaw (raw, ref next_raw, chars, ref posn, length);
630                                                         }
631                                                         else {
632                                                                 if (posn >= length) {
633                                                                         throw new ArgumentException
634                                                                                 (_("Arg_InsufficientSpace"), "chars");
635                                                                 }
636                                                                 chars[posn++] = (char)leftBits;
637                                                         }
638                                                 } else if (leftBits < (uint)0x110000) {
639                                                         if ((posn + 2) > length) {
640                                                                 throw new ArgumentException
641                                                                         (_("Arg_InsufficientSpace"), "chars");
642                                                         }
643                                                         leftBits -= (uint)0x10000;
644                                                         chars[posn++] = (char)((leftBits >> 10) +
645                                                                                                    (uint)0xD800);
646                                                         chars[posn++] =
647                                                                 (char)((leftBits & (uint)0x3FF) + (uint)0xDC00);
648                                                 } else if (throwOnInvalid) {
649                                                         // ???
650                                                         // throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
651                                                         CopyRaw (raw, ref next_raw, chars, ref posn, length);
652                                                 }
653                                                 leftSize = 0;
654                                                 next_raw = 0;
655                                         }
656                                 } else {
657                                         // Invalid UTF-8 sequence: clear and restart.
658                                         if (throwOnInvalid) {
659                                                 // throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
660                                         }
661                                         // don't escape the current byte, process it normally
662                                         if (ch < (uint)0x0080) {
663                                                 --byteIndex;
664                                                 ++byteCount;
665                                                 --next_raw;
666                                         }
667                                         CopyRaw (raw, ref next_raw, chars, ref posn, length);
668                                         leftSize = 0;
669                                         next_raw = 0;
670                                 }
671                         }
672                 }
673                 if (flush && leftSize != 0 && throwOnInvalid) {
674                         // We had left-over bytes that didn't make up
675                         // a complete UTF-8 character sequence.
676                         // throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
677                         CopyRaw (raw, ref next_raw, chars, ref posn, length);
678                 }
679                 leftOverBits = leftBits;
680                 leftOverCount = (leftSoFar | (leftSize << 4));
681
682                 // Return the final length to the caller.
683                 return posn - charIndex;
684         }
685
// Emit the buffered raw bytes into "chars" in escaped form: every byte
// is written as an EscapeByte marker char followed by the byte value
// widened to a char.  "posn" is advanced past everything written and
// "next_raw" is reset to zero on return.
// Throws ArgumentException ("chars") when the two-chars-per-byte output
// would run past "length".
private static void CopyRaw (byte[] raw, ref int next_raw, char[] chars, ref int posn, int length)
{
        // Each pending byte expands to exactly two output chars.
        int end = posn + (next_raw * 2);
        if (end > length) {
                throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
        }

        int src = 0;
        while (src < next_raw) {
                chars[posn++] = EscapeByte;
                chars[posn++] = (char) raw [src];
                ++src;
        }

        // The scratch buffer has been drained.
        next_raw = 0;
}
698
// Decode "byteCount" bytes of "bytes", starting at "byteIndex", into
// "chars" beginning at "charIndex", and return the number of chars
// written.  All of the work is delegated to InternalGetChars.
public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
                              char[] chars, int charIndex)
{
        // This entry point keeps no state between calls: the carry-over
        // values start at zero and are discarded afterwards.  Both
        // trailing flags are passed as true.
        uint carryBits = 0;
        uint carryCount = 0;
        int written = InternalGetChars (bytes, byteIndex, byteCount, chars,
                        charIndex, ref carryBits, ref carryCount, true, true);
        return written;
}
708
// Get the maximum number of bytes needed to encode a specified number
// of characters.  The bound used is four bytes per character.
// Throws ArgumentOutOfRangeException ("charCount") when "charCount" is
// negative, or when the resulting bound does not fit in an Int32.
public override int GetMaxByteCount (int charCount)
{
        if (charCount < 0) {
                throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
        }

        // Multiply in 64 bits: a plain "charCount * 4" wraps around
        // silently in unchecked arithmetic and would hand the caller a
        // negative or far-too-small buffer size.
        long maxBytes = (long) charCount * 4;
        if (maxBytes > Int32.MaxValue) {
                throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
        }
        return (int) maxBytes;
}
718
// Get the maximum number of characters needed to decode a specified
// number of bytes.
// Throws ArgumentOutOfRangeException ("byteCount") when "byteCount" is
// negative, or when the resulting bound does not fit in an Int32.
public override int GetMaxCharCount (int byteCount)
{
        if (byteCount < 0) {
                throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));
        }

        // A byte that cannot be decoded is written out as TWO chars
        // (EscapeByte followed by the byte value), so the worst case is
        // two chars per input byte, not one.  Computed in 64 bits so
        // the doubling cannot wrap around silently.
        long maxChars = (long) byteCount * 2;
        if (maxChars > Int32.MaxValue) {
                throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
        }
        return (int) maxChars;
}
728
// Create a Unix-specific decoder.  Each call hands back a freshly
// constructed UnixDecoder.
public override Decoder GetDecoder ()
{
        Decoder decoder = new UnixDecoder ();
        return decoder;
}
734
// Create a Unix-specific encoder.  Each call hands back a freshly
// constructed UnixEncoder.
public override Encoder GetEncoder ()
{
        Encoder encoder = new UnixEncoder ();
        return encoder;
}
740
// Get the Unix preamble: a newly allocated, zero-length byte array.
public override byte[] GetPreamble ()
{
        byte[] preamble = new byte [0];
        return preamble;
}
746
// Determine if this object is equal to another.  The answer is true
// for any non-null object that is a UnixEncoding and false for
// everything else; no per-instance state is compared.
public override bool Equals (Object value)
{
        return value is UnixEncoding;
}
758
// Get the hash code for this object.  The value is taken unchanged
// from the base class implementation.
public override int GetHashCode ()
{
        int hash = base.GetHashCode ();
        return hash;
}
764         
// Encode a whole string and return the bytes in a newly allocated
// array whose size is obtained from GetByteCount.
// Throws ArgumentNullException ("s") when "s" is null.
public override byte [] GetBytes (String s)
{
        if (s == null) {
                throw new ArgumentNullException ("s");
        }

        // Size the output first, then encode all of "s" into it from offset 0.
        byte [] encoded = new byte [GetByteCount (s)];
        GetBytes (s, 0, s.Length, encoded, 0);
        return encoded;
}
775
// Unix decoder implementation.  Keeps the state of a partially decoded
// multi-byte sequence so that decoding can continue across calls.
[Serializable]
private class UnixDecoder : Decoder
{
        // State carried from one call to the next.  InternalGetChars
        // stores the accumulated bits in leftOverBits and packs two
        // counters into leftOverCount: the low four bits hold the
        // number of bytes seen so far, the next four bits hold the
        // expected sequence length.
        private uint leftOverBits = 0;
        private uint leftOverCount = 0;

        // Constructor.  Both state fields start at zero through their
        // initializers.
        public UnixDecoder ()
        {
        }

        // Count the chars that decoding the given byte range produces.
        // The state fields are handed over by value, so this call
        // leaves them untouched.
        public override int GetCharCount (byte[] bytes, int index, int count)
        {
                int total = InternalGetCharCount (bytes, index, count,
                                leftOverBits, leftOverCount, true, false);
                return total;
        }

        // Decode the given byte range into "chars" and return the
        // number of chars written.  The state fields are handed over
        // by reference so that the helper can update them for the
        // next call.
        public override int GetChars (byte[] bytes, int byteIndex,
                                      int byteCount, char[] chars, int charIndex)
        {
                int written = InternalGetChars (bytes, byteIndex, byteCount,
                                chars, charIndex, ref leftOverBits, ref leftOverCount, true, false);
                return written;
        }
}
804
// Unix encoder implementation.  Keeps one uint of carry-over state
// between calls and forwards all work to the static Internal* helpers.
[Serializable]
private class UnixEncoder : Encoder
{
        // Carry-over value threaded through InternalGetByteCount and
        // InternalGetBytes.
        private uint leftOver = 0;

        // Constructor.  The carry-over state starts at zero through
        // its initializer.
        public UnixEncoder ()
        {
        }

        // Count the bytes needed to encode the given char range.  The
        // carry-over value is handed over by value, so this call
        // leaves it untouched.
        public override int GetByteCount (char[] chars, int index,
                                          int count, bool flush)
        {
                int total = InternalGetByteCount (chars, index, count, leftOver, flush);
                return total;
        }

        // Encode the given char range into "bytes" and return the
        // helper's result.  The carry-over value is handed over by
        // reference so that the helper can update it.
        // NOTE(review): the fifth parameter is named "byteCount" here,
        // but the base Encoder.GetBytes declares that position as
        // "byteIndex" (the starting offset in "bytes").  It is
        // forwarded unchanged; confirm InternalGetBytes treats it as
        // an offset.
        public override int GetBytes (char[] chars, int charIndex,
                                      int charCount, byte[] bytes, int byteCount, bool flush)
        {
                return InternalGetBytes (chars, charIndex, charCount, bytes, byteCount, ref leftOver, flush);
        }
}
831
// Message lookup hook used when building exception messages.  It
// returns its argument unchanged, so the key passed in is itself the
// text that callers receive.
private static string _ (string arg)
{
        string text = arg;
        return text;
}
836 }
837 }
838