Merge pull request #5014 from vkargov/vk-kasha
[mono.git] / mcs / class / Mono.Posix / Mono.Unix / UnixEncoding.cs
1 /*
2  * Mono.Unix/UnixEncoding.cs
3  *
4  * Authors:
5  *   Jonathan Pryor (jonpryor@vt.edu)
6  *
7  * Copyright (c) 2001, 2002  Southern Storm Software, Pty Ltd
8  * Copyright (C) 2004 Novell, Inc (http://www.novell.com)
9  * Copyright (C) 2005 Jonathan Pryor
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining
12  * a copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be included
19  * in all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
25  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
26  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27  * OTHER DEALINGS IN THE SOFTWARE.
28  */
29
30 namespace Mono.Unix
31 {
32
33 using System;
34 using System.Text;
35
36 [Serializable]
37 public class UnixEncoding : Encoding
38 {
39         public static readonly Encoding Instance = new UnixEncoding (); // Shared UnixEncoding instance, constructed once by the static field initializer.
40
41         public static readonly char EscapeByte = '\u0000'; // Marker char (NUL): the encoders in this class emit the char that follows it as one raw byte.
42
43         // Constructors.
44         public UnixEncoding () // Public parameterless constructor; the body is intentionally empty.
45         {
46         }
47
48         // Internal version of "GetByteCount" which can handle a rolling
49         // state between multiple calls to this method.
// Counts the bytes that encoding chars[index .. index + count) produces.
//   leftOver - non-zero when an earlier call ended on a high surrogate
//              that is still waiting for its low half.
//   flush    - when true, a high surrogate still pending at the end is
//              counted as a three-byte sequence of its own.
// Throws ArgumentNullException / ArgumentOutOfRangeException for a null
// array or a range that does not fit inside it.
private static int InternalGetByteCount (char[] chars, int index, int count, uint leftOver, bool flush)
{
        if (chars == null) {
                throw new ArgumentNullException ("chars");
        }
        if (index < 0 || index > chars.Length) {
                throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
        }
        if (count < 0 || count > (chars.Length - index)) {
                throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
        }

        int end = index + count;
        int pos = index;
        int total = 0;
        bool highPending = (leftOver != 0);

        while (pos < end) {
                char c = chars[pos];

                if (highPending) {
                        highPending = false;
                        if (c >= '\uDC00' && c <= '\uDFFF') {
                                // High + low surrogate: one four-byte sequence.
                                total += 4;
                                ++pos;
                        } else {
                                // Unpaired high surrogate: count it alone (three
                                // bytes) and look at the current char again on
                                // the next iteration.
                                total += 3;
                        }
                        continue;
                }

                if (c == EscapeByte && (end - pos) > 1) {
                        // Escape marker plus the char after it: one raw byte.
                        total += 1;
                        pos += 2;
                } else if (c < '\u0080') {
                        total += 1;
                        ++pos;
                } else if (c < '\u0800') {
                        total += 2;
                        ++pos;
                } else if (c >= '\uD800' && c <= '\uDBFF') {
                        // Start of a surrogate pair; size is decided by the next char.
                        highPending = true;
                        ++pos;
                } else {
                        total += 3;
                        ++pos;
                }
        }

        if (flush && highPending) {
                // A dangling high surrogate is counted by itself.
                total += 3;
        }

        return total;
}
109
110         // Get the number of bytes needed to encode a character buffer.
// Counts the bytes for a self-contained char range: there is no pending
// surrogate from an earlier call, and a trailing one is flushed.
public override int GetByteCount (char[] chars, int index, int count)
{
        const uint noPendingSurrogate = 0;
        const bool flushAtEnd = true;
        return InternalGetByteCount (chars, index, count, noPendingSurrogate, flushAtEnd);
}
115
116         // Convenience wrappers for "GetByteCount".
// Counts the bytes needed to encode the whole string.
// Throws ArgumentNullException when s is null.
public override int GetByteCount (String s)
{
        if (s == null) {
                throw new ArgumentNullException ("s");
        }

        int total = 0;
        int limit = s.Length;
        int i = 0;
        while (i < limit) {
                char c = s[i];
                bool hasNext = (limit - i) > 1;

                if (c == EscapeByte && hasNext) {
                        // Escape marker plus the char after it: one raw byte.
                        total += 1;
                        i += 2;
                } else if (c < '\u0080') {
                        total += 1;
                        i += 1;
                } else if (c < '\u0800') {
                        total += 2;
                        i += 1;
                } else if (c >= '\uD800' && c <= '\uDBFF' && hasNext) {
                        // Possible surrogate pair: peek at the following char.
                        uint low = (uint)(s[i + 1]);
                        if (low >= (uint)0xDC00 && low <= (uint)0xDFFF) {
                                total += 4;
                                i += 2;
                        } else {
                                // Unpaired high surrogate: three bytes on its own.
                                total += 3;
                                i += 1;
                        }
                } else {
                        total += 3;
                        i += 1;
                }
        }

        return total;
}
159
160         // Internal version of "GetBytes" which can handle a rolling
161         // state between multiple calls to this method.
// Encodes charCount chars starting at chars[charIndex] into bytes, starting
// at bytes[byteIndex], and returns the number of bytes written.
//   leftOver - in: a high surrogate carried over from an earlier call
//              (0 for none); out: the high surrogate still pending when
//              this call returns (0 when none, or after a flush).
//   flush    - when true, a pending high surrogate is written out as a
//              three-byte sequence before returning.
// An EscapeByte that is followed by another char causes that following
// char to be written as one raw byte. An EscapeByte that is the very last
// char has nothing to escape and is written as a plain NUL byte, which is
// what InternalGetByteCount counts for the same input.
// Throws ArgumentNullException / ArgumentOutOfRangeException for bad
// arguments and ArgumentException when bytes is too small.
private static int InternalGetBytes (char[] chars, int charIndex,
                                     int charCount, byte[] bytes,
                                     int byteIndex, ref uint leftOver,
                                     bool flush)
{
        // Validate the parameters.
        if (chars == null) {
                throw new ArgumentNullException ("chars");
        }
        if (bytes == null) {
                throw new ArgumentNullException ("bytes");
        }
        if (charIndex < 0 || charIndex > chars.Length) {
                throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
        }
        if (charCount < 0 || charCount > (chars.Length - charIndex)) {
                throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
        }
        if (byteIndex < 0 || byteIndex > bytes.Length) {
                throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
        }

        // Convert the characters into bytes.
        char ch;
        int length = bytes.Length;
        uint pair;
        uint left = leftOver;
        int posn = byteIndex;
        while (charCount > 0) {
                // Fetch the next UTF-16 code unit. From here on charCount is
                // the number of chars remaining AFTER the current one.
                ch = chars[charIndex++];
                --charCount;
                if (left == 0) {
                        if (ch >= '\uD800' && ch <= '\uDBFF') {
                                // This is the start of a surrogate pair.
                                left = (uint)ch;
                                continue;
                        } else if (ch == EscapeByte && charCount > 0) {
                                // Escape marker followed by a char: emit that
                                // char as a single raw byte.
                                if (posn >= length) {
                                        throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                                }
                                bytes[posn++] = (byte) chars [charIndex++];
                                --charCount;
                                continue;
                        } else {
                                // This is a regular character. A trailing
                                // EscapeByte with nothing after it also lands
                                // here and is encoded as a single NUL byte.
                                pair = (uint)ch;
                        }
                } else if (ch >= '\uDC00' && ch <= '\uDFFF') {
                        // We have a surrogate pair.
                        pair = ((left - (uint)0xD800) << 10) +
                                   (((uint)ch) - (uint)0xDC00) +
                                   (uint)0x10000;
                        left = 0;
                } else {
                        // We have a surrogate start followed by a
                        // regular character.  Technically, this is
                        // invalid, but we have to do something.
                        // We write out the surrogate start and then
                        // re-visit the current character again.
                        pair = (uint)left;
                        left = 0;
                        --charIndex;
                        ++charCount;
                }

                // Encode the character pair value.
                if (pair < (uint)0x0080) {
                        if (posn >= length) {
                                throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                        }
                        bytes[posn++] = (byte)pair;
                } else if (pair < (uint)0x0800) {
                        if ((posn + 2) > length) {
                                throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                        }
                        bytes[posn++] = (byte)(0xC0 | (pair >> 6));
                        bytes[posn++] = (byte)(0x80 | (pair & 0x3F));
                } else if (pair < (uint)0x10000) {
                        if ((posn + 3) > length) {
                                throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                        }
                        bytes[posn++] = (byte)(0xE0 | (pair >> 12));
                        bytes[posn++] = (byte)(0x80 | ((pair >> 6) & 0x3F));
                        bytes[posn++] = (byte)(0x80 | (pair & 0x3F));
                } else {
                        if ((posn + 4) > length) {
                                throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                        }
                        bytes[posn++] = (byte)(0xF0 | (pair >> 18));
                        bytes[posn++] = (byte)(0x80 | ((pair >> 12) & 0x3F));
                        bytes[posn++] = (byte)(0x80 | ((pair >> 6) & 0x3F));
                        bytes[posn++] = (byte)(0x80 | (pair & 0x3F));
                }
        }
        if (flush && left != 0) {
                // Flush the left-over surrogate pair start.
                if ((posn + 3) > length) {
                        throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                }
                bytes[posn++] = (byte)(0xE0 | (left >> 12));
                bytes[posn++] = (byte)(0x80 | ((left >> 6) & 0x3F));
                bytes[posn++] = (byte)(0x80 | (left & 0x3F));
                left = 0;
        }
        leftOver = left;

        // Return the final count to the caller.
        return posn - byteIndex;
}
273
274         // Get the bytes that result from encoding a character buffer.
// Encodes a self-contained char range: starts with no pending surrogate
// and flushes any high surrogate left dangling at the end. Returns the
// number of bytes written.
public override int GetBytes (char[] chars, int charIndex, int charCount,
                              byte[] bytes, int byteIndex)
{
        uint pendingSurrogate = 0;
        int written = InternalGetBytes (chars, charIndex, charCount, bytes, byteIndex, ref pendingSurrogate, true);
        return written;
}
281
282         // Convenience wrappers for "GetBytes".
// Encodes charCount chars of s, starting at charIndex, into bytes starting
// at byteIndex. Returns the number of bytes written.
// Throws ArgumentNullException for a null s or bytes,
// ArgumentOutOfRangeException for a range that does not fit, and
// ArgumentException when bytes has too little room.
public override int GetBytes (String s, int charIndex, int charCount,
                              byte[] bytes, int byteIndex)
{
        // Validate the parameters.
        if (s == null) {
                throw new ArgumentNullException ("s");
        }
        if (bytes == null) {
                throw new ArgumentNullException ("bytes");
        }
        if (charIndex < 0 || charIndex > s.Length) {
                throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
        }
        if (charCount < 0 || charCount > (s.Length - charIndex)) {
                throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
        }
        if (byteIndex < 0 || byteIndex > bytes.Length) {
                throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
        }

        // Nothing to encode: no bytes are written, whatever the size of
        // the destination.
        if (charCount == 0) {
                return 0;
        }

        // Every char needs at least one output byte, so a destination with
        // no room left can never succeed. Checking here also keeps a
        // zero-length array away from the "fixed" statement below, which
        // yields a null pointer for it; the pointer overload would then
        // report a non-null (but empty) array as a null argument.
        int room = bytes.Length - byteIndex;
        if (room == 0) {
                throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
        }

        unsafe {
                fixed (char* p = s) {
                        fixed (byte* b = bytes) {
                                return GetBytes (p + charIndex, charCount, b + byteIndex, room);
                        }
                }
        }
}
311
// Pointer-based encoder: reads charCount chars from chars and writes at
// most byteCount bytes to bytes. Returns the number of bytes written.
// Throws ArgumentNullException for a null pointer,
// ArgumentOutOfRangeException for a negative count, and ArgumentException
// when the output buffer is too small.
public unsafe override int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
{
        // Null checks first ("bytes" wins when both are null), then counts
        // ("charCount" wins when both are negative).
        if (bytes == null) {
                throw new ArgumentNullException ("bytes");
        }
        if (chars == null) {
                throw new ArgumentNullException ("chars");
        }
        if (charCount < 0) {
                throw new ArgumentOutOfRangeException ("charCount");
        }
        if (byteCount < 0) {
                throw new ArgumentOutOfRangeException ("byteCount");
        }

        int written = 0;
        int read = 0;
        while (read < charCount) {
                char c = chars [read];
                bool hasNext = (charCount - read) > 1;
                uint codePoint;

                if (c >= '\uD800' && c <= '\uDBFF' && hasNext) {
                        // Possible surrogate pair: inspect the next code unit.
                        uint low = (uint)(chars [read + 1]);
                        if (low >= (uint)0xDC00 && low <= (uint)0xDFFF) {
                                codePoint = (low - (uint)0xDC00) +
                                            ((((uint)c) - (uint)0xD800) << 10) +
                                            (uint)0x10000;
                                read += 2;
                        } else {
                                // Unpaired high surrogate: encode it as-is.
                                codePoint = (uint)c;
                                read += 1;
                        }
                } else if (c == EscapeByte && hasNext) {
                        // Escape marker: the char after it becomes one raw byte.
                        if (written >= byteCount) {
                                throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                        }
                        bytes [written++] = (byte)chars [read + 1];
                        read += 2;
                        continue;
                } else {
                        codePoint = (uint)c;
                        read += 1;
                }

                // Pick the sequence length from the value, then emit it.
                int needed;
                if (codePoint < (uint)0x0080) {
                        needed = 1;
                } else if (codePoint < (uint)0x0800) {
                        needed = 2;
                } else if (codePoint < (uint)0x10000) {
                        needed = 3;
                } else {
                        needed = 4;
                }

                switch (needed) {
                case 1:
                        if (written >= byteCount) {
                                throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                        }
                        bytes [written++] = (byte)codePoint;
                        break;
                case 2:
                        if ((written + 2) > byteCount) {
                                throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                        }
                        bytes [written++] = (byte)(0xC0 | (codePoint >> 6));
                        bytes [written++] = (byte)(0x80 | (codePoint & 0x3F));
                        break;
                case 3:
                        if ((written + 3) > byteCount) {
                                throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                        }
                        bytes [written++] = (byte)(0xE0 | (codePoint >> 12));
                        bytes [written++] = (byte)(0x80 | ((codePoint >> 6) & 0x3F));
                        bytes [written++] = (byte)(0x80 | (codePoint & 0x3F));
                        break;
                default:
                        if ((written + 4) > byteCount) {
                                throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                        }
                        bytes [written++] = (byte)(0xF0 | (codePoint >> 18));
                        bytes [written++] = (byte)(0x80 | ((codePoint >> 12) & 0x3F));
                        bytes [written++] = (byte)(0x80 | ((codePoint >> 6) & 0x3F));
                        bytes [written++] = (byte)(0x80 | (codePoint & 0x3F));
                        break;
                }
        }

        return written;
}
388
389         // Internal version of "GetCharCount" which can handle a rolling
390         // state between multiple calls to this method.
// Counts the chars produced by decoding bytes[index .. index + count).
//   leftOverBits  - payload bits of a multi-byte sequence begun earlier.
//   leftOverCount - low nibble: bytes of that sequence seen so far;
//                   next nibble: total length of that sequence.
//   throwOnInvalid / flush - no exception is thrown here (the throws are
//                   disabled); the flags only decide whether some invalid
//                   input is added to the count, see the notes below.
// Bytes that do not form a valid sequence are counted as two chars per
// raw byte.
// Throws ArgumentNullException / ArgumentOutOfRangeException for a null
// array or a range that does not fit inside it.
private static int InternalGetCharCount (byte[] bytes, int index, int count,
                                         uint leftOverBits,
                                         uint leftOverCount,
                                         bool throwOnInvalid, bool flush)
{
        if (bytes == null) {
                throw new ArgumentNullException ("bytes");
        }
        if (index < 0 || index > bytes.Length) {
                throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
        }
        if (count < 0 || count > (bytes.Length - index)) {
                throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
        }

        int charTotal = 0;
        int rawSeen = 0;   // bytes read since the last finished or abandoned char
        uint bits = leftOverBits;
        uint have = (leftOverCount & (uint)0x0F);
        uint want = ((leftOverCount >> 4) & (uint)0x0F);

        while (count > 0) {
                uint b = (uint)(bytes [index]);
                ++index;
                --count;
                ++rawSeen;

                if (want == 0) {
                        // Expecting the first byte of a sequence.
                        if (b < (uint)0x0080) {
                                ++charTotal;
                                rawSeen = 0;
                                continue;
                        }

                        uint seqLen;
                        uint payloadMask;
                        if ((b & (uint)0xE0) == (uint)0xC0) {
                                seqLen = 2;
                                payloadMask = (uint)0x1F;
                        } else if ((b & (uint)0xF0) == (uint)0xE0) {
                                seqLen = 3;
                                payloadMask = (uint)0x0F;
                        } else if ((b & (uint)0xF8) == (uint)0xF0) {
                                seqLen = 4;
                                payloadMask = (uint)0x07;
                        } else if ((b & (uint)0xFC) == (uint)0xF8) {
                                seqLen = 5;
                                payloadMask = (uint)0x03;
                        } else if ((b & (uint)0xFE) == (uint)0xFC) {
                                seqLen = 6;
                                payloadMask = (uint)0x03;
                        } else {
                                // Not a valid lead byte: counted as escaped
                                // raw data (the throw is disabled).
                                charTotal += rawSeen * 2;
                                rawSeen = 0;
                                continue;
                        }

                        bits = (b & payloadMask);
                        have = 1;
                        want = seqLen;
                        continue;
                }

                if ((b & (uint)0xC0) != (uint)0x80) {
                        // Sequence broken by a non-continuation byte. An ASCII
                        // byte is pushed back so it gets decoded normally;
                        // anything else is escaped along with the partial
                        // sequence.
                        if (b < (uint)0x0080) {
                                --index;
                                ++count;
                                --rawSeen;
                        }
                        charTotal += rawSeen * 2;
                        want = 0;
                        rawSeen = 0;
                        continue;
                }

                // Continuation byte: accumulate six more payload bits.
                bits = ((bits << 6) | (b & (uint)0x3F));
                ++have;
                if (have < want) {
                        continue;
                }

                // The sequence is complete.
                if (bits < (uint)0x10000) {
                        // Overlong encodings are escaped rather than decoded.
                        bool overlong =
                                (want == 2 && bits <= 0x7F) ||
                                (want == 3 && bits <= 0x07FF) ||
                                (want == 4 && bits <= 0xFFFF) ||
                                (want == 5 && bits <= 0x1FFFFF) ||
                                (want == 6 && bits <= 0x03FFFFFF);
                        if (overlong) {
                                charTotal += rawSeen * 2;
                        } else {
                                ++charTotal;
                        }
                } else if (bits < (uint)0x110000) {
                        // Needs a surrogate pair.
                        charTotal += 2;
                } else if (throwOnInvalid) {
                        // Value above 0x10FFFF: only counted (as escaped
                        // bytes) when throwOnInvalid is set.
                        charTotal += rawSeen * 2;
                }
                want = 0;
                rawSeen = 0;
        }

        if (flush && want != 0 && throwOnInvalid) {
                // Input ended in the middle of a sequence: the bytes read
                // so far are counted as escaped.
                charTotal += rawSeen * 2;
        }

        return charTotal;
}
527
528         // Get the number of characters needed to decode a byte buffer.
// Counts the chars for a self-contained byte range: no carried-over
// decoder state, with both the throwOnInvalid and flush flags set.
public override int GetCharCount (byte[] bytes, int index, int count)
{
        const uint noLeftOverBits = 0;
        const uint noLeftOverCount = 0;
        int total = InternalGetCharCount (bytes, index, count, noLeftOverBits, noLeftOverCount, true, true);
        return total;
}
533
534         // Get the characters that result from decoding a byte buffer.
535         private static int InternalGetChars (byte[] bytes, int byteIndex,
536                                                                            int byteCount, char[] chars,
537                                                                            int charIndex, ref uint leftOverBits,
538                                                                            ref uint leftOverCount,
539                                                                            bool throwOnInvalid, bool flush)
540         {
541                 // Validate the parameters.
542                 if (bytes == null) {
543                         throw new ArgumentNullException ("bytes");
544                 }
545                 if (chars == null) {
546                         throw new ArgumentNullException ("chars");
547                 }
548                 if (byteIndex < 0 || byteIndex > bytes.Length) {
549                         throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
550                 }
551                 if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
552                         throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
553                 }
554                 if (charIndex < 0 || charIndex > chars.Length) {
555                         throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
556                 }
557
558                 if (charIndex == chars.Length)
559                         return 0;
560
561                 // Convert the bytes into the output buffer.
562                 byte[] raw = new byte[6];
563                 int next_raw = 0;
564                 uint ch;
565                 int length = chars.Length;
566                 int posn = charIndex;
567                 uint leftBits = leftOverBits;
568                 uint leftSoFar = (leftOverCount & (uint)0x0F);
569                 uint leftSize = ((leftOverCount >> 4) & (uint)0x0F);
570                 while (byteCount > 0) {
571                         // Fetch the next character from the byte buffer.
572                         ch = (uint)(bytes[byteIndex++]);
573                         raw [next_raw++] = (byte) ch;
574                         --byteCount;
575                         if (leftSize == 0) {
576                                 // Process a UTF-8 start character.
577                                 if (ch < (uint)0x0080) {
578                                         // Single-byte UTF-8 character.
579                                         if (posn >= length) {
580                                                 throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
581                                         }
582                                         next_raw = 0;
583                                         chars[posn++] = (char)ch;
584                                 } else if ((ch & (uint)0xE0) == (uint)0xC0) {
585                                         // Double-byte UTF-8 character.
586                                         leftBits = (ch & (uint)0x1F);
587                                         leftSoFar = 1;
588                                         leftSize = 2;
589                                 } else if ((ch & (uint)0xF0) == (uint)0xE0) {
590                                         // Three-byte UTF-8 character.
591                                         leftBits = (ch & (uint)0x0F);
592                                         leftSoFar = 1;
593                                         leftSize = 3;
594                                 } else if ((ch & (uint)0xF8) == (uint)0xF0) {
595                                         // Four-byte UTF-8 character.
596                                         leftBits = (ch & (uint)0x07);
597                                         leftSoFar = 1;
598                                         leftSize = 4;
599                                 } else if ((ch & (uint)0xFC) == (uint)0xF8) {
600                                         // Five-byte UTF-8 character.
601                                         leftBits = (ch & (uint)0x03);
602                                         leftSoFar = 1;
603                                         leftSize = 5;
604                                 } else if ((ch & (uint)0xFE) == (uint)0xFC) {
605                                         // Six-byte UTF-8 character.
606                                         leftBits = (ch & (uint)0x03);
607                                         leftSoFar = 1;
608                                         leftSize = 6;
609                                 } else {
610                                         // Invalid UTF-8 start character.
611                                         if (throwOnInvalid) {
612                                                 // throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
613                                         }
614                                         next_raw = 0;
615                                         chars[posn++] = EscapeByte;
616                                         chars[posn++] = (char) ch;
617                                 }
618                         } else {
619                                 // Process an extra byte in a multi-byte sequence.
620                                 if ((ch & (uint)0xC0) == (uint)0x80) {
621                                         leftBits = ((leftBits << 6) | (ch & (uint)0x3F));
622                                         if (++leftSoFar >= leftSize) {
623                                                 // We have a complete character now.
624                                                 if (leftBits < (uint)0x10000) {
625                                                         // is it an overlong ?
626                                                         bool overlong = false;
627                                                         switch (leftSize) {
628                                                         case 2:
629                                                                 overlong = (leftBits <= 0x7F);
630                                                                 break;
631                                                         case 3:
632                                                                 overlong = (leftBits <= 0x07FF);
633                                                                 break;
634                                                         case 4:
635                                                                 overlong = (leftBits <= 0xFFFF);
636                                                                 break;
637                                                         case 5:
638                                                                 overlong = (leftBits <= 0x1FFFFF);
639                                                                 break;
640                                                         case 6:
641                                                                 overlong = (leftBits <= 0x03FFFFFF);
642                                                                 break;
643                                                         }
644                                                         if (overlong) {
645                                                                 // if (throwOnInvalid)
646                                                                 //      throw new ArgumentException (_("Overlong"), leftBits.ToString ());
647                                                                 CopyRaw (raw, ref next_raw, chars, ref posn, length);
648                                                         }
649                                                         else {
650                                                                 if (posn >= length) {
651                                                                         throw new ArgumentException
652                                                                                 (_("Arg_InsufficientSpace"), "chars");
653                                                                 }
654                                                                 chars[posn++] = (char)leftBits;
655                                                         }
656                                                 } else if (leftBits < (uint)0x110000) {
657                                                         if ((posn + 2) > length) {
658                                                                 throw new ArgumentException
659                                                                         (_("Arg_InsufficientSpace"), "chars");
660                                                         }
661                                                         leftBits -= (uint)0x10000;
662                                                         chars[posn++] = (char)((leftBits >> 10) +
663                                                                                                    (uint)0xD800);
664                                                         chars[posn++] =
665                                                                 (char)((leftBits & (uint)0x3FF) + (uint)0xDC00);
666                                                 } else if (throwOnInvalid) {
667                                                         // ???
668                                                         // throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
669                                                         CopyRaw (raw, ref next_raw, chars, ref posn, length);
670                                                 }
671                                                 leftSize = 0;
672                                                 next_raw = 0;
673                                         }
674                                 } else {
675                                         // Invalid UTF-8 sequence: clear and restart.
676                                         if (throwOnInvalid) {
677                                                 // throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
678                                         }
679                                         // don't escape the current byte, process it normally
680                                         if (ch < (uint)0x0080) {
681                                                 --byteIndex;
682                                                 ++byteCount;
683                                                 --next_raw;
684                                         }
685                                         CopyRaw (raw, ref next_raw, chars, ref posn, length);
686                                         leftSize = 0;
687                                         next_raw = 0;
688                                 }
689                         }
690                 }
691                 if (flush && leftSize != 0 && throwOnInvalid) {
692                         // We had left-over bytes that didn't make up
693                         // a complete UTF-8 character sequence.
694                         // throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
695                         CopyRaw (raw, ref next_raw, chars, ref posn, length);
696                 }
697                 leftOverBits = leftBits;
698                 leftOverCount = (leftSoFar | (leftSize << 4));
699
700                 // Return the final length to the caller.
701                 return posn - charIndex;
702         }
703
704         private static void CopyRaw (byte[] raw, ref int next_raw, char[] chars, ref int posn, int length)
705         {
706                 if (posn+(next_raw*2) > length)
707                         throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
708
709                 for (int i = 0; i < next_raw; ++i) {
710                         chars[posn++] = EscapeByte;
711                         chars[posn++] = (char) raw [i];
712                 }
713
714                 next_raw = 0;
715         }
716
717         // Get the characters that result from decoding a byte buffer.
718         public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
719                                                                  char[] chars, int charIndex)
720         {
721                 uint leftOverBits = 0;
722                 uint leftOverCount = 0;
723                 return InternalGetChars (bytes, byteIndex, byteCount, chars, 
724                                 charIndex, ref leftOverBits, ref leftOverCount, true, true);
725         }
726
727         // Get the maximum number of bytes needed to encode a
728         // specified number of characters.
729         public override int GetMaxByteCount (int charCount)
730         {
731                 if (charCount < 0) {
732                         throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
733                 }
734                 return charCount * 4;
735         }
736
737         // Get the maximum number of characters needed to decode a
738         // specified number of bytes.
739         public override int GetMaxCharCount (int byteCount)
740         {
741                 if (byteCount < 0) {
742                         throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));
743                 }
744                 return byteCount;
745         }
746
747         // Get a Unix-specific decoder that is attached to this instance.
748         public override Decoder GetDecoder ()
749         {
750                 return new UnixDecoder ();
751         }
752
753         // Get a Unix-specific encoder that is attached to this instance.
754         public override Encoder GetEncoder ()
755         {
756                 return new UnixEncoder ();
757         }
758
759         // Get the Unix preamble.
760         public override byte[] GetPreamble ()
761         {
762                 return new byte [0];
763         }
764
765         // Determine if this object is equal to another.
766         public override bool Equals (Object value)
767         {
768                 UnixEncoding enc = (value as UnixEncoding);
769                 if (enc != null) {
770                         return true;
771                 }
772                 else {
773                         return false;
774                 }
775         }
776
777         // Get the hash code for this object.
778         public override int GetHashCode ()
779         {
780                 return base.GetHashCode ();
781         }
782         
783         public override byte [] GetBytes (String s)
784         {
785                 if (s == null)
786                         throw new ArgumentNullException ("s");
787                 
788                 int length = GetByteCount (s);
789                 byte [] bytes = new byte [length];
790                 GetBytes (s, 0, s.Length, bytes, 0);
791                 return bytes;
792         }
793
794         // Unix decoder implementation.
795         [Serializable]
796         private class UnixDecoder : Decoder
797         {
798                 private uint leftOverBits;
799                 private uint leftOverCount;
800
801                 // Constructor.
802                 public UnixDecoder ()
803                 {
804                         leftOverBits = 0;
805                         leftOverCount = 0;
806                 }
807
808                 // Override inherited methods.
809                 public override int GetCharCount (byte[] bytes, int index, int count)
810                 {
811                         return InternalGetCharCount (bytes, index, count,
812                                         leftOverBits, leftOverCount, true, false);
813                 }
814                 public override int GetChars (byte[] bytes, int byteIndex,
815                                                  int byteCount, char[] chars, int charIndex)
816                 {
817                         return InternalGetChars (bytes, byteIndex, byteCount,
818                                 chars, charIndex, ref leftOverBits, ref leftOverCount, true, false);
819                 }
820
821         }
822
823         // Unix encoder implementation.
824         [Serializable]
825         private class UnixEncoder : Encoder
826         {
827                 private uint leftOver;
828
829                 // Constructor.
830                 public UnixEncoder ()
831                 {
832                         leftOver = 0;
833                 }
834
835                 // Override inherited methods.
836                 public override int GetByteCount (char[] chars, int index,
837                                          int count, bool flush)
838                 {
839                         return InternalGetByteCount (chars, index, count, leftOver, flush);
840                 }
841                 public override int GetBytes (char[] chars, int charIndex,
842                                          int charCount, byte[] bytes, int byteCount, bool flush)
843                 {
844                         int result;
845                         result = InternalGetBytes (chars, charIndex, charCount, bytes, byteCount, ref leftOver, flush);
846                         return result;
847                 }
848         }
849
850         private static string _ (string arg)
851         {
852                 return arg;
853         }
854 }
855 }
856