Better and more homogeneous error handling.
[mono.git] / mcs / class / corlib / System.Text / UTF8Encoding.cs
1 /*
2  * UTF8Encoding.cs - Implementation of the "System.Text.UTF8Encoding" class.
3  *
4  * Copyright (c) 2001, 2002  Southern Storm Software, Pty Ltd
5  * Copyright (C) 2004 Novell, Inc (http://www.novell.com)
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining
8  * a copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included
15  * in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23  * OTHER DEALINGS IN THE SOFTWARE.
24  */
25
26 namespace System.Text
27 {
28
29 using System;
30 using System.Runtime.InteropServices;
31
32 [Serializable]
33 [MonoLimitation ("Serialization format not compatible with .NET")]
34 [ComVisible (true)]
35 public class UTF8Encoding : Encoding
36 {
37         // Magic number used by Windows for UTF-8.
38         internal const int UTF8_CODE_PAGE = 65001;
39
40         // Internal state.
41         private bool emitIdentifier;
42
// Constructors.

// Default instance: equivalent to UTF8Encoding (false, false).
public UTF8Encoding ()
        : this (false, false)
{
}

// Equivalent to UTF8Encoding (encoderShouldEmitUTF8Identifier, false).
public UTF8Encoding (bool encoderShouldEmitUTF8Identifier)
        : this (encoderShouldEmitUTF8Identifier, false)
{
}

// Full constructor.
//   encoderShouldEmitUTF8Identifier - stored in 'emitIdentifier'.
//   throwOnInvalidBytes             - selects the exception fallbacks when
//                                     true, the standard safe fallbacks
//                                     otherwise.
// Also fills in the descriptive fields of the base class (names,
// browser/mail flags and the Windows code page).
public UTF8Encoding (bool encoderShouldEmitUTF8Identifier, bool throwOnInvalidBytes)
        : base (UTF8_CODE_PAGE)
{
        emitIdentifier = encoderShouldEmitUTF8Identifier;

        if (!throwOnInvalidBytes) {
                SetFallbackInternal (EncoderFallback.StandardSafeFallback, DecoderFallback.StandardSafeFallback);
        } else {
                SetFallbackInternal (EncoderFallback.ExceptionFallback, DecoderFallback.ExceptionFallback);
        }

        // all three MIME-style names are the same for UTF-8
        header_name = "utf-8";
        body_name = "utf-8";
        web_name = "utf-8";
        encoding_name = "Unicode (UTF-8)";

        is_browser_save = true;
        is_browser_display = true;
        is_mail_news_display = true;
        is_mail_news_save = true;

        windows_code_page = UnicodeEncoding.UNICODE_CODE_PAGE;
}
65
66         ///////////////////////////////////////////////////////////////////////
67         // INTERNAL DECODING FUNCTION (UTF8 -> CHAR/UTF16)
68         ///////////////////////////////////////////////////////////////////////
69
// Result codes produced by the internal UTF-8 -> UTF-16 decoding helpers.
internal enum DecoderStatus {
        // a complete character was decoded
        Ok = 0,
        // the output char buffer cannot hold the decoded character
        InsufficientSpace = 1,
        // 0xC0/0xC1 start byte, or a value of 0x110000 and above
        InvalidChar = 2,
        // a continuation byte (10xxxxxx) was expected but not found
        InvalidSequence = 3,
        // the first byte is not a valid start of a 1..4 byte sequence
        InvalidStart = 4,
        // the input ended before the character was complete
        InputRunOut = 5,
        // the bytes decode to a value in the surrogate range
        SurrogateFound = 6,
        // the sequence is longer than needed for the value it encodes
        Overlong = 7,
};
80
// Decodes a single UTF-8 encoded character from a byte buffer.
//
// Parameters:
//   bytes, byteCount - input buffer and number of bytes available in it.
//   chars, charCount - output buffer and its capacity.  If 'charCount'
//                      is < 0, this function only counts bytes and
//                      chars without writing anything.
//   bytesProcessed   - receives the number of input bytes consumed.
//   charsProcessed   - receives the number of UTF-16 chars produced
//                      (0, 1, or 2 for a surrogate pair).
//   leftBytes        - decoder state: continuation bytes still expected.
//                      When the input runs out in the middle of a
//                      sequence, bit 4 (0x10) additionally stores the
//                      pending overlong check flag.
//   leftBits         - decoder state: code point bits collected so far.
//   procBytes        - decoder state: raw bytes of the current sequence,
//                      packed with the most recent byte in the low octet.
//
// Returns DecoderStatus.Ok once a complete character has been decoded
// (the three state variables are then reset to 0), or another status
// describing why decoding stopped.
//
// NOTE: the BOM (0xEF 0xBB 0xBF) is not given any special treatment here.
//       See http://www.cl.cam.ac.uk/~mgk25/unicode.html
private unsafe static DecoderStatus InternalGetChar (
        byte* bytes, int byteCount,
        char* chars, int charCount,
        out int bytesProcessed, out int charsProcessed,
        ref uint leftBytes, ref uint leftBits, ref uint procBytes)
{
        uint ch;
        bool checkByte;

        // reset counters
        bytesProcessed = 0;
        charsProcessed = 0;

        // Fetch the start character from the byte buffer.
        if (leftBytes == 0) {
                if (byteCount == 0)
                        return DecoderStatus.InputRunOut;
                ch = (uint) (*bytes++);
                bytesProcessed++;
                byteCount--;
                procBytes = ch;
                if (ch < (uint) 0x0080) {
                        // Single-byte UTF-8 character.
                        leftBits = ch;
                        leftBytes = 0;
                } else if (ch == (uint) 0xc0 || ch == (uint) 0xc1) {
                        // invalid start: 0xC0 and 0xC1 could only begin
                        // overlong two-byte encodings
                        return DecoderStatus.InvalidChar;
                } else if ((ch & (uint) 0xE0) == (uint) 0xC0) {
                        // Double-byte UTF-8 character.
                        leftBits = ((ch & (uint) 0x1F) << 6*1);
                        leftBytes = 1;
                } else if ((ch & (uint) 0xF0) == (uint) 0xE0) {
                        // Three-byte UTF-8 character.
                        leftBits = ((ch & (uint) 0x0F) << 6*2);
                        leftBytes = 2;
                } else if ((ch & (uint) 0xF8) == (uint) 0xF0) {
                        // Four-byte UTF-8 character.
                        leftBits = ((ch & (uint) 0x07) << 6*3);
                        leftBytes = 3;
                        // extra check for detecting as soon as
                        // possible too big four-byte utf chars
                        if (leftBits >= (uint) 0x110000)
                                return DecoderStatus.InvalidChar;
                } else {
                        // Invalid five-or-six-byte or start char
                        // NOTE: I keep here the code for 5/6 bytes if
                        // needed, but technically these combinations
                        // are invalid in UTF-8 sequences.
                        //   (ch & (uint) 0xFC) == (uint) 0xF8 =>
                        //              leftBits = ch & (uint) 0x03;
                        //              leftBytes = 4;
                        //   (ch & (uint) 0xFE) == (uint) 0xFC =>
                        //              leftBits = ch & (uint) 0x01;
                        //              leftBytes = 5;
                        leftBits = leftBytes = 0;
                        return DecoderStatus.InvalidStart;
                }
                // A multi-byte start byte with no payload bits (0xE0 or
                // 0xF0) needs its first continuation byte checked for an
                // overlong encoding.
                checkByte = (leftBytes > 0 && leftBits == 0);
        } else {
                // restore state saved by a previous, interrupted call:
                // bit 4 holds the overlong check flag, the low nibble
                // the number of continuation bytes still missing
                checkByte = (leftBytes >> 4) != 0;
                leftBytes &= (uint) 0x0f;
        }

        // process the required bytes...
        for (; leftBytes > 0; leftBytes--) {
                if (byteCount == 0) {
                        // save the overlong check flag with the counter
                        leftBytes = ((uint) (checkByte ? 0x10 : 0x00)) | leftBytes;
                        return DecoderStatus.InputRunOut;
                }
                ch = (uint) (*bytes++);
                if ((ch & (uint) 0xC0) != (uint) 0x80) {
                        // Invalid UTF-8 sequence: clear and restart.
                        // NOTE: we return before counting the
                        //       processed bytes for restarting
                        //       decoding later at this point
                        return DecoderStatus.InvalidSequence;
                }
                bytesProcessed++;
                byteCount--;
                procBytes = (procBytes << 8) | ch;
                // After 0xE0 the bytes 0x80..0x9F are overlong (mask
                // 0x1f); after 0xF0 the bytes 0x80..0x8F are (mask 0x0f).
                if (checkByte && ((~((uint) 0x1f >> (int) leftBytes - 2)) & ch) == 0x80) {
                        // detected an overlong sequence :(
                        return DecoderStatus.Overlong;
                }
                checkByte = false;
                leftBits = leftBits | ((ch & (uint) 0x3F) << (6*(int) (leftBytes - 1)));
                if (leftBits >= (uint) 0x110000) {
                        // this UTF-8 is too big ...
                        return DecoderStatus.InvalidChar;
                }
                // UTF-8 doesn't use surrogate characters.  Compare the
                // whole value: looking only at bits 11..15 would also
                // reject valid supplementary code points such as
                // U+1D800, whose low 16 bits look like a surrogate.
                if (leftBits >= (uint) 0xD800 && leftBits <= (uint) 0xDFFF) {
                        return DecoderStatus.SurrogateFound;
                }
        }

        // convert this character to UTF-16
        if (leftBits < (uint) 0x10000) {
                if (charCount >= 0) {
                        if (charCount < 1)
                                return DecoderStatus.InsufficientSpace;
                        *chars = (char) leftBits;
                }
                charsProcessed++;
        } else  {
                if (charCount >= 0) {
                        if (charCount < 2)
                                return DecoderStatus.InsufficientSpace;
                        // split into a high/low surrogate pair
                        leftBits -= (uint) 0x10000;
                        *chars++ = (char) ((leftBits >> 10) + (uint) 0xD800);
                        *chars++ = (char) ((leftBits & (uint) 0x3FF) + (uint) 0xDC00);
                }
                charsProcessed += 2;
        }

        // we've read a complete char... reset decoder status and finish
        leftBytes = leftBits = procBytes = 0;
        return DecoderStatus.Ok;
}
207
// Flushes the decoder state through the fallback buffer (used for
// invalid UTF-8 characters and for interrupted sequences).
//
// The bytes remembered in 'procBytes' are unpacked into an array and
// handed to 'fallbackBuffer'; the chars it produces are written to
// 'chars' (when it is not null) or merely counted.
//
// Returns DecoderStatus.Ok when there was nothing to flush or the
// fallback output was fully consumed, and
// DecoderStatus.InsufficientSpace when 'chars' filled up first (in that
// case neither the fallback buffer nor the decoder state is reset).
internal unsafe static DecoderStatus InternalGetCharsFlush (
        char* chars, int charCount,
        DecoderFallbackBuffer fallbackBuffer,
        DecoderStatus s,
        int bytesProcessed, ref int charsProcessed,
        ref uint leftBytes, ref uint leftBits, ref uint procBytes)
{
        // nothing remembered => nothing to flush
        if (procBytes == 0) {
                return DecoderStatus.Ok;
        }

        // count how many octets of 'procBytes' are in use
        int extra = 0;
        uint rest = procBytes;
        while (rest != 0) {
                rest >>= 8;
                extra++;
        }

        // unpack them: the lowest octet is the last byte of the array
        byte [] bytesUnknown = new byte [extra];
        uint packed = procBytes;
        for (int pos = extra - 1; pos >= 0; pos--) {
                bytesUnknown [pos] = (byte) (packed & 0xff);
                packed >>= 8;
        }

        // partial reset: this condition avoids infinite loops
        if (s == DecoderStatus.InvalidSequence) {
                leftBytes = 0;
        }

        // let the fallback decide what replaces the unknown bytes
        fallbackBuffer.Fallback (bytesUnknown, bytesProcessed - extra);
        if (chars == null) {
                // counting only
                charsProcessed += fallbackBuffer.Remaining;
        } else {
                while (fallbackBuffer.Remaining > 0) {
                        if (charsProcessed >= charCount) {
                                return DecoderStatus.InsufficientSpace;
                        }
                        chars [charsProcessed++] = fallbackBuffer.GetNextChar ();
                }
        }
        fallbackBuffer.Reset ();

        // recovery was successful, flush decoder state
        procBytes = 0;
        leftBytes = 0;
        leftBits = 0;

        return DecoderStatus.Ok;
}
248
// Core loop shared by the decoding and counting entry points: calls
// InternalGetChar repeatedly until the input is exhausted, routing
// malformed input through InternalGetCharsFlush.
//
// Two modes are available:
//   - decoder: enabled when charCount >= 0 (but chars may be null)
//   - counter: enabled when chars == null && charCount < 0
//
// When 'flush' is true any incomplete sequence left at the end of the
// input is flushed through the fallback buffer as well; otherwise it
// is kept in leftBytes/leftBits/procBytes.
//
// Throws ArgumentOutOfRangeException for a negative 'byteCount', or a
// negative 'charCount' together with a non-null 'chars', and
// ArgumentNullException when a buffer is null but its count is positive.
internal unsafe static DecoderStatus InternalGetChars (
        byte* bytes, int byteCount,
        char* chars, int charCount,
        DecoderFallbackBuffer fallbackBuffer,
        out int bytesProcessed, out int charsProcessed,
        ref uint leftBytes, ref uint leftBits, ref uint procBytes,
        bool flush)
{
        // Validate parameters
        if (byteCount < 0) {
                throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));
        }
        if (byteCount > 0 && bytes == null) {
                throw new ArgumentNullException ("bytes");
        }
        if (chars == null && charCount > 0) {
                throw new ArgumentNullException ("chars");
        }
        if (chars != null && charCount < 0) {
                throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
        }

        // reset counters
        charsProcessed = 0;
        bytesProcessed = 0;

        // byte processing loop
        while (bytesProcessed < byteCount) {
                // With an output buffer, pass the unused tail of it;
                // without one, pass 'charCount' through unchanged.
                char* target = null;
                int room = charCount;
                if (chars != null) {
                        target = chars + charsProcessed;
                        room = charCount - charsProcessed;
                }

                // fetch a char from the input byte array
                int stepBytes, stepChars;
                DecoderStatus status = InternalGetChar (
                        bytes + bytesProcessed, byteCount - bytesProcessed,
                        target, room,
                        out stepBytes, out stepChars,
                        ref leftBytes, ref leftBits, ref procBytes);

                // if not enough space return here
                // NOTE: maybe we should restore the original decoder
                //       state ... we should check what ms do in this case
                if (status == DecoderStatus.InsufficientSpace) {
                        return DecoderStatus.InsufficientSpace;
                }

                // update counters
                charsProcessed += stepChars;
                bytesProcessed += stepBytes;

                if (status == DecoderStatus.Ok) {
                        continue;       // everything OK :D
                }

                if (status == DecoderStatus.InputRunOut) {
                        if (!flush) {
                                return DecoderStatus.InputRunOut;
                        }
                        return InternalGetCharsFlush (
                                chars, charCount,
                                fallbackBuffer,
                                status,
                                bytesProcessed, ref charsProcessed,
                                ref leftBytes, ref leftBits, ref procBytes);
                }

                bool malformed =
                        status == DecoderStatus.Overlong ||
                        status == DecoderStatus.InvalidSequence ||
                        status == DecoderStatus.InvalidStart ||
                        status == DecoderStatus.InvalidChar ||
                        status == DecoderStatus.SurrogateFound;
                if (malformed) {
                        status = InternalGetCharsFlush (
                                chars, charCount,
                                fallbackBuffer,
                                status,
                                bytesProcessed, ref charsProcessed,
                                ref leftBytes, ref leftBits, ref procBytes);
                        if (status != DecoderStatus.Ok) {
                                return status;
                        }
                }
        }

        if (!flush) {
                return DecoderStatus.Ok;
        }
        return InternalGetCharsFlush (
                chars, charCount,
                fallbackBuffer,
                DecoderStatus.Ok,
                bytesProcessed, ref charsProcessed,
                ref leftBytes, ref leftBits, ref procBytes);
}
348
// Decoding entry point for raw pointers: rejects negative counts and
// then forwards all arguments to InternalGetChars.
internal unsafe static DecoderStatus InternalGetCharsDecode (
        byte* bytes, int byteCount,
        char* chars, int charCount,
        DecoderFallbackBuffer fallbackBuffer,
        out int bytesProcessed, out int charsProcessed,
        ref uint leftBytes, ref uint leftBits, ref uint procBytes,
        bool flush)
{
        if (byteCount < 0) {
                throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
        }
        if (charCount < 0) {
                throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
        }

        DecoderStatus result = InternalGetChars (
                bytes, byteCount,
                chars, charCount,
                fallbackBuffer,
                out bytesProcessed, out charsProcessed,
                ref leftBytes, ref leftBits, ref procBytes,
                flush);
        return result;
}
370
// Decoding entry point for arrays: validates the arrays and the
// index/count arguments, pins both arrays and forwards to
// InternalGetChars.  The output capacity handed over is the part of
// 'chars' that follows 'charIndex'.
internal unsafe static DecoderStatus InternalGetCharsDecode (
        byte[] bytes, int byteIndex, int byteCount,
        char[] chars, int charIndex,
        DecoderFallbackBuffer fallbackBuffer,
        out int bytesProcessed, out int charsProcessed,
        ref uint leftBytes, ref uint leftBits, ref uint procBytes,
        bool flush)
{
        if (bytes == null) {
                throw new ArgumentNullException ("bytes");
        }
        if (chars == null) {
                throw new ArgumentNullException ("chars");
        }
        if (byteIndex < 0 || byteIndex > bytes.Length) {
                throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
        }
        if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
                throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
        }
        if (charIndex < 0 || charIndex > chars.Length) {
                throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
        }

        fixed (char* cptr = chars)
        fixed (byte* bptr = bytes) {
                int room = chars.Length - charIndex;
                return InternalGetChars (
                        bptr + byteIndex, byteCount,
                        cptr + charIndex, room,
                        fallbackBuffer,
                        out bytesProcessed, out charsProcessed,
                        ref leftBytes, ref leftBits, ref procBytes,
                        flush);
        }
}
402
// Counting entry point for raw pointers: rejects a negative byte count
// and runs InternalGetChars in counter mode (no output buffer, char
// count of -1).
internal unsafe static DecoderStatus InternalGetCharsCount (
        byte* bytes, int byteCount,
        DecoderFallbackBuffer fallbackBuffer,
        out int bytesProcessed, out int charsProcessed,
        ref uint leftBytes, ref uint leftBits, ref uint procBytes,
        bool flush)
{
        if (byteCount < 0) {
                throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
        }

        DecoderStatus result = InternalGetChars (
                bytes, byteCount,
                null, -1,
                fallbackBuffer,
                out bytesProcessed, out charsProcessed,
                ref leftBytes, ref leftBits, ref procBytes,
                flush);
        return result;
}
421
// Counting entry point for arrays: validates the array and the
// index/count arguments, pins the array and runs InternalGetChars in
// counter mode (no output buffer, char count of -1).
internal unsafe static DecoderStatus InternalGetCharsCount (
        byte[] bytes, int byteIndex, int byteCount,
        DecoderFallbackBuffer fallbackBuffer,
        out int bytesProcessed, out int charsProcessed,
        ref uint leftBytes, ref uint leftBits, ref uint procBytes,
        bool flush)
{
        if (bytes == null) {
                throw new ArgumentNullException ("bytes");
        }
        if (byteIndex < 0 || byteIndex > bytes.Length) {
                throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
        }
        if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
                throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
        }

        fixed (byte* bptr = bytes) {
                byte* start = bptr + byteIndex;
                return InternalGetChars (
                        start, byteCount,
                        null, -1,
                        fallbackBuffer,
                        out bytesProcessed, out charsProcessed,
                        ref leftBytes, ref leftBits, ref procBytes,
                        flush);
        }
}
446
447         ///////////////////////////////////////////////////////////////////////
448         // INTERNAL ENCODING FUNCTION (CHAR/UTF16 -> UTF8)
449         ///////////////////////////////////////////////////////////////////////
450
// Result codes produced by the internal UTF-16 -> UTF-8 encoding helpers.
internal enum EncoderStatus {
        // a complete character was encoded
        Ok = 0,
        // no more input chars are available
        InputRunOut = 1,
        // the output byte buffer cannot hold the encoded character
        InsufficientSpace = 2,
        // a surrogate tail was found without a leading surrogate
        InvalidChar = 3,
        // a leading surrogate was followed by a non-surrogate-tail char
        InvalidSurrogate = 4,
};
458
// Encodes one character (a single UTF-16 char, or a surrogate pair)
// as UTF-8 into a byte buffer.
//
// Parameters:
//   chars, charCount - input buffer and number of chars available in it.
//   bytes, byteCount - output buffer and its capacity.  If 'byteCount'
//                      is < 0, this function only counts used bytes
//                      without writing anything.
//   charsProcessed   - receives the number of input chars consumed.
//   bytesProcessed   - receives the number of bytes produced.
//   leftChar         - encoder state: a surrogate still waiting to be
//                      paired or reported, or 0 when there is none.
//
// NOTE: the BOM (0xEF 0xBB 0xBF) is not given any special treatment here.
//       See http://www.cl.cam.ac.uk/~mgk25/unicode.html
private unsafe static EncoderStatus InternalGetByte (
        char* chars, int charCount,
        byte* bytes, int byteCount,
        out int charsProcessed, out int bytesProcessed, ref uint leftChar)
{
        // byteCount < 0 selects the counting-only mode
        bool writing = byteCount >= 0;
        uint code;

        // reset counters
        charsProcessed = 0;
        bytesProcessed = 0;

        if (charCount < 1) {
                return EncoderStatus.InputRunOut;
        }
        code = *chars++;

        if (leftChar == 0) {
                // no pending surrogate: this char is consumed whatever
                // happens next
                charsProcessed++;
                charCount--;

                if (code < (uint) 0x80) {
                        // one byte
                        if (writing) {
                                if (byteCount < 1) {
                                        return EncoderStatus.InsufficientSpace;
                                }
                                bytes [0] = (byte) code;
                        }
                        bytesProcessed++;
                        return EncoderStatus.Ok;
                }

                if (code < (uint) 0x0800) {
                        // two bytes
                        if (writing) {
                                if (byteCount < 2) {
                                        return EncoderStatus.InsufficientSpace;
                                }
                                bytes [0] = (byte) ((uint) 0xC0 | ((code >> 6) & 0x3f));
                                bytes [1] = (byte) ((uint) 0x80 | (code & 0x3f));
                        }
                        bytesProcessed += 2;
                        return EncoderStatus.Ok;
                }

                if (code < (uint) 0xD800 || code > (uint) 0xDFFF) {
                        // three bytes
                        if (writing) {
                                if (byteCount < 3) {
                                        return EncoderStatus.InsufficientSpace;
                                }
                                bytes [0] = (byte) ((uint) 0xE0 | (code >> 12));
                                bytes [1] = (byte) ((uint) 0x80 | ((code >> 6) & 0x3F));
                                bytes [2] = (byte) ((uint) 0x80 | (code & 0x3F));
                        }
                        bytesProcessed += 3;
                        return EncoderStatus.Ok;
                }

                if (code > (uint) 0xDBFF) {
                        // We have a surrogate tail without
                        // leading surrogate.
                        leftChar = code;
                        return EncoderStatus.InvalidChar;
                }

                // Leading surrogate: remember it and fetch the char
                // that should complete the pair.
                leftChar = code;
                if (charCount < 1) {
                        return EncoderStatus.InputRunOut;
                }
                code = *chars++;
        }

        // From here on 'leftChar' holds a pending surrogate and 'code'
        // is the char that follows it.
        if (code < (uint) 0xDC00 || code > (uint) 0xDFFF) {
                // We have a surrogate start followed by a regular
                // character.  Technically, this is invalid, so we fail.
                // The char is not counted: in case of a bad surrogate
                // the encoding should restart on the faulty char, to
                // avoid losing more chars during the encoding.
                return EncoderStatus.InvalidSurrogate;
        }

        // We have a correct surrogate pair.
        code = 0x10000 + (uint) code - (uint) 0xDC00
                + ((leftChar - (uint) 0xD800) << 10);
        if (writing) {
                if (byteCount < 4) {
                        return EncoderStatus.InsufficientSpace;
                }
                bytes [0] = (byte) (0xF0 | (code >> 18));
                bytes [1] = (byte) (0x80 | ((code >> 12) & 0x3F));
                bytes [2] = (byte) (0x80 | ((code >> 6) & 0x3F));
                bytes [3] = (byte) (0x80 | (code & 0x3F));
        }
        bytesProcessed += 4;

        // the second half of the pair is counted only now that the
        // pair has been processed
        charsProcessed++;
        leftChar = 0;
        return EncoderStatus.Ok;
}
557
// Flushes the encoder state through the fallback buffer (used for
// invalid UTF-16 characters and for dangling surrogates).
//
// The pending char in 'leftChar' is handed to 'fallbackBuffer'; the
// replacement chars it produces are then encoded with InternalGetBytes
// (without a fallback buffer) after the bytes already produced.
//
// Returns EncoderStatus.Ok when there was nothing to do or the
// replacement was encoded, EncoderStatus.InsufficientSpace when the
// replacement did not fit.  Throws ArgumentException when the
// replacement chars themselves cannot be encoded.
internal unsafe static EncoderStatus InternalGetBytesFlush (
        byte* bytes, int byteCount,
        EncoderFallbackBuffer fallbackBuffer,
        int charsProcessed, ref int bytesProcessed,
        ref uint leftChar)
{
        // in normal circumstances fallbackBuffer never is null, except
        // when we have called InternalGetBytes from this function
        // (for avoiding infinite recursive calls)
        if (fallbackBuffer == null) {
                return EncoderStatus.Ok;
        }

        // if there is nothing to flush, then return silently
        if (leftChar == 0) {
                return EncoderStatus.Ok;
        }

        // invalid UTF-16 or invalid surrogate
        fallbackBuffer.Fallback ((char) leftChar, charsProcessed - 1);

        // if we've arrived here we are working in replacement mode:
        // collect the replacement chars offered by the fallback
        char[] replacement = new char [fallbackBuffer.Remaining];
        int filled = 0;
        while (filled < replacement.Length) {
                replacement [filled] = fallbackBuffer.GetNextChar ();
                filled++;
        }
        fallbackBuffer.Reset ();

        // and encode them into UTF8 bytes...
        fixed (char *fb_chars = replacement) {
                leftChar = 0;

                // With an output buffer, write after the bytes already
                // produced; without one, pass 'byteCount' unchanged.
                byte* target = null;
                int room = byteCount;
                if (bytes != null) {
                        target = bytes + bytesProcessed;
                        room = byteCount - bytesProcessed;
                }

                int usedChars, usedBytes;
                EncoderStatus outcome = InternalGetBytes (
                        fb_chars, replacement.Length,
                        target, room,
                        null, out usedChars, out usedBytes,
                        ref leftChar,
                        true);

                if (outcome == EncoderStatus.InsufficientSpace) {
                        return EncoderStatus.InsufficientSpace;
                }
                if (outcome == EncoderStatus.InputRunOut ||
                    outcome == EncoderStatus.InvalidChar ||
                    outcome == EncoderStatus.InvalidSurrogate) {
                        throw new ArgumentException ("Fallback chars are pure evil.", "fallback buffer bytes");
                }
                if (outcome == EncoderStatus.Ok) {
                        // everything OK :D
                        bytesProcessed += usedBytes;
                }
        }

        // flush encoder state
        leftChar = 0;
        return EncoderStatus.Ok;
}
618
619         // InternalGetBytes processor. Can encode or count space needed for
620         // encoding, depending on the enabled mode:
621         //   - encoder
622         //       enabled when byteCount >= 0 (but bytes may be null)
623         //   - counter
624         //       enabled when bytes == null && byteCount < 0
625         internal unsafe static EncoderStatus InternalGetBytes (
626                 char* chars, int charCount,
627                 byte* bytes, int byteCount,
628                 EncoderFallbackBuffer fallbackBuffer,
629                 out int charsProcessed, out int bytesProcessed,
630                 ref uint leftChar,
631                 bool flush)
632         {
633                 EncoderStatus s;
634                 int t_charsProcessed, t_bytesProcessed;
635
636                 // Validate the parameters
637                 if (charCount < 0)
638                         throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
639                 else
640                         if (charCount > 0 && chars == null)
641                                 throw new ArgumentNullException ("chars");
642                 if (bytes == null) {
643                         if (byteCount > 0)
644                                 throw new ArgumentNullException ("bytes");
645                 } else {
646                         if (byteCount <= 0)
647                                 throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));
648                 }
649
650                 // reset counters
651                 charsProcessed = 0;
652                 bytesProcessed = 0;
653
654                 // char processing loop
655                 while (charCount - charsProcessed > 0) {
656                         s = bytes != null
657                                 ? InternalGetByte (
658                                         chars + charsProcessed, charCount - charsProcessed,
659                                         bytes + bytesProcessed, byteCount - bytesProcessed,
660                                         out t_charsProcessed, out t_bytesProcessed, ref leftChar)
661                                 : InternalGetByte (
662                                         chars + charsProcessed, charCount - charsProcessed,
663                                         null, byteCount,
664                                         out t_charsProcessed, out t_bytesProcessed, ref leftChar);
665
666                         // if not enough space return here
667                         // NOTE: maybe we should restore the original encoder
668                         //       state ... we should check what ms do in this case
669                         if(s == EncoderStatus.InsufficientSpace)
670                                 return EncoderStatus.InsufficientSpace;
671
672                         // update counters
673                         charsProcessed += t_charsProcessed;
674                         bytesProcessed += t_bytesProcessed;
675
676                         switch (s) {
677                         case EncoderStatus.Ok:
678                                 break;  // everything OK :D
679
680                         case EncoderStatus.InputRunOut:
681                                 return flush
682                                         ? InternalGetBytesFlush (
683                                                 bytes, byteCount,
684                                                 fallbackBuffer,
685                                                 charsProcessed, ref bytesProcessed,
686                                                 ref leftChar)
687                                         : EncoderStatus.InputRunOut;
688
689                         case EncoderStatus.InvalidChar:
690                         case EncoderStatus.InvalidSurrogate:
691                                 s = InternalGetBytesFlush (
692                                         bytes, byteCount,
693                                         fallbackBuffer,
694                                         charsProcessed, ref bytesProcessed,
695                                         ref leftChar);
696                                 if (s != EncoderStatus.Ok)
697                                         return s;
698                                 break;
699                         }
700                 }
701                 return flush
702                         ? InternalGetBytesFlush (
703                                 bytes, byteCount,
704                                 fallbackBuffer,
705                                 charsProcessed, ref bytesProcessed,
706                                 ref leftChar)
707                         : EncoderStatus.Ok;
708         }
709
710         internal unsafe static EncoderStatus InternalGetBytesEncode (
711                 char* chars, int charCount,
712                 byte* bytes, int byteCount,
713                 EncoderFallbackBuffer fallbackBuffer,
714                 out int charsProcessed, out int bytesProcessed,
715                 ref uint leftChar,
716                 bool flush)
717         {
718                 if (charCount < 0)
719                         throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
720                 if (byteCount < 0)
721                         throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
722
723                 return InternalGetBytes (
724                         chars, charCount,
725                         bytes, byteCount,
726                         fallbackBuffer,
727                         out charsProcessed, out bytesProcessed,
728                         ref leftChar,
729                         flush);
730         }
731
732         internal unsafe static EncoderStatus InternalGetBytesEncode (
733                 char[] chars, int charIndex, int charCount,
734                 byte[] bytes, int byteIndex,
735                 EncoderFallbackBuffer fallbackBuffer,
736                 out int charsProcessed, out int bytesProcessed,
737                 ref uint leftChar,
738                 bool flush)
739         {
740                 if (chars == null)
741                         throw new ArgumentNullException ("chars");
742                 if (bytes == null)
743                         throw new ArgumentNullException ("bytes");
744                 if (charIndex < 0 || charIndex > chars.Length)
745                         throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
746                 if (charCount < 0 || charCount > (chars.Length - charIndex))
747                         throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
748                 if (byteIndex < 0 || byteIndex > bytes.Length)
749                         throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
750
751                 unsafe {
752                         fixed (char *cptr = chars) {
753                                 fixed (byte *bptr = bytes) {
754                                         return InternalGetBytes (
755                                                 cptr + charIndex, charCount,
756                                                 bptr + byteIndex, bytes.Length - byteIndex,
757                                                 fallbackBuffer,
758                                                 out charsProcessed, out bytesProcessed,
759                                                 ref leftChar,
760                                                 flush);
761                                 }
762                         }
763                 }
764         }
765
766         internal unsafe static EncoderStatus InternalGetBytesCount (
767                 char* chars, int charCount,
768                 EncoderFallbackBuffer fallbackBuffer,
769                 out int charsProcessed, out int bytesProcessed,
770                 ref uint leftChar,
771                 bool flush)
772         {
773                 if (charCount < 0)
774                         throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
775
776                 return InternalGetBytes (
777                         chars, charCount,
778                         null, -1,
779                         fallbackBuffer,
780                         out charsProcessed, out bytesProcessed,
781                         ref leftChar,
782                         flush);
783         }
784
785         internal unsafe static EncoderStatus InternalGetBytesCount (
786                 char[] chars, int charIndex, int charCount,
787                 EncoderFallbackBuffer fallbackBuffer,
788                 out int charsProcessed, out int bytesProcessed,
789                 ref uint leftChar,
790                 bool flush)
791         {
792                 if (chars == null)
793                         throw new ArgumentNullException ("chars");
794                 if (charIndex < 0 || charIndex > chars.Length)
795                         throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
796                 if (charCount < 0 || charCount > (chars.Length - charIndex))
797                         throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
798
799                 fixed (char *cptr = chars) {
800                         return InternalGetBytes (
801                                 cptr + charIndex, charCount,
802                                 null, -1,
803                                 fallbackBuffer,
804                                 out charsProcessed, out bytesProcessed,
805                                 ref leftChar,
806                                 flush);
807                 }
808         }
809
810         #region GetByteCount()
811
812         // Get the number of bytes needed to encode a character buffer.
813         public override int GetByteCount (char[] chars, int index, int count)
814         {
815                 uint leftChar = 0;
816                 int charsProcessed, bytesProcessed;
817                 InternalGetBytesCount (
818                                 chars, index, count,
819                                 EncoderFallback.CreateFallbackBuffer (),
820                                 out charsProcessed, out bytesProcessed,
821                                 ref leftChar,
822                                 true);
823                 return bytesProcessed;
824         }
825
826
827         [CLSCompliant (false)]
828         [ComVisible (false)]
829         public unsafe override int GetByteCount (char* chars, int count)
830         {
831                 int charsProcessed, bytesProcessed;
832                 uint leftChar = 0;
833                 InternalGetBytesCount (
834                         chars, count,
835                         EncoderFallback.CreateFallbackBuffer (),
836                         out charsProcessed, out bytesProcessed,
837                         ref leftChar,
838                         true);
839                 return bytesProcessed;
840         }
841
842         #endregion
843
844         #region GetBytes()
845
846         // Get the bytes that result from encoding a character buffer.
847         public override int GetBytes (char[] chars, int charIndex, int charCount,
848                                       byte[] bytes, int byteIndex)
849         {
850                 int charsProcessed, bytesProcessed;
851                 uint leftChar = 0;
852                 if (InternalGetBytesEncode (
853                                 chars, charIndex, charCount,
854                                 bytes, byteIndex,
855                                 EncoderFallback.CreateFallbackBuffer (),
856                                 out charsProcessed, out bytesProcessed,
857                                 ref leftChar,
858                                 true) == EncoderStatus.InsufficientSpace)
859                         throw new ArgumentException ("Insufficient Space", "bytes");
860                 return bytesProcessed;
861         }
862
863         // Convenience wrappers for "GetBytes".
864         public unsafe override int GetBytes (String s, int charIndex, int charCount,
865                                       byte[] bytes, int byteIndex)
866         {
867                 int charsProcessed, bytesProcessed;
868                 uint leftChar = 0;
869                 EncoderStatus status;
870                 if (s == null)
871                         throw new ArgumentNullException ("s");
872                 if (charIndex < 0 || charIndex >= s.Length)
873                         throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
874                 if (charCount < 0 || charCount > (s.Length - charIndex))
875                         throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
876                 if (byteIndex < 0 || byteIndex > bytes.Length)
877                         throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
878                 unsafe {
879                         fixed (char *cptr = s) {
880                                 fixed (byte *bptr = bytes) {
881                                         status = InternalGetBytesEncode (
882                                                 cptr + charIndex, charCount,
883                                                 bptr + byteIndex, bytes.Length - byteIndex,
884                                                 EncoderFallback.CreateFallbackBuffer (),
885                                                 out charsProcessed, out bytesProcessed,
886                                                 ref leftChar,
887                                                 true);
888                                 }
889                         }
890                 }
891                 if (status == EncoderStatus.InsufficientSpace)
892                         throw new ArgumentException ("Insufficient Space", "bytes");
893                 return bytesProcessed;
894         }
895
896         [CLSCompliant (false)]
897         [ComVisible (false)]
898         public unsafe override int GetBytes (char* chars, int charCount, byte* bytes, int byteCount)
899         {
900                 int charsProcessed, bytesProcessed;
901                 uint leftChar = 0;
902                 if (InternalGetBytesEncode (
903                                 chars, charCount, bytes, byteCount,
904                                 EncoderFallback.CreateFallbackBuffer (),
905                                 out charsProcessed, out bytesProcessed,
906                                 ref leftChar,
907                                 true) == EncoderStatus.InsufficientSpace)
908                         throw new ArgumentException ("Insufficient Space", "bytes");
909                 return bytesProcessed;
910         }
911
912         #endregion
913
914         #region GetCharCount()
915
916         // Get the number of characters needed to decode a byte buffer.
917         public override int GetCharCount (byte[] bytes, int index, int count)
918         {
919                 int bytesProcessed, charsProcessed;
920                 uint leftBytes = 0, leftBits = 0, procBytes = 0;
921                 InternalGetCharsCount (
922                                 bytes, index, count,
923                                 DecoderFallback.CreateFallbackBuffer (),
924                                 out bytesProcessed, out charsProcessed,
925                                 ref leftBytes, ref leftBits, ref procBytes,
926                                 true);
927                 return charsProcessed;
928         }
929
930         [CLSCompliant (false)]
931         [ComVisible (false)]
932         public unsafe override int GetCharCount (byte* bytes, int count)
933         {
934                 int bytesProcessed, charsProcessed;
935                 uint leftBytes = 0, leftBits = 0, procBytes = 0;
936                 InternalGetCharsCount (
937                                 bytes, count,
938                                 DecoderFallback.CreateFallbackBuffer (),
939                                 out bytesProcessed, out charsProcessed,
940                                 ref leftBytes, ref leftBits, ref procBytes,
941                                 true);
942                 return charsProcessed;
943         }
944
945         #endregion
946
947         // Get the characters that result from decoding a byte buffer.
948         public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
949                                       char[] chars, int charIndex)
950         {
951                 int bytesProcessed, charsProcessed;
952                 uint leftBytes = 0, leftBits = 0, procBytes = 0;
953
954                 if (InternalGetCharsDecode (
955                                 bytes, byteIndex, byteCount,
956                                 chars, charIndex,
957                                 DecoderFallback.CreateFallbackBuffer (),
958                                 out bytesProcessed, out charsProcessed,
959                                 ref leftBytes, ref leftBits, ref procBytes,
960                                 true) == DecoderStatus.InsufficientSpace)
961                         throw new ArgumentException ("Insufficient Space", "bytes");
962
963                 return charsProcessed;
964         }
965
966         [CLSCompliant (false)]
967         [ComVisible (false)]
968         public unsafe override int GetChars (byte* bytes, int byteCount, char* chars, int charCount)
969         {
970                 int bytesProcessed, charsProcessed;
971                 uint leftBytes = 0, leftBits = 0, procBytes = 0;
972
973                 if (InternalGetCharsDecode (
974                                 bytes, byteCount,
975                                 chars, charCount,
976                                 DecoderFallback.CreateFallbackBuffer (),
977                                 out bytesProcessed, out charsProcessed,
978                                 ref leftBytes, ref leftBits, ref procBytes,
979                                 true) == DecoderStatus.InsufficientSpace)
980                         throw new ArgumentException ("Insufficient Space", "bytes");
981
982                 return charsProcessed;
983         }
984
985         // Get the maximum number of bytes needed to encode a
986         // specified number of characters.
987         public override int GetMaxByteCount (int charCount)
988         {
989                 if (charCount < 0)
990                         throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
991                 return charCount * 4;
992         }
993
994         // Get the maximum number of characters needed to decode a
995         // specified number of bytes.
996         public override int GetMaxCharCount (int byteCount)
997         {
998                 if (byteCount < 0)
999                         throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));
1000                 return byteCount;
1001         }
1002
1003         // Get a UTF8-specific decoder that is attached to this instance.
1004         public override Decoder GetDecoder ()
1005         {
1006                 return new UTF8Decoder (DecoderFallback);
1007         }
1008
1009         // Get a UTF8-specific encoder that is attached to this instance.
1010         public override Encoder GetEncoder ()
1011         {
1012                 return new UTF8Encoder (EncoderFallback, emitIdentifier);
1013         }
1014
1015         // Get the UTF8 preamble.
1016         // XXX: why does this method return a preamble or void array depending
1017         //      on 'emitIdentifier' attribute?
1018         public override byte[] GetPreamble ()
1019         {
1020                 if (emitIdentifier)
1021                         return new byte [] { 0xEF, 0xBB, 0xBF };
1022
1023                 return EmptyArray<byte>.Value;
1024         }
1025
1026         // Determine if this object is equal to another.
1027         public override bool Equals (Object value)
1028         {
1029                 UTF8Encoding enc = (value as UTF8Encoding);
1030                 if (enc != null) {
1031                         return (codePage == enc.codePage &&
1032                                 emitIdentifier == enc.emitIdentifier &&
1033                                 DecoderFallback.Equals (enc.DecoderFallback) &&
1034                                 EncoderFallback.Equals (enc.EncoderFallback));
1035                 } else {
1036                         return false;
1037                 }
1038         }
1039
1040         // Get the hash code for this object.
1041         public override int GetHashCode ()
1042         {
1043                 return base.GetHashCode ();
1044         }
1045
1046         public override int GetByteCount (string chars)
1047         {
1048                 // hmm, does this override make any sense?
1049                 return base.GetByteCount (chars);
1050         }
1051
1052         [ComVisible (false)]
1053         public override string GetString (byte [] bytes, int index, int count)
1054         {
1055                 // hmm, does this override make any sense?
1056                 return base.GetString (bytes, index, count);
1057         }
1058
1059         // UTF-8 decoder implementation.
1060         [Serializable]
1061         private class UTF8Decoder : Decoder
1062         {
1063                 // internal encoder state
1064                 private uint leftBytes;
1065                 private uint leftBits;
1066                 private uint procBytes;
1067
1068                 // Constructor.
1069                 public UTF8Decoder (DecoderFallback fallback)
1070                 {
1071                         Fallback = fallback;
1072                         leftBytes = 0;
1073                         leftBits = 0;
1074                         procBytes = 0;
1075                 }
1076
1077                 // Override inherited methods.
1078                 public override int GetCharCount (byte[] bytes, int index, int count, bool flush)
1079                 {
1080                         int bytesProcessed, charsProcessed;
1081                         InternalGetCharsCount (
1082                                         bytes, index, count,
1083                                         this.FallbackBuffer,
1084                                         out bytesProcessed, out charsProcessed,
1085                                         ref leftBytes, ref leftBits, ref procBytes,
1086                                         flush);
1087                         return charsProcessed;
1088                 }
1089
1090                 [ComVisibleAttribute(false)]
1091                 public override int GetCharCount (byte[] bytes, int index, int count)
1092                 {
1093                         return GetCharCount (bytes, index, count, true);
1094                 }
1095
1096                 [ComVisibleAttribute(false)] 
1097                 public unsafe override int GetCharCount (byte* bytes, int count, bool flush)
1098                 {
1099                         int bytesProcessed, charsProcessed;
1100                         InternalGetCharsCount (
1101                                         bytes, count,
1102                                         this.FallbackBuffer,
1103                                         out bytesProcessed, out charsProcessed,
1104                                         ref leftBytes, ref leftBits, ref procBytes,
1105                                         flush);
1106                         return charsProcessed;
1107                 }
1108
1109                 [ComVisibleAttribute(false)]
1110                 public unsafe override int GetChars (byte* bytes, int byteCount,
1111                                                 char* chars, int charCount, bool flush)
1112                 {
1113                         int bytesProcessed, charsProcessed;
1114                         if (InternalGetCharsDecode (
1115                                         bytes, byteCount,
1116                                         chars, charCount,
1117                                         this.FallbackBuffer,
1118                                         out bytesProcessed, out charsProcessed,
1119                                         ref leftBytes, ref leftBits, ref procBytes,
1120                                         flush) == DecoderStatus.InsufficientSpace)
1121                                 throw new ArgumentException ("Insufficient Space", "bytes");
1122                         return charsProcessed;
1123                 }
1124
1125                 public override int GetChars (byte[] bytes, int byteIndex,
1126                                                  int byteCount, char[] chars, int charIndex, bool flush)
1127                 {
1128                         int bytesProcessed, charsProcessed;
1129                         if (InternalGetCharsDecode (
1130                                         bytes, byteIndex, byteCount,
1131                                         chars, charIndex,
1132                                         this.FallbackBuffer,
1133                                         out bytesProcessed, out charsProcessed,
1134                                         ref leftBytes, ref leftBits, ref procBytes,
1135                                         flush) == DecoderStatus.InsufficientSpace)
1136                                 throw new ArgumentException ("Insufficient Space", "bytes");
1137                         return charsProcessed;
1138                 }
1139
1140                 public override int GetChars (byte[] bytes, int byteIndex,
1141                                                  int byteCount, char[] chars, int charIndex)
1142                 {
1143                         return GetChars (bytes, byteIndex, byteCount, chars, charIndex, true);
1144                 }
1145
1146                 public override void Reset ()
1147                 {
1148                         base.Reset ();
1149                         leftBytes = 0;
1150                         leftBits = 0;
1151                         procBytes = 0;
1152                 }
1153
1154                 public unsafe override void Convert (
1155                         byte* bytes, int byteCount,
1156                         char* chars, int charCount, bool flush,
1157                         out int bytesUsed, out int charsUsed, out bool completed)
1158                 {
1159                         InternalGetCharsDecode (
1160                                         bytes, byteCount,
1161                                         chars, charCount,
1162                                         this.FallbackBuffer,
1163                                         out bytesUsed, out charsUsed,
1164                                         ref leftBytes, ref leftBits, ref procBytes,
1165                                         flush);
1166                         // only completed if all bytes have been processed and
1167                         // succesful converted to chars!!
1168                         completed = (byteCount == bytesUsed);
1169                 }
1170         } // class UTF8Decoder
1171
1172         // UTF-8 encoder implementation.
1173         [Serializable]
1174         private class UTF8Encoder : Encoder
1175         {
1176                 private bool emitIdentifier;
1177
1178                 // internal encoder state
1179                 private uint leftChar;
1180                 private bool emittedIdentifier;
1181
1182                 // Constructor.
1183                 public UTF8Encoder (EncoderFallback fallback, bool emitIdentifier)
1184                 {
1185                         this.Fallback = fallback;
1186                         this.leftChar = 0;
1187                         this.emitIdentifier = false; //emitIdentifier;
1188                         this.emittedIdentifier = false;
1189                 }
1190
1191                 // Override inherited methods.
1192                 [ComVisibleAttribute(false)]
1193                 public unsafe override int GetByteCount (char* chars, int count, bool flush)
1194                 {
1195                         int charsProcessed, bytesProcessed, preambleSize = 0;
1196                         if (emitIdentifier && !emittedIdentifier) {
1197                                 preambleSize = 3;
1198                                 emittedIdentifier = true;
1199                         }
1200                         InternalGetBytesCount (
1201                                         chars, count,
1202                                         this.FallbackBuffer,
1203                                         out charsProcessed, out bytesProcessed,
1204                                         ref leftChar,
1205                                         flush);
1206                         return bytesProcessed + preambleSize;
1207                 }
1208
1209                 public override int GetByteCount (char[] chars, int index,
1210                                                         int count, bool flush)
1211                 {
1212                         int charsProcessed, bytesProcessed, preambleSize = 0;
1213                         if (emitIdentifier && !emittedIdentifier) {
1214                                 preambleSize = 3;
1215                                 emittedIdentifier = true;
1216                         }
1217                         InternalGetBytesCount (
1218                                         chars, index, count,
1219                                         this.FallbackBuffer,
1220                                         out charsProcessed, out bytesProcessed,
1221                                         ref leftChar,
1222                                         flush);
1223                         return bytesProcessed + preambleSize;
1224                 }
1225
// Encodes charCount characters from "chars" into the byteCount-sized
// buffer at "bytes" and returns the number of bytes written.
//
// On the first call of an encoder configured to emit the UTF-8
// identifier, the three identifier bytes (EF BB BF) are written
// first and are included in the returned count.
//
// Throws ArgumentNullException when the identifier has to be written
// and "bytes" is null, and ArgumentException when the buffer cannot
// hold the identifier or the helper reports insufficient space.
[ComVisibleAttribute(false)]
public unsafe override int GetBytes (char* chars, int charCount,
        byte* bytes, int byteCount, bool flush)
{
        int charsProcessed, bytesProcessed, preambleSize = 0;
        if (emitIdentifier && !emittedIdentifier) {
                // Validate before dereferencing the output pointer.
                if (bytes == null)
                        throw new ArgumentNullException ("bytes");
                if (byteCount < 3)
                        throw new ArgumentException ("Insufficient Space", "bytes");
                *bytes++ = 0xEF;
                *bytes++ = 0xBB;
                *bytes++ = 0xBF;
                preambleSize = 3;
                emittedIdentifier = true;
                // The identifier consumed part of the caller's buffer.
                byteCount -= 3;
        }
        EncoderStatus status = InternalGetBytesEncode (
                        chars, charCount,
                        bytes, byteCount,
                        this.FallbackBuffer,
                        out charsProcessed, out bytesProcessed,
                        ref leftChar,
                        flush);
        if (status == EncoderStatus.InsufficientSpace)
                throw new ArgumentException ("Insufficient Space", "bytes");
        return bytesProcessed + preambleSize;
}
1251
// Encodes charCount characters of "chars", starting at charIndex,
// into "bytes" starting at byteIndex and returns the number of bytes
// written.
//
// On the first call of an encoder configured to emit the UTF-8
// identifier, the three identifier bytes (EF BB BF) are written
// first and are included in the returned count.
//
// When the identifier has to be written, the output arguments are
// validated here because they are used before the helper sees them:
// ArgumentNullException for a null "bytes", ArgumentOutOfRangeException
// for a byteIndex outside the array, ArgumentException when fewer
// than three bytes are available. ArgumentException is also thrown
// when the helper reports insufficient space.
public override int GetBytes (char[] chars, int charIndex,
                                int charCount, byte[] bytes,
                                int byteIndex, bool flush)
{
        int charsProcessed, bytesProcessed, preambleSize = 0;
        if (emitIdentifier && !emittedIdentifier) {
                if (bytes == null)
                        throw new ArgumentNullException ("bytes");
                if (byteIndex < 0 || byteIndex > bytes.Length)
                        throw new ArgumentOutOfRangeException ("byteIndex");
                if (bytes.Length - byteIndex < 3)
                        throw new ArgumentException ("Insufficient Space", "bytes");
                bytes[byteIndex++] = 0xEF;
                bytes[byteIndex++] = 0xBB;
                bytes[byteIndex++] = 0xBF;
                preambleSize = 3;
                emittedIdentifier = true;
        }
        EncoderStatus status = InternalGetBytesEncode (
                        chars, charIndex, charCount,
                        bytes, byteIndex,
                        this.FallbackBuffer,
                        out charsProcessed, out bytesProcessed,
                        ref leftChar,
                        flush);
        if (status == EncoderStatus.InsufficientSpace)
                throw new ArgumentException ("Insufficient Space", "bytes");
        return bytesProcessed + preambleSize;
}
1276
// Returns the encoder to its initial state: runs the base class
// reset, then clears the identifier-emitted flag and the leftChar
// value carried between calls.
public override void Reset ()
{
        base.Reset ();
        emittedIdentifier = false;
        leftChar = 0;
}
1283
// Encodes up to charCount characters from "chars" into the
// byteCount-sized buffer at "bytes" and reports progress through the
// out parameters; the status returned by the helper is not inspected.
//   charsUsed - number of characters reported as consumed
//   bytesUsed - number of bytes written, including the three UTF-8
//               identifier bytes when they are written by this call
//   completed - true only when charsUsed equals charCount
//
// Throws ArgumentException when the identifier is still pending,
// there are characters to encode, and the buffer has fewer than
// three bytes: encoding anyway would place data ahead of the
// identifier.
public unsafe override void Convert (
        char* chars, int charCount,
        byte* bytes, int byteCount, bool flush,
        out int charsUsed, out int bytesUsed, out bool completed)
{
        int preambleSize = 0;
        // A null buffer is left for the helper to deal with.
        if (emitIdentifier && !emittedIdentifier && bytes != null) {
                if (byteCount >= 3) {
                        *bytes++ = 0xEF;
                        *bytes++ = 0xBB;
                        *bytes++ = 0xBF;
                        preambleSize = 3;
                        emittedIdentifier = true;
                        byteCount -= 3;
                } else if (byteCount >= 0 && charCount > 0) {
                        // Negative sizes and empty input are passed
                        // through unchanged; only a real attempt to
                        // encode without room for the identifier is
                        // rejected here.
                        throw new ArgumentException ("Insufficient Space", "bytes");
                }
        }
        InternalGetBytesEncode (
                        chars, charCount,
                        bytes, byteCount,
                        this.FallbackBuffer,
                        out charsUsed, out bytesUsed,
                        ref leftChar,
                        flush);
        // Only completed if all chars have been processed and
        // successfully converted to bytes.
        completed = (charCount == charsUsed);
        bytesUsed += preambleSize;
}
1313         } // class UTF8Encoder
1314
1315 }; // class UTF8Encoding
1316
1317 }; // namespace System.Text