Added counter mode in InternalGetChars.
[mono.git] / mcs / class / corlib / System.Text / UTF8Encoding.cs
index f65f9af14a326752c174eedfd0e7de49f8c2c492..9252147e5442657afa1da40aecbe4c083e84dc97 100644 (file)
@@ -79,8 +79,8 @@ public class UTF8Encoding : Encoding
        };
 
        // following method decodes an utf8 character from a byte buffer.
-       // NOTE: If 'chars' is null, this function only counts bytes and chars
-       //       without writing anything.
+       // NOTE: If 'charCount' is < 0, this function only counts bytes and
+       //       chars without writing anything.
        // NOTE: BOM (0xEF 0xBB 0xBF) is not yet supported.
        //       See http://www.cl.cam.ac.uk/~mgk25/unicode.html
        private unsafe static DecoderStatus InternalGetChar (
@@ -183,14 +183,14 @@ public class UTF8Encoding : Encoding
 
                // convert this character to UTF-16
                if (leftBits < (uint) 0x10000) {
-                       if (chars != null) {
+                       if (charCount >= 0) {
                                if (charCount < 1)
                                        return DecoderStatus.InsufficientSpace;
                                *chars = (char) leftBits;
                        }
                        charsProcessed++;
                } else  {
-                       if (chars != null) {
+                       if (charCount >= 0) {
                                if (charCount < 2)
                                        return DecoderStatus.InsufficientSpace;
                                leftBits -= (uint) 0x10000;
@@ -205,30 +205,82 @@ public class UTF8Encoding : Encoding
                return DecoderStatus.Ok;
        }
 
+       // This function is called when we want to flush the decoder state
+       // (i.e. in case of invalid UTF-8 characters or interrupted sequences)
+       // TODO: if we run out of output space during fallback replacement an
+       //       ArgumentException is thrown -- maybe this is not the correct
+       //       behaviour
+       internal unsafe static void InternalGetCharsFlush (
+               char* chars, int charCount,
+               DecoderFallbackBuffer fallbackBuffer,
+               DecoderStatus s,
+               int bytesProcessed, ref int charsProcessed,
+               ref uint leftBytes, ref uint leftBits, ref uint procBytes)
+       {
+               // now we build a 'bytesUnknown' array with the
+               // stored bytes in 'procBytes'.
+               int extra = 0;
+               for (uint t = procBytes; t != 0; extra++)
+                       t = t >> 8;
+               byte [] bytesUnknown = new byte [extra];
+               for (int i = extra; i > 0; i--)
+                       bytesUnknown [i - 1] = (byte) ((procBytes >> (8 * (extra - i))) & 0xff);
+               // partial reset: this condition avoids infinite loops
+               if (s == DecoderStatus.InvalidSequence)
+                       leftBytes = 0;
+               // call the fallback and cross fingers
+               fallbackBuffer.Fallback (bytesUnknown, bytesProcessed - extra);
+               if (chars != null) {
+                       while (fallbackBuffer.Remaining > 0) {
+                               if (charsProcessed >= charCount)
+                                       throw new ArgumentException ("Insufficient Space", "chars/fallback");
+                               chars [charsProcessed++] = fallbackBuffer.GetNextChar ();
+                       }
+               } else
+                       charsProcessed += fallbackBuffer.Remaining;
+               fallbackBuffer.Reset ();
+
+               // recovery was successful, flush decoder state
+               leftBits = leftBytes = procBytes = 0;
+       }
+
+       // InternalGetChars processor. Can decode or count space needed for
+       // decoding, depending on the enabled mode:
+       //   - decoder
+       //       enabled when charCount >= 0 (but chars may be null)
+       //   - counter
+       //       enabled when chars == null && charCount < 0
        internal unsafe static DecoderStatus InternalGetChars (
                byte* bytes, int byteCount,
                char* chars, int charCount,
                DecoderFallbackBuffer fallbackBuffer,
                out int bytesProcessed, out int charsProcessed,
-               ref uint leftBytes, ref uint leftBits, ref uint procBytes)
+               ref uint leftBytes, ref uint leftBits, ref uint procBytes,
+               bool flush)
        {
                DecoderStatus s;
                int t_bytesProcessed, t_charsProcessed;
 
                // Validate parameters
-               if (bytes == null)
-                       throw new ArgumentNullException ("bytes");
                if (byteCount < 0)
                        throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));
-               if (charCount < 0)
-                       throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
+               else
+                       if (byteCount > 0 && bytes == null)
+                               throw new ArgumentNullException ("bytes");
+               if (chars == null) {
+                       if (charCount > 0)
+                               throw new ArgumentNullException ("chars");
+               } else {
+                       if (charCount < 0)
+                               throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
+               }
 
                // reset counters
                charsProcessed = 0;
                bytesProcessed = 0;
 
                // byte processing loop
-               while (byteCount - bytesProcessed > 0 && (chars == null || charCount - charsProcessed > 0)) {
+               while (byteCount - bytesProcessed > 0) {
                        // fetch a char from the input byte array
                        s = chars != null
                                ? InternalGetChar (
@@ -238,10 +290,16 @@ public class UTF8Encoding : Encoding
                                        ref leftBytes, ref leftBits, ref procBytes)
                                : InternalGetChar (
                                        bytes + bytesProcessed, byteCount - bytesProcessed,
-                                       null, 0,
+                                       null, charCount,
                                        out t_bytesProcessed, out t_charsProcessed,
                                        ref leftBytes, ref leftBits, ref procBytes);
 
+                       // if not enough space return here
+                       // NOTE: maybe we should restore the original decoder
+                       //       state ... we should check what the MS implementation does here
+                       if(s == DecoderStatus.InsufficientSpace)
+                               return DecoderStatus.InsufficientSpace;
+
                        // update counters
                        charsProcessed += t_charsProcessed;
                        bytesProcessed += t_bytesProcessed;
@@ -250,80 +308,131 @@ public class UTF8Encoding : Encoding
                        case DecoderStatus.Ok:
                                break;  // everything OK :D
 
-                       case DecoderStatus.InsufficientSpace:
-                               throw new ArgumentException ("Insufficient Space", "chars");
-
                        case DecoderStatus.Overlong:
                        case DecoderStatus.InvalidSequence:
                        case DecoderStatus.InvalidStart:
                        case DecoderStatus.InvalidChar:
                        case DecoderStatus.SurrogateFound:
-                               // Invalid UTF-8 characters and sequences...
-                               // now we build a 'bytesUnknown' array with the
-                               // stored bytes in 'procBytes'.
-                               int extra = 0;
-                               for (uint t = procBytes; t != 0; extra++)
-                                       t = t >> 8;
-                               byte [] bytesUnknown = new byte [extra];
-                               for (int i = extra; i > 0; i--)
-                                       bytesUnknown [i - 1] = (byte) ((procBytes >> (8 * (extra - i))) & 0xff);
-                               // partial reset: this condition avoids
-                               // infinite loops
-                               if (s == DecoderStatus.InvalidSequence)
-                                       leftBytes = 0;
-                               // call the fallback and cross fingers
-                               fallbackBuffer.Fallback (bytesUnknown, bytesProcessed - extra);
-                               if (chars != null) {
-                                       while (fallbackBuffer.Remaining > 0) {
-                                               if (charsProcessed >= charCount)
-                                                       throw new ArgumentException ("Insufficient Space", "chars/fallback");
-                                               chars [charsProcessed++] = fallbackBuffer.GetNextChar ();
-                                       }
-                               } else
-                                       charsProcessed += fallbackBuffer.Remaining;
-                               fallbackBuffer.Reset ();
-                               // recovery was succesful, reset decoder state
-                               leftBits = leftBytes = procBytes = 0;
+                               InternalGetCharsFlush (
+                                       chars, charCount,
+                                       fallbackBuffer,
+                                       s,
+                                       bytesProcessed, ref charsProcessed,
+                                       ref leftBytes, ref leftBits, ref procBytes);
                                break;
 
                        case DecoderStatus.InputRunOut:
+                               if (flush)
+                                       InternalGetCharsFlush (
+                                               chars, charCount,
+                                               fallbackBuffer,
+                                               s,
+                                               bytesProcessed, ref charsProcessed,
+                                               ref leftBytes, ref leftBits, ref procBytes);
                                return DecoderStatus.InputRunOut;
                        }
                }
                return DecoderStatus.Ok;
        }
 
-       // Get the characters that result from decoding a byte buffer.
-       internal unsafe static DecoderStatus InternalGetChars (
+       internal unsafe static DecoderStatus InternalGetCharsDecode (
+               byte* bytes, int byteCount,
+               char* chars, int charCount,
+               DecoderFallbackBuffer fallbackBuffer,
+               out int bytesProcessed, out int charsProcessed,
+               ref uint leftBytes, ref uint leftBits, ref uint procBytes,
+               bool flush)
+       {
+               if (byteCount < 0)
+                       throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
+               if (charCount < 0)
+                       throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
+
+               return InternalGetChars (
+                               bytes, byteCount,
+                               chars, charCount,
+                               fallbackBuffer,
+                               out bytesProcessed, out charsProcessed,
+                               ref leftBytes, ref leftBits, ref procBytes,
+                               flush);
+       }
+
+       internal unsafe static DecoderStatus InternalGetCharsDecode (
                byte[] bytes, int byteIndex, int byteCount,
                char[] chars, int charIndex,
                DecoderFallbackBuffer fallbackBuffer,
                out int bytesProcessed, out int charsProcessed,
-               ref uint leftBytes, ref uint leftBits, ref uint procBytes)
+               ref uint leftBytes, ref uint leftBits, ref uint procBytes,
+               bool flush)
        {
-               // Validate the parameters.
                if (bytes == null)
                        throw new ArgumentNullException ("bytes");
-               if (byteIndex < 0 || byteIndex >= bytes.Length)
+               if (chars == null)
+                       throw new ArgumentNullException ("chars");
+               if (byteIndex < 0 || byteIndex > bytes.Length)
                        throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
                if (byteCount < 0 || byteCount > (bytes.Length - byteIndex))
                        throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
-               if (charIndex < 0 || charIndex > (chars != null && chars.Length > 0 ? chars.Length - 1 : 0))
+               if (charIndex < 0 || charIndex > chars.Length)
                        throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
 
                fixed (char* cptr = chars) {
                        fixed (byte* bptr = bytes) {
                                return InternalGetChars (
                                                bptr + byteIndex, byteCount,
-                                               chars != null ? cptr + charIndex : null,
-                                               chars != null ? chars.Length - charIndex : 0,
+                                               cptr + charIndex, chars.Length - charIndex,
                                                fallbackBuffer,
                                                out bytesProcessed, out charsProcessed,
-                                               ref leftBytes, ref leftBits, ref procBytes);
+                                               ref leftBytes, ref leftBits, ref procBytes,
+                                               flush);
                        }
                }
        }
 
+       internal unsafe static DecoderStatus InternalGetCharsCount (
+               byte* bytes, int byteCount,
+               DecoderFallbackBuffer fallbackBuffer,
+               out int bytesProcessed, out int charsProcessed,
+               ref uint leftBytes, ref uint leftBits, ref uint procBytes,
+               bool flush)
+       {
+               if (byteCount < 0)
+                       throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
+
+               return InternalGetChars (
+                               bytes, byteCount,
+                               null, -1,
+                               fallbackBuffer,
+                               out bytesProcessed, out charsProcessed,
+                               ref leftBytes, ref leftBits, ref procBytes,
+                               flush);
+       }
+
+       internal unsafe static DecoderStatus InternalGetCharsCount (
+               byte[] bytes, int byteIndex, int byteCount,
+               DecoderFallbackBuffer fallbackBuffer,
+               out int bytesProcessed, out int charsProcessed,
+               ref uint leftBytes, ref uint leftBits, ref uint procBytes,
+               bool flush)
+       {
+               if (bytes == null)
+                       throw new ArgumentNullException ("bytes");
+               if (byteIndex < 0 || byteIndex > bytes.Length)
+                       throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
+               if (byteCount < 0 || byteCount > (bytes.Length - byteIndex))
+                       throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
+
+               fixed (byte* bptr = bytes) {
+                       return InternalGetChars (
+                                       bptr + byteIndex, byteCount,
+                                       null, -1,
+                                       fallbackBuffer,
+                                       out bytesProcessed, out charsProcessed,
+                                       ref leftBytes, ref leftBits, ref procBytes,
+                                       flush);
+               }
+       }
+
        ///////////////////////////////////////////////////////////////////////
        // INTERNAL ENCODING FUNCTION (CHAR/UTF16 -> UTF8)
        ///////////////////////////////////////////////////////////////////////
@@ -337,8 +446,8 @@ public class UTF8Encoding : Encoding
        };
 
        // following method encodes an utf8 character into a byte buffer.
-       // NOTE: If 'bytes' is null, this function only counts bytes and chars
-       //       without writing anything.
+       // NOTE: If 'byteCount' is < 0, this function only counts used bytes
+       //       without writing anything.
        // NOTE: BOM (0xEF 0xBB 0xBF) is not yet supported.
        //       See http://www.cl.cam.ac.uk/~mgk25/unicode.html
        private unsafe static EncoderStatus InternalGetByte (
@@ -364,7 +473,7 @@ again:
                        charsProcessed++;
                        charCount--;
                        if (ch < (uint) 0x80) {
-                               if (bytes != null) {
+                               if (byteCount >= 0) {
                                        if (byteCount < 1)
                                                return EncoderStatus.InsufficientSpace;
                                        *bytes++ = (byte) ch;
@@ -372,7 +481,7 @@ again:
                                }
                                bytesProcessed++;
                        } else if (ch < (uint) 0x0800) {
-                               if (bytes != null) {
+                               if (byteCount >= 0) {
                                        if (byteCount < 2)
                                                return EncoderStatus.InsufficientSpace;
                                        *bytes++ = (byte) ((uint) 0xC0 | (ch >> 6) & 0x3f);
@@ -381,7 +490,7 @@ again:
                                }
                                bytesProcessed += 2;
                        } else if (ch < (uint) 0xD800 || ch > (uint) 0xDFFF) {
-                               if (bytes != null) {
+                               if (byteCount >= 0) {
                                        if (byteCount < 3)
                                                return EncoderStatus.InsufficientSpace;
                                        *bytes++ = (byte) ((uint) 0xE0 | (ch >> 12));
@@ -397,6 +506,7 @@ again:
                        } else {
                                // We have a surrogate tail without 
                                // leading surrogate.
+                               leftChar = ch;
                                return EncoderStatus.InvalidChar;
                        }
                } else {
@@ -404,7 +514,7 @@ again:
                                // We have a correct surrogate pair.
                                ch = 0x10000 + (uint) ch - (uint) 0xDC00
                                        + ((leftChar - (uint) 0xD800) << 10);
-                               if (bytes != null) {
+                               if (byteCount >= 0) {
                                        if (byteCount < 4)
                                                return EncoderStatus.InsufficientSpace;
                                        *bytes++ = (byte) (0xF0 | (ch >> 18));
@@ -434,23 +544,95 @@ again:
                return EncoderStatus.Ok;
        }
 
+       // This function is called when we want to flush the encoder state
+       // (i.e. in case of invalid UTF-16 characters or dangling surrogates)
+       // TODO: if we run out of output space during fallback replacement an
+       //       ArgumentException is thrown -- maybe this is not the correct
+       //       behaviour
+       internal unsafe static void InternalGetBytesFlush (
+               byte* bytes, int byteCount,
+               EncoderFallbackBuffer fallbackBuffer,
+               int charsProcessed, ref int bytesProcessed,
+               ref uint leftChar)
+       {
+               int t_charsProcessed, t_bytesProcessed;
+
+               // under normal circumstances fallbackBuffer is never null, except
+               // when InternalGetBytes has been called from this function
+               // (null is passed there to avoid infinite recursion)
+               if (fallbackBuffer == null)
+                       return;
+
+               // invalid UTF-16 or invalid surrogate
+               fallbackBuffer.Fallback ((char) leftChar, charsProcessed - 1);
+               // if we've arrived here we are working in replacement mode:
+               // build a replacement fallback_chars buffer
+               char[] fallback_chars = new char [fallbackBuffer.Remaining];
+               for (int i = 0; i < fallback_chars.Length; i++)
+                       fallback_chars [i] = fallbackBuffer.GetNextChar ();
+               fallbackBuffer.Reset ();
+               // and encode it into UTF8 bytes...
+               fixed (char *fb_chars = fallback_chars) {
+                       leftChar = 0;
+                       switch (bytes != null
+                               ? InternalGetBytes (
+                                               fb_chars, fallback_chars.Length,
+                                               bytes + bytesProcessed, byteCount - bytesProcessed,
+                                               null, out t_charsProcessed, out t_bytesProcessed,
+                                               ref leftChar,
+                                               true)
+                               : InternalGetBytes (
+                                               fb_chars, fallback_chars.Length,
+                                               null, byteCount,
+                                               null, out t_charsProcessed, out t_bytesProcessed,
+                                               ref leftChar,
+                                               true)) {
+                       case EncoderStatus.Ok:
+                               // everything OK :D
+                               bytesProcessed += t_bytesProcessed;
+                               break;
+                       case EncoderStatus.InsufficientSpace:
+                               throw new ArgumentException ("Insufficient Space", "fallback buffer bytes");
+                       case EncoderStatus.InputRunOut:
+                       case EncoderStatus.InvalidChar:
+                       case EncoderStatus.InvalidSurrogate:
+                               throw new ArgumentException ("Fallback chars are pure evil.", "fallback buffer bytes");
+                       }
+               }
+               // flush encoder state
+               leftChar = 0;
+       }
+
+       // InternalGetBytes processor. Can encode or count space needed for
+       // encoding, depending on the enabled mode:
+       //   - encoder
+       //       enabled when byteCount >= 0 (but bytes may be null)
+       //   - counter
+       //       enabled when bytes == null && byteCount < 0
        internal unsafe static EncoderStatus InternalGetBytes (
                char* chars, int charCount,
                byte* bytes, int byteCount,
                EncoderFallbackBuffer fallbackBuffer,
                out int charsProcessed, out int bytesProcessed,
-               ref uint leftChar)
+               ref uint leftChar,
+               bool flush)
        {
                EncoderStatus s;
                int t_charsProcessed, t_bytesProcessed;
 
                // Validate the parameters
-               if (chars == null)
-                       throw new ArgumentNullException ("bytes");
                if (charCount < 0)
                        throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
-               if (byteCount < 0)
-                       throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));
+               else
+                       if (charCount > 0 && chars == null)
+                               throw new ArgumentNullException ("chars");
+               if (bytes == null) {
+                       if (byteCount > 0)
+                               throw new ArgumentNullException ("bytes");
+               } else {
+                       if (byteCount <= 0)
+                               throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));
+               }
 
                // reset counters
                charsProcessed = 0;
@@ -465,9 +647,16 @@ again:
                                        out t_charsProcessed, out t_bytesProcessed, ref leftChar)
                                : InternalGetByte (
                                        chars + charsProcessed, charCount - charsProcessed,
-                                       null, 0,
+                                       null, byteCount,
                                        out t_charsProcessed, out t_bytesProcessed, ref leftChar);
 
+                       // if not enough space return here
+                       // NOTE: maybe we should restore the original encoder
+                       //       state ... we should check what the MS implementation does here
+                       if(s == EncoderStatus.InsufficientSpace)
+                               return EncoderStatus.InsufficientSpace;
+
+                       // update counters
                        charsProcessed += t_charsProcessed;
                        bytesProcessed += t_bytesProcessed;
 
@@ -475,88 +664,67 @@ again:
                        case EncoderStatus.Ok:
                                break;  // everything OK :D
 
-                       case EncoderStatus.InsufficientSpace:
-                               throw new ArgumentException ("Insufficient Space", "bytes");
-
                        case EncoderStatus.InputRunOut:
+                               if (flush)
+                                       InternalGetBytesFlush (
+                                               bytes, byteCount,
+                                               fallbackBuffer,
+                                               charsProcessed, ref bytesProcessed,
+                                               ref leftChar);
                                return EncoderStatus.InputRunOut;
 
                        case EncoderStatus.InvalidChar:
                        case EncoderStatus.InvalidSurrogate:
-                               // we've found an invalid char or surrogate
-                               if (fallbackBuffer == null) {
-                                       // without a fallbackBuffer abort
-                                       // returning 'InvalidChar' or
-                                       // 'InvalidSurrogate'
-                                       return s;
-                               }
-                               if (t_charsProcessed >= 1) {
-                                       // one-char invalid UTF-16 or an
-                                       // invalid surrogate
-                                       fallbackBuffer.Fallback (
-                                               chars [charsProcessed - 1],
-                                               charsProcessed - 1);
-                               } else {
-                                       // we've read a two-char invalid UTF-16
-                                       // but in this buffer we have only the
-                                       // invalid surrogate tail
-                                       fallbackBuffer.Fallback (
-                                               (char) leftChar,
-                                               -1);
-                               }
-                               // if we've arrived here we are working in
-                               // replacement mode: build a replacement
-                               // fallback_chars buffer
-                               char[] fallback_chars = new char [fallbackBuffer.Remaining];
-                               for (int i = 0; i < fallback_chars.Length; i++)
-                                       fallback_chars [i] = fallbackBuffer.GetNextChar ();
-                               fallbackBuffer.Reset ();
-                               // and encode it into UTF8 bytes...
-                               fixed (char *fb_chars = fallback_chars) {
-                                       leftChar = 0;
-                                       switch (bytes != null
-                                               ? InternalGetBytes (fb_chars, fallback_chars.Length,
-                                                                   bytes + bytesProcessed, byteCount - bytesProcessed,
-                                                                   null, out t_charsProcessed, out t_bytesProcessed,
-                                                                   ref leftChar)
-                                               : InternalGetBytes (fb_chars, fallback_chars.Length,
-                                                                   null, 0,
-                                                                   null, out t_charsProcessed, out t_bytesProcessed,
-                                                                   ref leftChar)) {
-                                       case EncoderStatus.Ok:
-                                               // everything OK :D
-                                               bytesProcessed += t_bytesProcessed;
-                                               break;
-                                       case EncoderStatus.InsufficientSpace:
-                                               throw new ArgumentException ("Insufficient Space", "fallback buffer bytes");
-                                       case EncoderStatus.InputRunOut:
-                                       case EncoderStatus.InvalidChar:
-                                       case EncoderStatus.InvalidSurrogate:
-                                               throw new ArgumentException ("Fallback chars are pure evil.", "fallback buffer bytes");
-                                       }
-                               }
-                               // partial reset of encoder state
-                               leftChar = 0;
+                               InternalGetBytesFlush (
+                                       bytes, byteCount,
+                                       fallbackBuffer,
+                                       charsProcessed, ref bytesProcessed,
+                                       ref leftChar);
                                break;
                        }
                }
                return EncoderStatus.Ok;
        }
 
-       internal unsafe static EncoderStatus InternalGetBytes (
+       internal unsafe static EncoderStatus InternalGetBytesEncode (
+               char* chars, int charCount,
+               byte* bytes, int byteCount,
+               EncoderFallbackBuffer fallbackBuffer,
+               out int charsProcessed, out int bytesProcessed,
+               ref uint leftChar,
+               bool flush)
+       {
+               if (charCount < 0)
+                       throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
+               if (byteCount < 0)
+                       throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
+
+               return InternalGetBytes (
+                       chars, charCount,
+                       bytes, byteCount,
+                       fallbackBuffer,
+                       out charsProcessed, out bytesProcessed,
+                       ref leftChar,
+                       flush);
+       }
+
+       internal unsafe static EncoderStatus InternalGetBytesEncode (
                char[] chars, int charIndex, int charCount,
                byte[] bytes, int byteIndex,
                EncoderFallbackBuffer fallbackBuffer,
                out int charsProcessed, out int bytesProcessed,
-               ref uint leftChar)
+               ref uint leftChar,
+               bool flush)
        {
                if (chars == null)
                        throw new ArgumentNullException ("chars");
-               if (charIndex < 0 || charIndex >= chars.Length)
+               if (bytes == null)
+                       throw new ArgumentNullException ("bytes");
+               if (charIndex < 0 || charIndex > chars.Length)
                        throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
                if (charCount < 0 || charCount > (chars.Length - charIndex))
                        throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
-               if (byteIndex < 0 || byteIndex > (bytes != null && bytes.Length > 0 ? bytes.Length - 1 : 0))
+               if (byteIndex < 0 || byteIndex > bytes.Length)
                        throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
 
                unsafe {
@@ -564,16 +732,60 @@ again:
                                fixed (byte *bptr = bytes) {
                                        return InternalGetBytes (
                                                cptr + charIndex, charCount,
-                                               bytes != null ? bptr + byteIndex : null,
-                                               bytes != null ? bytes.Length - byteIndex : 0,
+                                               bptr + byteIndex, bytes.Length - byteIndex,
                                                fallbackBuffer,
                                                out charsProcessed, out bytesProcessed,
-                                               ref leftChar);
+                                               ref leftChar,
+                                               flush);
                                }
                        }
                }
        }
 
+       internal unsafe static EncoderStatus InternalGetBytesCount (
+               char* chars, int charCount,
+               EncoderFallbackBuffer fallbackBuffer,
+               out int charsProcessed, out int bytesProcessed,
+               ref uint leftChar,
+               bool flush)
+       {
+               if (charCount < 0)
+                       throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
+
+               return InternalGetBytes (
+                       chars, charCount,
+                       null, -1,
+                       fallbackBuffer,
+                       out charsProcessed, out bytesProcessed,
+                       ref leftChar,
+                       flush);
+       }
+
+       internal unsafe static EncoderStatus InternalGetBytesCount (
+               char[] chars, int charIndex, int charCount,
+               EncoderFallbackBuffer fallbackBuffer,
+               out int charsProcessed, out int bytesProcessed,
+               ref uint leftChar,
+               bool flush)
+       {
+               if (chars == null)
+                       throw new ArgumentNullException ("chars");
+               if (charIndex < 0 || charIndex > chars.Length)
+                       throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
+               if (charCount < 0 || charCount > (chars.Length - charIndex))
+                       throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
+
+               fixed (char *cptr = chars) {
+                       return InternalGetBytes (
+                               cptr + charIndex, charCount,
+                               null, -1,
+                               fallbackBuffer,
+                               out charsProcessed, out bytesProcessed,
+                               ref leftChar,
+                               flush);
+               }
+       }
+
        #region GetByteCount()
 
        // Get the number of bytes needed to encode a character buffer.
@@ -581,11 +793,12 @@ again:
        {
                uint leftChar = 0;
                int charsProcessed, bytesProcessed;
-               InternalGetBytes (chars, index, count,
-                                 null, 0,
-                                 EncoderFallback.CreateFallbackBuffer (),
-                                 out charsProcessed, out bytesProcessed,
-                                 ref leftChar);
+               InternalGetBytesCount (
+                               chars, index, count,
+                               EncoderFallback.CreateFallbackBuffer (),
+                               out charsProcessed, out bytesProcessed,
+                               ref leftChar,
+                               true);
                return bytesProcessed;
        }
 
@@ -596,15 +809,12 @@ again:
        {
                int charsProcessed, bytesProcessed;
                uint leftChar = 0;
-               if (chars == null)
-                       throw new ArgumentNullException ("chars");
-               if (count < 0)
-                       throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
-               InternalGetBytes (chars, count,
-                                 null, 0,
-                                 EncoderFallback.CreateFallbackBuffer (),
-                                 out charsProcessed, out bytesProcessed,
-                                 ref leftChar);
+               InternalGetBytesCount (
+                       chars, count,
+                       EncoderFallback.CreateFallbackBuffer (),
+                       out charsProcessed, out bytesProcessed,
+                       ref leftChar,
+                       true);
                return bytesProcessed;
        }
 
@@ -618,15 +828,14 @@ again:
        {
                int charsProcessed, bytesProcessed;
                uint leftChar = 0;
-               if (bytes == null) {
-                       throw new ArgumentNullException ("bytes");
-               }
-
-               InternalGetBytes (chars, charIndex, charCount,
-                                 bytes, byteIndex,
-                                 EncoderFallback.CreateFallbackBuffer (),
-                                 out charsProcessed, out bytesProcessed,
-                                 ref leftChar);
+               if (InternalGetBytesEncode (
+                               chars, charIndex, charCount,
+                               bytes, byteIndex,
+                               EncoderFallback.CreateFallbackBuffer (),
+                               out charsProcessed, out bytesProcessed,
+                               ref leftChar,
+                               true) == EncoderStatus.InsufficientSpace)
+                       throw new ArgumentException ("Insufficient Space", "bytes");
                return bytesProcessed;
        }
 
@@ -636,28 +845,32 @@ again:
        {
                int charsProcessed, bytesProcessed;
                uint leftChar = 0;
+               EncoderStatus status;
                if (s == null)
                        throw new ArgumentNullException ("s");
                if (bytes == null)
                        throw new ArgumentNullException ("bytes");
-               if (charIndex < 0 || charIndex >= s.Length)
+               if (charIndex < 0 || charIndex > s.Length)
                        throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
                if (charCount < 0 || charCount > (s.Length - charIndex))
                        throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
-               if (byteIndex < 0 || byteIndex > (bytes.Length > 0 ? bytes.Length - 1 : 0))
+               if (byteIndex < 0 || byteIndex > bytes.Length)
                        throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
                unsafe {
                        fixed (char *cptr = s) {
                                fixed (byte *bptr = bytes) {
-                                       InternalGetBytes (
+                                       status = InternalGetBytesEncode (
                                                cptr + charIndex, charCount,
                                                bptr + byteIndex, bytes.Length - byteIndex,
                                                EncoderFallback.CreateFallbackBuffer (),
                                                out charsProcessed, out bytesProcessed,
-                                               ref leftChar);
+                                               ref leftChar,
+                                               true);
                                }
                        }
                }
+               if (status == EncoderStatus.InsufficientSpace)
+                       throw new ArgumentException ("Insufficient Space", "bytes");
                return bytesProcessed;
        }
 
@@ -667,19 +878,13 @@ again:
        {
                int charsProcessed, bytesProcessed;
                uint leftChar = 0;
-               if (chars == null)
-                       throw new ArgumentNullException ("chars");
-               if (charCount < 0)
-                       throw new IndexOutOfRangeException ("charCount");
-               if (bytes == null)
-                       throw new ArgumentNullException ("bytes");
-               if (byteCount < 0)
-                       throw new IndexOutOfRangeException ("charCount");
-               InternalGetBytes (
+               if (InternalGetBytesEncode (
                                chars, charCount, bytes, byteCount,
                                EncoderFallback.CreateFallbackBuffer (),
                                out charsProcessed, out bytesProcessed,
-                               ref leftChar);
+                               ref leftChar,
+                               true) == EncoderStatus.InsufficientSpace)
+                       throw new ArgumentException ("Insufficient Space", "bytes");
                return bytesProcessed;
        }
 
@@ -692,12 +897,12 @@ again:
        {
                int bytesProcessed, charsProcessed;
                uint leftBytes = 0, leftBits = 0, procBytes = 0;
-               InternalGetChars (
-                       bytes, index, count,
-                       null, 0,
-                       DecoderFallback.CreateFallbackBuffer (),
-                       out bytesProcessed, out charsProcessed,
-                       ref leftBytes, ref leftBits, ref procBytes);
+               InternalGetCharsCount (
+                               bytes, index, count,
+                               DecoderFallback.CreateFallbackBuffer (),
+                               out bytesProcessed, out charsProcessed,
+                               ref leftBytes, ref leftBits, ref procBytes,
+                               true);
                return charsProcessed;
        }
 
@@ -707,12 +912,12 @@ again:
        {
                int bytesProcessed, charsProcessed;
                uint leftBytes = 0, leftBits = 0, procBytes = 0;
-               InternalGetChars (
-                       bytes, count,
-                       null, 0,
-                       DecoderFallback.CreateFallbackBuffer (),
-                       out bytesProcessed, out charsProcessed,
-                       ref leftBytes, ref leftBits, ref procBytes);
+               InternalGetCharsCount (
+                               bytes, count,
+                               DecoderFallback.CreateFallbackBuffer (),
+                               out bytesProcessed, out charsProcessed,
+                               ref leftBytes, ref leftBits, ref procBytes,
+                               true);
                return charsProcessed;
        }
 
@@ -724,12 +929,16 @@ again:
        {
                int bytesProcessed, charsProcessed;
                uint leftBytes = 0, leftBits = 0, procBytes = 0;
-               InternalGetChars (
-                       bytes, byteIndex, byteCount,
-                       chars, charIndex,
-                       DecoderFallback.CreateFallbackBuffer (),
-                       out bytesProcessed, out charsProcessed,
-                       ref leftBytes, ref leftBits, ref procBytes);
+
+               if (InternalGetCharsDecode (
+                               bytes, byteIndex, byteCount,
+                               chars, charIndex,
+                               DecoderFallback.CreateFallbackBuffer (),
+                               out bytesProcessed, out charsProcessed,
+                               ref leftBytes, ref leftBits, ref procBytes,
+                               true) == DecoderStatus.InsufficientSpace)
+                       throw new ArgumentException ("Insufficient Space", "chars");
+
                return charsProcessed;
        }
 
@@ -739,12 +948,16 @@ again:
        {
                int bytesProcessed, charsProcessed;
                uint leftBytes = 0, leftBits = 0, procBytes = 0;
-               InternalGetChars (
-                       bytes, byteCount,
-                       chars, charCount,
-                       DecoderFallback.CreateFallbackBuffer (),
-                       out bytesProcessed, out charsProcessed,
-                       ref leftBytes, ref leftBits, ref procBytes);
+
+               if (InternalGetCharsDecode (
+                               bytes, byteCount,
+                               chars, charCount,
+                               DecoderFallback.CreateFallbackBuffer (),
+                               out bytesProcessed, out charsProcessed,
+                               ref leftBytes, ref leftBits, ref procBytes,
+                               true) == DecoderStatus.InsufficientSpace)
+                       throw new ArgumentException ("Insufficient Space", "chars");
+
                return charsProcessed;
        }
 
@@ -841,39 +1054,34 @@ again:
                }
 
                // Override inherited methods.
-               public override int GetCharCount (byte[] bytes, int index, int count)
+               public override int GetCharCount (byte[] bytes, int index, int count, bool flush)
                {
                        int bytesProcessed, charsProcessed;
-                       InternalGetChars (
-                               bytes, index, count,
-                               null, 0,
-                               this.FallbackBuffer,
-                               out bytesProcessed, out charsProcessed,
-                               ref leftBytes, ref leftBits, ref procBytes);
+                       InternalGetCharsCount (
+                                       bytes, index, count,
+                                       this.FallbackBuffer,
+                                       out bytesProcessed, out charsProcessed,
+                                       ref leftBytes, ref leftBits, ref procBytes,
+                                       flush);
                        return charsProcessed;
                }
 
                [ComVisibleAttribute(false)]
-               public override int GetCharCount (byte[] bytes, int index, int count, bool flush)
+               public override int GetCharCount (byte[] bytes, int index, int count)
                {
-                       int r = GetCharCount (bytes, index, count);
-                       if (flush)
-                               leftBytes = leftBits = procBytes = 0;
-                       return r;
+                       return GetCharCount (bytes, index, count, true);
                }
 
                [ComVisibleAttribute(false)] 
                public unsafe override int GetCharCount (byte* bytes, int count, bool flush)
                {
                        int bytesProcessed, charsProcessed;
-                       InternalGetChars (
-                               bytes, count,
-                               null, 0,
-                               this.FallbackBuffer,
-                               out bytesProcessed, out charsProcessed,
-                               ref leftBytes, ref leftBits, ref procBytes);
-                       if (flush)
-                               leftBytes = leftBits = procBytes = 0;
+                       InternalGetCharsCount (
+                                       bytes, count,
+                                       this.FallbackBuffer,
+                                       out bytesProcessed, out charsProcessed,
+                                       ref leftBytes, ref leftBits, ref procBytes,
+                                       flush);
                        return charsProcessed;
                }
 
@@ -882,37 +1090,36 @@ again:
                                                 char* chars, int charCount, bool flush)
                {
                        int bytesProcessed, charsProcessed;
-                       InternalGetChars (
-                               bytes, byteCount,
-                               chars, charCount,
-                               this.FallbackBuffer,
-                               out bytesProcessed, out charsProcessed,
-                               ref leftBytes, ref leftBits, ref procBytes);
-                       if (flush)
-                               leftBytes = leftBits = procBytes = 0;
+                       if (InternalGetCharsDecode (
+                                       bytes, byteCount,
+                                       chars, charCount,
+                                       this.FallbackBuffer,
+                                       out bytesProcessed, out charsProcessed,
+                                       ref leftBytes, ref leftBits, ref procBytes,
+                                       flush) == DecoderStatus.InsufficientSpace)
+                               throw new ArgumentException ("Insufficient Space", "chars");
                        return charsProcessed;
                }
 
                public override int GetChars (byte[] bytes, int byteIndex,
-                                                int byteCount, char[] chars, int charIndex)
+                                                int byteCount, char[] chars, int charIndex, bool flush)
                {
                        int bytesProcessed, charsProcessed;
-                       InternalGetChars (
-                               bytes, byteIndex, byteCount,
-                               chars, charIndex,
-                               this.FallbackBuffer,
-                               out bytesProcessed, out charsProcessed,
-                               ref leftBytes, ref leftBits, ref procBytes);
+                       if (InternalGetCharsDecode (
+                                       bytes, byteIndex, byteCount,
+                                       chars, charIndex,
+                                       this.FallbackBuffer,
+                                       out bytesProcessed, out charsProcessed,
+                                       ref leftBytes, ref leftBits, ref procBytes,
+                                       flush) == DecoderStatus.InsufficientSpace)
+                               throw new ArgumentException ("Insufficient Space", "chars");
                        return charsProcessed;
                }
 
                public override int GetChars (byte[] bytes, int byteIndex,
-                                                int byteCount, char[] chars, int charIndex, bool flush)
+                                                int byteCount, char[] chars, int charIndex)
                {
-                       int r = GetChars (bytes, byteIndex, byteCount, chars, charIndex);
-                       if (flush)
-                               leftBytes = leftBits = procBytes = 0;
-                       return r;
+                       return GetChars (bytes, byteIndex, byteCount, chars, charIndex, true);
                }
 
                public override void Reset ()
@@ -928,26 +1135,16 @@ again:
                        char* chars, int charCount, bool flush,
                        out int bytesUsed, out int charsUsed, out bool completed)
                {
-                       if (chars == null)
-                               throw new ArgumentNullException ("chars");
-                       if (charCount < 0)
-                               throw new IndexOutOfRangeException ("charCount");
-                       if (bytes == null)
-                               throw new ArgumentNullException ("bytes");
-                       if (byteCount < 0)
-                               throw new IndexOutOfRangeException ("charCount");
-                       UTF8Encoding.InternalGetChars (
+                       InternalGetCharsDecode (
                                        bytes, byteCount,
                                        chars, charCount,
                                        this.FallbackBuffer,
                                        out bytesUsed, out charsUsed,
-                                       ref leftBytes, ref leftBits, ref procBytes);
+                                       ref leftBytes, ref leftBits, ref procBytes,
+                                       flush);
                        // only completed if all bytes have been processed and
                        // succesful converted to chars!!
                        completed = (byteCount == bytesUsed);
-                       // flush state
-                       if (flush)
-                               leftBytes = leftBits = procBytes = 0;
                }
        } // class UTF8Decoder
 
@@ -966,7 +1163,7 @@ again:
                {
                        this.Fallback = fallback;
                        this.leftChar = 0;
-                       this.emitIdentifier = emitIdentifier;
+                       this.emitIdentifier = false; //emitIdentifier;
                        this.emittedIdentifier = false;
                }
 
@@ -979,13 +1176,12 @@ again:
                                preambleSize = 3;
                                emittedIdentifier = true;
                        }
-                       InternalGetBytes (chars, count,
-                                         null, 0,
-                                         this.FallbackBuffer,
-                                         out charsProcessed, out bytesProcessed,
-                                         ref leftChar);
-                       if (flush)
-                               leftChar = 0;
+                       InternalGetBytesCount (
+                                       chars, count,
+                                       this.FallbackBuffer,
+                                       out charsProcessed, out bytesProcessed,
+                                       ref leftChar,
+                                       flush);
                        return bytesProcessed + preambleSize;
                }
 
@@ -997,13 +1193,12 @@ again:
                                preambleSize = 3;
                                emittedIdentifier = true;
                        }
-                       InternalGetBytes (chars, index, count,
-                                         null, 0,
-                                         this.FallbackBuffer,
-                                         out charsProcessed, out bytesProcessed,
-                                         ref leftChar);
-                       if (flush)
-                               leftChar = 0;
+                       InternalGetBytesCount (
+                                       chars, index, count,
+                                       this.FallbackBuffer,
+                                       out charsProcessed, out bytesProcessed,
+                                       ref leftChar,
+                                       flush);
                        return bytesProcessed + preambleSize;
                }
 
@@ -1022,13 +1217,14 @@ again:
                                emittedIdentifier = true;
                                byteCount -= 3;
                        }
-                       InternalGetBytes (chars, charCount,
-                                         bytes, byteCount,
-                                         this.FallbackBuffer,
-                                         out charsProcessed, out bytesProcessed,
-                                         ref leftChar);
-                       if (flush)
-                               leftChar = 0;
+                       if (InternalGetBytesEncode (
+                                       chars, charCount,
+                                       bytes, byteCount,
+                                       this.FallbackBuffer,
+                                       out charsProcessed, out bytesProcessed,
+                                       ref leftChar,
+                                       flush) == EncoderStatus.InsufficientSpace)
+                               throw new ArgumentException ("Insufficient Space", "bytes");
                        return bytesProcessed + preambleSize;
                }
 
@@ -1046,13 +1242,14 @@ again:
                                preambleSize = 3;
                                emittedIdentifier = true;
                        }
-                       InternalGetBytes (chars, charIndex, charCount,
-                                         bytes, byteIndex,
-                                         this.FallbackBuffer,
-                                         out charsProcessed, out bytesProcessed,
-                                         ref leftChar);
-                       if (flush)
-                               leftChar = 0;
+                       if (InternalGetBytesEncode (
+                                       chars, charIndex, charCount,
+                                       bytes, byteIndex,
+                                       this.FallbackBuffer,
+                                       out charsProcessed, out bytesProcessed,
+                                       ref leftChar,
+                                       flush) == EncoderStatus.InsufficientSpace)
+                               throw new ArgumentException ("Insufficient Space", "bytes");
                        return bytesProcessed + preambleSize;
                }
 
@@ -1069,36 +1266,28 @@ again:
                        out int charsUsed, out int bytesUsed, out bool completed)
                {
                        int preambleSize = 0;
-                       if (bytes == null)
-                               throw new ArgumentNullException ("bytes");
-                       if (byteCount < 0)
-                               throw new IndexOutOfRangeException ("charCount");
-                       if (chars == null)
-                               throw new ArgumentNullException ("chars");
-                       if (charCount < 0)
-                               throw new IndexOutOfRangeException ("charCount");
                        if (emitIdentifier && !emittedIdentifier) {
-                               if (byteCount < 3)
-                                       throw new ArgumentException ("Insufficient Space", "UTF8 preamble");
-                               *bytes++ = 0xEF;
-                               *bytes++ = 0xBB;
-                               *bytes++ = 0xBF;
-                               preambleSize = 3;
-                               emittedIdentifier = true;
-                               byteCount -= 3;
+                               if (bytes != null && byteCount >= 3)
+                               {
+                                       *bytes++ = 0xEF;
+                                       *bytes++ = 0xBB;
+                                       *bytes++ = 0xBF;
+                                       preambleSize = 3;
+                                       emittedIdentifier = true;
+                                       byteCount -= 3;
+                               }
                        }
-                       InternalGetBytes (
+                       InternalGetBytesEncode (
                                        chars, charCount,
                                        bytes, byteCount,
                                        this.FallbackBuffer,
                                        out charsUsed, out bytesUsed,
-                                       ref leftChar);
+                                       ref leftChar,
+                                       flush);
                        // only completed if all chars have been processed and
                        // succesful converted to chars!!
                        completed = (charCount == charsUsed);
                        bytesUsed += preambleSize;
-                       if (flush)
-                               leftChar = 0;
                }
        } // class UTF8Encoder