X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mcs%2Fclass%2Fcorlib%2FSystem.Text%2FUTF8Encoding.cs;h=2c00c72eeaf43682588adfc19c1a40b9b3c86417;hb=7e18ed47c9606f3981e7b18cbc238d6781843153;hp=af8062ed660349512fbe77c7242cd6a8309bb611;hpb=7ff8f29ff29fa3f08ef305ac43ef079097323286;p=mono.git diff --git a/mcs/class/corlib/System.Text/UTF8Encoding.cs b/mcs/class/corlib/System.Text/UTF8Encoding.cs old mode 100755 new mode 100644 index af8062ed660..2c00c72eeaf --- a/mcs/class/corlib/System.Text/UTF8Encoding.cs +++ b/mcs/class/corlib/System.Text/UTF8Encoding.cs @@ -30,6 +30,9 @@ using System; [Serializable] [MonoTODO ("Fix serialization compatibility with MS.NET")] +#if NET_2_0 +[MonoTODO ("EncoderFallback is not handled")] +#endif public class UTF8Encoding : Encoding { // Magic number used by Windows for UTF-8. @@ -37,7 +40,9 @@ public class UTF8Encoding : Encoding // Internal state. private bool emitIdentifier; +#if !NET_2_0 private bool throwOnInvalid; +#endif // Constructors. public UTF8Encoding () : this (false, false) {} @@ -48,7 +53,14 @@ public class UTF8Encoding : Encoding : base (UTF8_CODE_PAGE) { emitIdentifier = encoderShouldEmitUTF8Identifier; +#if NET_2_0 + if (throwOnInvalidBytes) + SetFallbackInternal (null, new DecoderExceptionFallback ()); + else + SetFallbackInternal (null, new DecoderReplacementFallback (String.Empty)); +#else throwOnInvalid = throwOnInvalidBytes; +#endif web_name = body_name = header_name = "utf-8"; encoding_name = "Unicode (UTF-8)"; @@ -358,10 +370,17 @@ public class UTF8Encoding : Encoding // Internal version of "GetCharCount" which can handle a rolling // state between multiple calls to this method. - private static int InternalGetCharCount (byte[] bytes, int index, int count, - uint leftOverBits, - uint leftOverCount, - bool throwOnInvalid, bool flush) +#if NET_2_0 + // Internal version of "GetCharCount" which can handle a rolling + // state between multiple calls to this method. + private static int InternalGetCharCount ( + byte[] bytes, int index, int count, uint leftOverBits, + uint leftOverCount, DecoderFallbackBuffer fallbackBuffer, bool flush) +#else + private static int InternalGetCharCount ( + byte[] bytes, int index, int count, uint leftOverBits, + uint leftOverCount, bool throwOnInvalid, bool flush) +#endif { // Validate the parameters. if (bytes == null) { @@ -408,16 +427,19 @@ public class UTF8Encoding : Encoding leftBits = (ch & (uint)0x03); leftSoFar = 1; leftSize = 5; - } else if ((ch & (uint)0xFC) == (uint)0xFC) { + } else if ((ch & (uint)0xFE) == (uint)0xFC) { // Six-byte UTF-8 character. leftBits = (ch & (uint)0x03); leftSoFar = 1; leftSize = 6; } else { // Invalid UTF-8 start character. - if (throwOnInvalid) { +#if NET_2_0 + length += Fallback (fallbackBuffer, bytes, index - 1); +#else + if (throwOnInvalid) throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes"); - } +#endif } } else { // Process an extra byte in a multi-byte sequence. @@ -426,73 +448,116 @@ public class UTF8Encoding : Encoding if (++leftSoFar >= leftSize) { // We have a complete character now. if (leftBits < (uint)0x10000) { - if (leftBits != (uint)0xFEFF) { - // is it an overlong ? - bool overlong = false; - switch (leftSize) { - case 2: - overlong = (leftBits <= 0x7F); - break; - case 3: - overlong = (leftBits <= 0x07FF); - break; - case 4: - overlong = (leftBits <= 0xFFFF); - break; - case 5: - overlong = (leftBits <= 0x1FFFFF); - break; - case 6: - overlong = (leftBits <= 0x03FFFFFF); - break; - } - if (overlong) { - if (throwOnInvalid) - throw new ArgumentException (_("Overlong"), leftBits.ToString ()); - } - else - ++length; + // is it an overlong ? + bool overlong = false; + switch (leftSize) { + case 2: + overlong = (leftBits <= 0x7F); + break; + case 3: + overlong = (leftBits <= 0x07FF); + break; + case 4: + overlong = (leftBits <= 0xFFFF); + break; + case 5: + overlong = (leftBits <= 0x1FFFFF); + break; + case 6: + overlong = (leftBits <= 0x03FFFFFF); + break; + } + if (overlong) { +#if NET_2_0 + length += Fallback (fallbackBuffer, bytes, index - 1); +#else + if (throwOnInvalid) + throw new ArgumentException (_("Overlong"), leftBits.ToString ()); +#endif } + else + ++length; } else if (leftBits < (uint)0x110000) { length += 2; - } else if (throwOnInvalid) { - throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes"); + } else { +#if NET_2_0 + length += Fallback (fallbackBuffer, bytes, index - 1); +#else + if (throwOnInvalid) + throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes"); +#endif } leftSize = 0; } } else { // Invalid UTF-8 sequence: clear and restart. - if (throwOnInvalid) { +#if NET_2_0 + length += Fallback (fallbackBuffer, bytes, index - 1); +#else + if (throwOnInvalid) throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes"); - } +#endif leftSize = 0; --index; ++count; } } } - if (flush && leftSize != 0 && throwOnInvalid) { + if (flush && leftSize != 0) { // We had left-over bytes that didn't make up // a complete UTF-8 character sequence. - throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes"); +#if NET_2_0 + length += Fallback (fallbackBuffer, bytes, index - 1); +#else + if (throwOnInvalid) + throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes"); +#endif } // Return the final length to the caller. return length; } +#if NET_2_0 + // for GetCharCount() + static int Fallback (DecoderFallbackBuffer buffer, byte [] bytes, int index) + { + buffer.Fallback (bytes, index - 1); + return buffer.Remaining; + } + + // for GetChars() + static void Fallback (DecoderFallbackBuffer buffer, byte [] bytes, int byteIndex, + char [] chars, ref int charIndex) + { + buffer.Fallback (bytes, byteIndex - 1); + while (buffer.Remaining > 0) + chars [charIndex++] = buffer.GetNextChar (); + } +#endif + // Get the number of characters needed to decode a byte buffer. public override int GetCharCount (byte[] bytes, int index, int count) { +#if NET_2_0 + return InternalGetCharCount (bytes, index, count, 0, 0, DecoderFallback.CreateFallbackBuffer (), true); +#else return InternalGetCharCount (bytes, index, count, 0, 0, throwOnInvalid, true); +#endif } // Get the characters that result from decoding a byte buffer. - private static int InternalGetChars (byte[] bytes, int byteIndex, - int byteCount, char[] chars, - int charIndex, ref uint leftOverBits, - ref uint leftOverCount, - bool throwOnInvalid, bool flush) +#if NET_2_0 + private static int InternalGetChars ( + byte[] bytes, int byteIndex, int byteCount, char[] chars, + int charIndex, ref uint leftOverBits, ref uint leftOverCount, + DecoderFallbackBuffer fallbackBuffer, bool flush) +#else + private static int InternalGetChars ( + byte[] bytes, int byteIndex, int byteCount, char[] chars, + int charIndex, ref uint leftOverBits, ref uint leftOverCount, + bool throwOnInvalid, bool flush) +#endif { // Validate the parameters. if (bytes == null) { @@ -553,16 +618,19 @@ public class UTF8Encoding : Encoding leftBits = (ch & (uint)0x03); leftSoFar = 1; leftSize = 5; - } else if ((ch & (uint)0xFC) == (uint)0xFC) { + } else if ((ch & (uint)0xFE) == (uint)0xFC) { // Six-byte UTF-8 character. leftBits = (ch & (uint)0x03); leftSoFar = 1; leftSize = 6; } else { // Invalid UTF-8 start character. - if (throwOnInvalid) { +#if NET_2_0 + Fallback (fallbackBuffer, bytes, byteIndex, chars, ref posn); +#else + if (throwOnInvalid) throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes"); - } +#endif } } else { // Process an extra byte in a multi-byte sequence. @@ -571,37 +639,39 @@ public class UTF8Encoding : Encoding if (++leftSoFar >= leftSize) { // We have a complete character now. if (leftBits < (uint)0x10000) { - if (leftBits != (uint)0xFEFF) { - // is it an overlong ? - bool overlong = false; - switch (leftSize) { - case 2: - overlong = (leftBits <= 0x7F); - break; - case 3: - overlong = (leftBits <= 0x07FF); - break; - case 4: - overlong = (leftBits <= 0xFFFF); - break; - case 5: - overlong = (leftBits <= 0x1FFFFF); - break; - case 6: - overlong = (leftBits <= 0x03FFFFFF); - break; - } - if (overlong) { - if (throwOnInvalid) - throw new ArgumentException (_("Overlong"), leftBits.ToString ()); - } - else { - if (posn >= length) { - throw new ArgumentException - (_("Arg_InsufficientSpace"), "chars"); - } - chars[posn++] = (char)leftBits; + // is it an overlong ? + bool overlong = false; + switch (leftSize) { + case 2: + overlong = (leftBits <= 0x7F); + break; + case 3: + overlong = (leftBits <= 0x07FF); + break; + case 4: + overlong = (leftBits <= 0xFFFF); + break; + case 5: + overlong = (leftBits <= 0x1FFFFF); + break; + case 6: + overlong = (leftBits <= 0x03FFFFFF); + break; + } + if (overlong) { +#if NET_2_0 + Fallback (fallbackBuffer, bytes, byteIndex, chars, ref posn); +#else + if (throwOnInvalid) + throw new ArgumentException (_("Overlong"), leftBits.ToString ()); +#endif + } + else { + if (posn >= length) { + throw new ArgumentException + (_("Arg_InsufficientSpace"), "chars"); } + chars[posn++] = (char)leftBits; } } else if (leftBits < (uint)0x110000) { if ((posn + 2) > length) { @@ -613,26 +683,39 @@ public class UTF8Encoding : Encoding (uint)0xD800); chars[posn++] = (char)((leftBits & (uint)0x3FF) + (uint)0xDC00); - } else if (throwOnInvalid) { - throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes"); + } else { +#if NET_2_0 + Fallback (fallbackBuffer, bytes, byteIndex, chars, ref posn); +#else + if (throwOnInvalid) + throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes"); +#endif } leftSize = 0; } } else { // Invalid UTF-8 sequence: clear and restart. - if (throwOnInvalid) { +#if NET_2_0 + Fallback (fallbackBuffer, bytes, byteIndex, chars, ref posn); +#else + if (throwOnInvalid) throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes"); - } +#endif leftSize = 0; --byteIndex; ++byteCount; } } } - if (flush && leftSize != 0 && throwOnInvalid) { + if (flush && leftSize != 0) { // We had left-over bytes that didn't make up // a complete UTF-8 character sequence. - throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes"); +#if NET_2_0 + Fallback (fallbackBuffer, bytes, byteIndex, chars, ref posn); +#else + if (throwOnInvalid) + throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes"); +#endif } leftOverBits = leftBits; leftOverCount = (leftSoFar | (leftSize << 4)); @@ -647,8 +730,13 @@ public class UTF8Encoding : Encoding { uint leftOverBits = 0; uint leftOverCount = 0; +#if NET_2_0 + return InternalGetChars (bytes, byteIndex, byteCount, chars, + charIndex, ref leftOverBits, ref leftOverCount, DecoderFallback.CreateFallbackBuffer (), true); +#else return InternalGetChars (bytes, byteIndex, byteCount, chars, charIndex, ref leftOverBits, ref leftOverCount, throwOnInvalid, true); +#endif } // Get the maximum number of bytes needed to encode a @@ -674,7 +762,11 @@ public class UTF8Encoding : Encoding // Get a UTF8-specific decoder that is attached to this instance. public override Decoder GetDecoder () { +#if NET_2_0 + return new UTF8Decoder (DecoderFallback); +#else return new UTF8Decoder (throwOnInvalid); +#endif } // Get a UTF8-specific encoder that is attached to this instance. @@ -702,9 +794,16 @@ public class UTF8Encoding : Encoding { UTF8Encoding enc = (value as UTF8Encoding); if (enc != null) { +#if NET_2_0 + return (codePage == enc.codePage && + emitIdentifier == enc.emitIdentifier && + DecoderFallback == enc.DecoderFallback && + EncoderFallback == enc.EncoderFallback); +#else return (codePage == enc.codePage && emitIdentifier == enc.emitIdentifier && throwOnInvalid == enc.throwOnInvalid); +#endif } else { return false; } @@ -731,14 +830,24 @@ public class UTF8Encoding : Encoding [Serializable] private class UTF8Decoder : Decoder { +#if !NET_2_0 private bool throwOnInvalid; +#endif private uint leftOverBits; private uint leftOverCount; // Constructor. +#if NET_2_0 + public UTF8Decoder (DecoderFallback fallback) +#else public UTF8Decoder (bool throwOnInvalid) +#endif { +#if NET_2_0 + Fallback = fallback; +#else this.throwOnInvalid = throwOnInvalid; +#endif leftOverBits = 0; leftOverCount = 0; } @@ -746,14 +855,24 @@ public class UTF8Encoding : Encoding // Override inherited methods. public override int GetCharCount (byte[] bytes, int index, int count) { +#if NET_2_0 + return InternalGetCharCount (bytes, index, count, + leftOverBits, leftOverCount, FallbackBuffer, false); +#else return InternalGetCharCount (bytes, index, count, leftOverBits, leftOverCount, throwOnInvalid, false); +#endif } public override int GetChars (byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex) { +#if NET_2_0 + return InternalGetChars (bytes, byteIndex, byteCount, + chars, charIndex, ref leftOverBits, ref leftOverCount, FallbackBuffer, false); +#else return InternalGetChars (bytes, byteIndex, byteCount, chars, charIndex, ref leftOverBits, ref leftOverCount, throwOnInvalid, false); +#endif } } // class UTF8Decoder