[Serializable]
[MonoLimitation ("Serialization format not compatible with .NET")]
-[MonoLimitation ("EncoderFallback is not handled")]
[ComVisible (true)]
public class UTF8Encoding : Encoding
{
{
emitIdentifier = encoderShouldEmitUTF8Identifier;
if (throwOnInvalidBytes)
- SetFallbackInternal (null, DecoderFallback.ExceptionFallback);
+ SetFallbackInternal (EncoderFallback.ExceptionFallback, DecoderFallback.ExceptionFallback);
else
- SetFallbackInternal (null, DecoderFallback.StandardSafeFallback);
+ SetFallbackInternal (EncoderFallback.StandardSafeFallback, DecoderFallback.StandardSafeFallback);
web_name = body_name = header_name = "utf-8";
encoding_name = "Unicode (UTF-8)";
// Internal version of "GetByteCount" which can handle a rolling
// state between multiple calls to this method.
- private static int InternalGetByteCount (char[] chars, int index, int count, ref char leftOver, bool flush)
+ private static int InternalGetByteCount (char[] chars, int index, int count, EncoderFallback fallback, ref char leftOver, bool flush)
{
// Validate the parameters.
if (chars == null) {
unsafe {
fixed (char* cptr = chars) {
- return InternalGetByteCount (cptr + index, count, ref leftOver, flush);
+ return InternalGetByteCount (cptr + index, count, fallback, ref leftOver, flush);
}
}
}
- private unsafe static int InternalGetByteCount (char* chars, int count, ref char leftOver, bool flush)
+ private unsafe static int InternalGetByteCount (char* chars, int count, EncoderFallback fallback, ref char leftOver, bool flush)
{
int length = 0;
char* end = chars + count;
+ char* start = chars;
+ EncoderFallbackBuffer buffer = null;
while (chars < end) {
if (leftOver == 0) {
for (; chars < end; chars++) {
break;
} else {
// We have a surrogate tail without
- // leading surrogate. In NET_2_0 it
- // uses fallback. In NET_1_1 we output
- // wrong surrogate.
- length += 3;
+ // leading surrogate.
+ char [] fallback_chars = GetFallbackChars (chars, start, fallback, ref buffer);
+ fixed (char *fb_chars = fallback_chars) {
+ char dummy = '\0';
+ length += InternalGetByteCount (fb_chars, fallback_chars.Length, fallback, ref dummy, true);
+ }
+
leftOver = '\0';
}
}
// invalid, but we have to do something.
// We write out the surrogate start and then
// re-visit the current character again.
- length += 3;
+ char [] fallback_chars = GetFallbackChars (chars, start, fallback, ref buffer);
+ fixed (char *fb_chars = fallback_chars) {
+ char dummy = '\0';
+ length += InternalGetByteCount (fb_chars, fallback_chars.Length, fallback, ref dummy, true);
+ }
}
leftOver = '\0';
}
return length;
}
+ unsafe static char [] GetFallbackChars (char *chars, char *start, EncoderFallback fallback, ref EncoderFallbackBuffer buffer)
+ {
+ if (buffer == null)
+ buffer = fallback.CreateFallbackBuffer ();
+
+ buffer.Fallback (*chars, (int) (chars - start));
+
+ char [] fallback_chars = new char [buffer.Remaining];
+ for (int i = 0; i < fallback_chars.Length; i++)
+ fallback_chars [i] = buffer.GetNextChar ();
+
+ buffer.Reset ();
+
+ return fallback_chars;
+ }
+
// Get the number of bytes needed to encode a character buffer.
public override int GetByteCount (char[] chars, int index, int count)
{
char dummy = '\0';
- return InternalGetByteCount (chars, index, count, ref dummy, true);
+ return InternalGetByteCount (chars, index, count, EncoderFallback, ref dummy, true);
}
if (count == 0)
return 0;
char dummy = '\0';
- return InternalGetByteCount (chars, count, ref dummy, true);
+ return InternalGetByteCount (chars, count, EncoderFallback, ref dummy, true);
}
#endregion
// state between multiple calls to this method.
private static int InternalGetBytes (char[] chars, int charIndex,
int charCount, byte[] bytes,
- int byteIndex, ref char leftOver,
- bool flush)
+ int byteIndex,
+ EncoderFallback fallback, ref EncoderFallbackBuffer buffer,
+ ref char leftOver, bool flush)
{
// Validate the parameters.
if (chars == null) {
if (bytes.Length == byteIndex)
return InternalGetBytes (
cptr + charIndex, charCount,
- null, 0, ref leftOver, flush);
+ null, 0, fallback, ref buffer, ref leftOver, flush);
fixed (byte *bptr = bytes) {
return InternalGetBytes (
cptr + charIndex, charCount,
bptr + byteIndex, bytes.Length - byteIndex,
+ fallback, ref buffer,
ref leftOver, flush);
}
}
}
}
- private unsafe static int InternalGetBytes (char* chars, int count, byte* bytes, int bcount, ref char leftOver, bool flush)
+ private unsafe static int InternalGetBytes (char* chars, int count, byte* bytes, int bcount, EncoderFallback fallback, ref EncoderFallbackBuffer buffer, ref char leftOver, bool flush)
{
char* end = chars + count;
+ char* start = chars;
+ byte* start_bytes = bytes;
byte* end_bytes = bytes + bcount;
while (chars < end) {
if (leftOver == 0) {
break;
} else {
// We have a surrogate tail without
- // leading surrogate. In NET_2_0 it
- // uses fallback. In NET_1_1 we output
- // wrong surrogate.
- if (bytes + 2 >= end_bytes)
+ // leading surrogate.
+ char [] fallback_chars = GetFallbackChars (chars, start, fallback, ref buffer);
+ char dummy = '\0';
+ if (bytes + InternalGetByteCount (fallback_chars, 0, fallback_chars.Length, fallback, ref dummy, true) > end_bytes)
goto fail_no_space;
- bytes [0] = (byte) (0xE0 | (ch >> 12));
- bytes [1] = (byte) (0x80 | ((ch >> 6) & 0x3F));
- bytes [2] = (byte) (0x80 | (ch & 0x3F));
- bytes += 3;
+ fixed (char *fb_chars = fallback_chars) {
+ bytes += InternalGetBytes (fb_chars, fallback_chars.Length, bytes, bcount - (int) (bytes - start_bytes), fallback, ref buffer, ref dummy, true);
+ }
+
leftOver = '\0';
}
}
// invalid, but we have to do something.
// We write out the surrogate start and then
// re-visit the current character again.
- int ch = leftOver;
- if (bytes + 2 >= end_bytes)
+ char [] fallback_chars = GetFallbackChars (chars, start, fallback, ref buffer);
+ char dummy = '\0';
+ if (bytes + InternalGetByteCount (fallback_chars, 0, fallback_chars.Length, fallback, ref dummy, true) > end_bytes)
goto fail_no_space;
- bytes [0] = (byte) (0xE0 | (ch >> 12));
- bytes [1] = (byte) (0x80 | ((ch >> 6) & 0x3F));
- bytes [2] = (byte) (0x80 | (ch & 0x3F));
- bytes += 3;
+ fixed (char *fb_chars = fallback_chars) {
+ InternalGetBytes (fb_chars, fallback_chars.Length, bytes, bcount - (int) (bytes - start_bytes), fallback, ref buffer, ref dummy, true);
+ }
+
+ leftOver = '\0';
}
leftOver = '\0';
}
byte[] bytes, int byteIndex)
{
char leftOver = '\0';
- return InternalGetBytes (chars, charIndex, charCount, bytes, byteIndex, ref leftOver, true);
+ EncoderFallbackBuffer buffer = null;
+ return InternalGetBytes (chars, charIndex, charCount, bytes, byteIndex, EncoderFallback, ref buffer, ref leftOver, true);
}
// Convenience wrappers for "GetBytes".
unsafe {
fixed (char* cptr = s) {
char dummy = '\0';
+ EncoderFallbackBuffer buffer = null;
if (bytes.Length == byteIndex)
return InternalGetBytes (
cptr + charIndex, charCount,
- null, 0, ref dummy, true);
+ null, 0, EncoderFallback, ref buffer, ref dummy, true);
fixed (byte *bptr = bytes) {
return InternalGetBytes (
cptr + charIndex, charCount,
bptr + byteIndex, bytes.Length - byteIndex,
+ EncoderFallback, ref buffer,
ref dummy, true);
}
}
return 0;
char dummy = '\0';
+ EncoderFallbackBuffer buffer = null;
if (byteCount == 0)
- return InternalGetBytes (chars, charCount, null, 0, ref dummy, true);
+ return InternalGetBytes (chars, charCount, null, 0, EncoderFallback, ref buffer, ref dummy, true);
else
- return InternalGetBytes (chars, charCount, bytes, byteCount, ref dummy, true);
+ return InternalGetBytes (chars, charCount, bytes, byteCount, EncoderFallback, ref buffer, ref dummy, true);
}
#endregion
throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
}
- if (charIndex == chars.Length)
+ if (charIndex == chars.Length && byteCount == 0)
return 0;
fixed (char* cptr = chars) {
if (leftOverCount == 0) {
int end = byteIndex + byteCount;
for (; byteIndex < end; posn++, byteIndex++, byteCount--) {
- if (bytes [byteIndex] < 0x80)
+ if (bytes [byteIndex] < 0x80) {
+ if (posn >= length) {
+ throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
+ }
chars [posn] = (char) bytes [byteIndex];
- else
+ } else {
break;
+ }
}
}
if (charCount < 0) {
throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
}
- return charCount * 4;
+
+ // Add 1 to charCount since there may be a lead surrogate left from the previous call to GetBytes/Encoder.Convert
+ charCount = charCount + 1;
+ if (EncoderFallback.MaxCharCount > 1) {
+ charCount = charCount * EncoderFallback.MaxCharCount;
+ }
+
+ return charCount * 3;
}
// Get the maximum number of characters needed to decode a
if (byteCount < 0) {
throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));
}
- return byteCount;
+
+ // Add 1 to byteCount since there may be the bytes from part of a surrogate pair left from the previous call to GetChars/Decoder.Convert
+ int maxCharCount = byteCount + 1;
+ if (DecoderFallback.MaxCharCount > 1) {
+ maxCharCount = maxCharCount * DecoderFallback.MaxCharCount;
+ }
+
+ return maxCharCount;
}
// Get a UTF8-specific decoder that is attached to this instance.
// Get a UTF8-specific encoder that is attached to this instance.
public override Encoder GetEncoder ()
{
- return new UTF8Encoder (emitIdentifier);
+ return new UTF8Encoder (EncoderFallback, emitIdentifier);
}
// Get the UTF8 preamble.
if (emitIdentifier)
return new byte [] { 0xEF, 0xBB, 0xBF };
- return new byte [0];
+ return EmptyArray<byte>.Value;
}
// Determine if this object is equal to another.
private char leftOverForConv;
// Constructor.
- public UTF8Encoder (bool emitIdentifier)
+ public UTF8Encoder (EncoderFallback fallback, bool emitIdentifier)
{
+ Fallback = fallback;
// this.emitIdentifier = emitIdentifier;
leftOverForCount = '\0';
leftOverForConv = '\0';
public override int GetByteCount (char[] chars, int index,
int count, bool flush)
{
- return InternalGetByteCount (chars, index, count, ref leftOverForCount, flush);
+ return InternalGetByteCount (chars, index, count, Fallback, ref leftOverForCount, flush);
}
public override int GetBytes (char[] chars, int charIndex,
int charCount, byte[] bytes, int byteIndex, bool flush)
{
int result;
- result = InternalGetBytes (chars, charIndex, charCount, bytes, byteIndex, ref leftOverForConv, flush);
+ EncoderFallbackBuffer buffer = null;
+ result = InternalGetBytes (chars, charIndex, charCount, bytes, byteIndex, Fallback, ref buffer, ref leftOverForConv, flush);
// emitIdentifier = false;
return result;
}
public unsafe override int GetByteCount (char* chars, int count, bool flush)
{
- return InternalGetByteCount (chars, count, ref leftOverForCount, flush);
+ return InternalGetByteCount (chars, count, Fallback, ref leftOverForCount, flush);
}
public unsafe override int GetBytes (char* chars, int charCount,
byte* bytes, int byteCount, bool flush)
{
int result;
- result = InternalGetBytes (chars, charCount, bytes, byteCount, ref leftOverForConv, flush);
+ EncoderFallbackBuffer buffer = null;
+ result = InternalGetBytes (chars, charCount, bytes, byteCount, Fallback, ref buffer, ref leftOverForConv, flush);
// emitIdentifier = false;
return result;
}