if (throwOnInvalidBytes)
SetFallbackInternal (null, new DecoderExceptionFallback ());
else
- SetFallbackInternal (null, new DecoderReplacementFallback (String.Empty));
+ SetFallbackInternal (null, new DecoderReplacementFallback ("\uFFFD"));
#else
throwOnInvalid = throwOnInvalidBytes;
#endif
is_browser_save = true;
is_browser_display = true;
is_mail_news_display = true;
+ is_mail_news_save = true;
windows_code_page = UnicodeEncoding.UNICODE_CODE_PAGE;
}
}
}
-
private unsafe static int InternalGetByteCount (char* chars, int count, ref char leftOver, bool flush)
{
- int index = 0;
-
- // Determine the lengths of all characters.
- char ch;
int length = 0;
- char pair = leftOver;
- while (count > 0) {
- ch = chars[index];
- if (pair == 0) {
- if (ch < '\u0080') {
- // fast path optimization
- int end = index + count;
- for (; index < end; index++, count--) {
- if (chars [index] < '\x80')
- ++length;
- else
- break;
+ char* end = chars + count;
+ while (chars < end) {
+ if (leftOver == 0) {
+ for (; chars < end; chars++) {
+ if (*chars < '\x80') {
+ ++length;
+ } else if (*chars < '\x800') {
+ length += 2;
+ } else if (*chars < '\uD800' || *chars > '\uDFFF') {
+ length += 3;
+ } else if (*chars <= '\uDBFF') {
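+ // This is a leading (high) surrogate. Stay in the inner loop when
+ // the trailing (low) surrogate follows within this buffer; otherwise
+ // save it in leftOver and exit so the outer loop can complete the pair.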
+ if (chars + 1 < end && chars [1] >= '\uDC00' && chars [1] <= '\uDFFF') {
+ length += 4;
+ chars++;
+ continue;
+ }
+ leftOver = *chars;
+ chars++;
+ break;
+ } else {
+ // We have a surrogate tail without
+ // leading surrogate. In NET_2_0 it
+ // uses fallback. In NET_1_1 we output
+ // wrong surrogate.
+ length += 3;
+ leftOver = '\0';
}
- continue;
- //length++;
- } else if (ch < '\u0800') {
- length += 2;
- } else if (ch >= '\uD800' && ch <= '\uDBFF') {
- // This is the start of a surrogate pair.
- pair = ch;
- } else {
- length += 3;
}
- } else if (ch >= '\uDC00' && ch <= '\uDFFF') {
- if (pair != 0) {
- // We have a surrogate pair.
+ } else {
+ if (*chars >= '\uDC00' && *chars <= '\uDFFF') {
+ // We have a correct surrogate pair.
length += 4;
- pair = '\0';
+ chars++;
} else {
- // We have a surrogate tail without
- // leading surrogate. In NET_2_0 it
- // uses fallback. In NET_1_1 we output
- // wrong surrogate.
+ // We have a surrogate start followed by a
+ // regular character. Technically, this is
+ // invalid, but we have to do something.
+ // We count three bytes for the surrogate start
+ // and then re-visit the current character.
length += 3;
- pair = '\0';
}
- } else {
- // We have a surrogate start followed by a
- // regular character. Technically, this is
- // invalid, but we have to do something.
- // We write out the surrogate start and then
- // re-visit the current character again.
- length += 3;
- pair = '\0';
- continue;
+ leftOver = '\0';
}
- ++index;
- --count;
}
if (flush) {
- if (pair != '\0')
- // Flush the left-over surrogate pair start.
+ // Flush the left-over surrogate pair start.
+ if (leftOver != '\0') {
length += 3;
- leftOver = '\0';
+ leftOver = '\0';
+ }
}
- else
- leftOver = pair;
-
- // Return the final length to the caller.
return length;
}
#if !NET_2_0
// Convenience wrappers for "GetByteCount".
- public override int GetByteCount (String s)
+ public override int GetByteCount (String chars)
{
// Validate the parameters.
- if (s == null) {
- throw new ArgumentNullException ("s");
+ if (chars == null) {
+ throw new ArgumentNullException ("chars");
}
unsafe {
- fixed (char* cptr = s) {
+ fixed (char* cptr = chars) {
char dummy = '\0';
- return InternalGetByteCount (cptr, s.Length, ref dummy, true);
+ return InternalGetByteCount (cptr, chars.Length, ref dummy, true);
}
}
}
}
}
- private unsafe static int InternalGetBytes (char* chars, int charCount,
- byte* bytes, int byteCount,
- ref char leftOver, bool flush)
+ private unsafe static int InternalGetBytes (char* chars, int count, byte* bytes, int bcount, ref char leftOver, bool flush)
{
- int charIndex = 0;
- int byteIndex = 0;
-
- // Convert the characters into bytes.
- // Convert the characters into bytes.
- char ch;
- int length = byteCount;
- char pair = leftOver;
- int posn = byteIndex;
- int code = 0;
-
- while (charCount > 0) {
- // Fetch the next UTF-16 character pair value.
- ch = chars [charIndex];
- if (pair == '\0') {
- if (ch < '\uD800' || ch >= '\uE000') {
- if (ch < '\x80') { // fast path optimization
- int end = charIndex + charCount;
- for (; charIndex < end; posn++, charIndex++, charCount--) {
- if (chars [charIndex] < '\x80')
- bytes [posn] = (byte) chars [charIndex];
- else
- break;
- }
- continue;
+ char* end = chars + count;
+ byte* end_bytes = bytes + bcount;
+ while (chars < end) {
+ if (leftOver == 0) {
+ for (; chars < end; chars++) {
+ int ch = *chars;
+ if (ch < '\x80') {
+ if (bytes >= end_bytes)
+ goto fail_no_space;
+ *bytes++ = (byte)ch;
+ } else if (ch < '\x800') {
+ if (bytes + 1 >= end_bytes)
+ goto fail_no_space;
+ bytes [0] = (byte) (0xC0 | (ch >> 6));
+ bytes [1] = (byte) (0x80 | (ch & 0x3F));
+ bytes += 2;
+ } else if (ch < '\uD800' || ch > '\uDFFF') {
+ if (bytes + 2 >= end_bytes)
+ goto fail_no_space;
+ bytes [0] = (byte) (0xE0 | (ch >> 12));
+ bytes [1] = (byte) (0x80 | ((ch >> 6) & 0x3F));
+ bytes [2] = (byte) (0x80 | (ch & 0x3F));
+ bytes += 3;
+ } else if (ch <= '\uDBFF') {
+ // This is a leading (high) surrogate: save it in leftOver and exit
+ // the inner loop so the outer loop can pair it with the next char.
+ leftOver = *chars;
+ chars++;
+ break;
+ } else {
+ // We have a surrogate tail without
+ // leading surrogate. In NET_2_0 it
+ // uses fallback. In NET_1_1 we output
+ // wrong surrogate.
+ if (bytes + 2 >= end_bytes)
+ goto fail_no_space;
+ bytes [0] = (byte) (0xE0 | (ch >> 12));
+ bytes [1] = (byte) (0x80 | ((ch >> 6) & 0x3F));
+ bytes [2] = (byte) (0x80 | (ch & 0x3F));
+ bytes += 3;
+ leftOver = '\0';
}
- code = ch;
- }
- else if (ch < '\uDC00') {
- // surrogate start
- pair = ch;
- ++charIndex;
- --charCount;
- continue;
- } else { // ch <= '\uDFFF'
- // We have a surrogate tail without leading
- // surrogate. In NET_2_0 it uses fallback.
- // In NET_1_1 we output wrong surrogate.
- if (posn > length - 3) {
- throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
- }
- bytes [posn++] = (byte) (0xE0 | (ch >> 12));
- bytes [posn++] = (byte) (0x80 | ((ch >> 6) & 0x3F));
- bytes [posn++] = (byte) (0x80 | (ch & 0x3F));
- ++charIndex;
- --charCount;
- continue;
}
} else {
- if ('\uDC00' <= ch && ch <= '\uDFFF')
- code = 0x10000 + (int) ch - 0xDC00 +
- (((int) pair - 0xD800) << 10);
- else {
+ if (*chars >= '\uDC00' && *chars <= '\uDFFF') {
+ // We have a correct surrogate pair.
+ int ch = 0x10000 + (int) *chars - 0xDC00 + (((int) leftOver - 0xD800) << 10);
+ if (bytes + 3 >= end_bytes)
+ goto fail_no_space;
+ bytes [0] = (byte) (0xF0 | (ch >> 18));
+ bytes [1] = (byte) (0x80 | ((ch >> 12) & 0x3F));
+ bytes [2] = (byte) (0x80 | ((ch >> 6) & 0x3F));
+ bytes [3] = (byte) (0x80 | (ch & 0x3F));
+ bytes += 4;
+ chars++;
+ } else {
// We have a surrogate start followed by a
// regular character. Technically, this is
// invalid, but we have to do something.
// We write out the surrogate start and then
// re-visit the current character again.
- if (posn > length - 3) {
- throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
- }
- bytes [posn++] = (byte) (0xE0 | (pair >> 12));
- bytes [posn++] = (byte) (0x80 | ((pair >> 6) & 0x3F));
- bytes [posn++] = (byte) (0x80 | (pair & 0x3F));
- pair = '\0';
- continue;
+ int ch = leftOver;
+ if (bytes + 2 >= end_bytes)
+ goto fail_no_space;
+ bytes [0] = (byte) (0xE0 | (ch >> 12));
+ bytes [1] = (byte) (0x80 | ((ch >> 6) & 0x3F));
+ bytes [2] = (byte) (0x80 | (ch & 0x3F));
+ bytes += 3;
}
- pair = '\0';
- }
- ++charIndex;
- --charCount;
-
- // Encode the character pair value.
- if (code < 0x0080) {
- if (posn >= length)
- throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
- bytes [posn++] = (byte)code;
- } else if (code < 0x0800) {
- if ((posn + 2) > length)
- throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
- bytes [posn++] = (byte) (0xC0 | (code >> 6));
- bytes [posn++] = (byte) (0x80 | (code & 0x3F));
- } else if (code < 0x10000) {
- if (posn > length - 3)
- throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
- bytes [posn++] = (byte) (0xE0 | (code >> 12));
- bytes [posn++] = (byte) (0x80 | ((code >> 6) & 0x3F));
- bytes [posn++] = (byte) (0x80 | (code & 0x3F));
- } else {
- if (posn > length - 4)
- throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
- bytes [posn++] = (byte) (0xF0 | (code >> 18));
- bytes [posn++] = (byte) (0x80 | ((code >> 12) & 0x3F));
- bytes [posn++] = (byte) (0x80 | ((code >> 6) & 0x3F));
- bytes [posn++] = (byte) (0x80 | (code & 0x3F));
+ leftOver = '\0';
}
}
-
if (flush) {
- if (pair != '\0') {
- // Flush the left-over incomplete surrogate.
- if (posn > length - 3) {
- throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
+ // Flush the left-over surrogate pair start.
+ if (leftOver != '\0') {
+ int ch = leftOver;
+ if (bytes + 2 < end_bytes) {
+ bytes [0] = (byte) (0xE0 | (ch >> 12));
+ bytes [1] = (byte) (0x80 | ((ch >> 6) & 0x3F));
+ bytes [2] = (byte) (0x80 | (ch & 0x3F));
+ bytes += 3;
+ } else {
+ goto fail_no_space;
}
- bytes [posn++] = (byte) (0xE0 | (pair >> 12));
- bytes [posn++] = (byte) (0x80 | ((pair >> 6) & 0x3F));
- bytes [posn++] = (byte) (0x80 | (pair & 0x3F));
+ leftOver = '\0';
}
- leftOver = '\0';
}
- else
- leftOver = pair;
-Char.IsLetterOrDigit (pair);
-
- // Return the final count to the caller.
- return posn - byteIndex;
- }
-
- private unsafe int Fallback (byte* bytes, int byteCount, char lead, char tail)
- {
- throw new NotImplementedException ();
+ return (int)(bytes - (end_bytes - bcount));
+fail_no_space:
+ throw new ArgumentException ("Insufficient Space", "bytes");
}
// Get the bytes that result from encoding a character buffer.
} else {
// Invalid UTF-8 start character.
#if NET_2_0
- length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - 1);
+ length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - 1, 1);
#else
if (throwOnInvalid)
throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
}
if (overlong) {
#if NET_2_0
- length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - 1);
+ length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - leftSoFar, leftSoFar);
#else
if (throwOnInvalid)
throw new ArgumentException (_("Overlong"), leftBits.ToString ());
length += 2;
} else {
#if NET_2_0
- length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - 1);
+ length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - leftSoFar, leftSoFar);
#else
if (throwOnInvalid)
throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
} else {
// Invalid UTF-8 sequence: clear and restart.
#if NET_2_0
- length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - 1);
+ length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - leftSoFar, leftSoFar);
#else
if (throwOnInvalid)
throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
// We had left-over bytes that didn't make up
// a complete UTF-8 character sequence.
#if NET_2_0
- length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index);
+ length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - leftSoFar, leftSoFar);
#else
if (throwOnInvalid)
throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
#if NET_2_0
// for GetCharCount()
- static unsafe int Fallback (object provider, ref DecoderFallbackBuffer buffer, ref byte [] bufferArg, byte* bytes, int index)
+ static unsafe int Fallback (object provider, ref DecoderFallbackBuffer buffer, ref byte [] bufferArg, byte* bytes, long index, uint size)
{
if (buffer == null) {
DecoderFallback fb = provider as DecoderFallback;
}
if (bufferArg == null)
bufferArg = new byte [1];
- bufferArg [0] = bytes [index];
- buffer.Fallback (bufferArg, 0);
- return buffer.Remaining;
+ int ret = 0;
+ for (int i = 0; i < size; i++) {
+ bufferArg [0] = bytes [(int) index + i];
+ buffer.Fallback (bufferArg, 0);
+ ret += buffer.Remaining;
+ buffer.Reset ();
+ }
+ return ret;
}
// for GetChars()
- static unsafe void Fallback (object provider, ref DecoderFallbackBuffer buffer, ref byte [] bufferArg, byte* bytes, int byteIndex,
+ static unsafe void Fallback (object provider, ref DecoderFallbackBuffer buffer, ref byte [] bufferArg, byte* bytes, long byteIndex, uint size,
char* chars, ref int charIndex)
{
if (buffer == null) {
}
if (bufferArg == null)
bufferArg = new byte [1];
- bufferArg [0] = bytes [byteIndex];
- buffer.Fallback (bufferArg, 0);
- while (buffer.Remaining > 0)
- chars [charIndex++] = buffer.GetNextChar ();
+ for (int i = 0; i < size; i++) {
+ bufferArg [0] = bytes [byteIndex + i];
+ buffer.Fallback (bufferArg, 0);
+ while (buffer.Remaining > 0)
+ chars [charIndex++] = buffer.GetNextChar ();
+ buffer.Reset ();
+ }
}
#endif
} else {
// Invalid UTF-8 start character.
#if NET_2_0
- Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex, chars, ref posn);
+ Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex, 1, chars, ref posn);
#else
if (throwOnInvalid)
throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
}
if (overlong) {
#if NET_2_0
- Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex, chars, ref posn);
+ Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex - leftSoFar, leftSoFar, chars, ref posn);
#else
if (throwOnInvalid)
throw new ArgumentException (_("Overlong"), leftBits.ToString ());
else if ((leftBits & 0xF800) == 0xD800) {
// UTF-8 doesn't use surrogate characters
#if NET_2_0
- Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex, chars, ref posn);
+ Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex - leftSoFar, leftSoFar, chars, ref posn);
#else
if (throwOnInvalid)
throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
(char)((leftBits & (uint)0x3FF) + (uint)0xDC00);
} else {
#if NET_2_0
- Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex, chars, ref posn);
+ Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex - leftSoFar, leftSoFar, chars, ref posn);
#else
if (throwOnInvalid)
throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
} else {
// Invalid UTF-8 sequence: clear and restart.
#if NET_2_0
- Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex, chars, ref posn);
+ Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex - leftSoFar, leftSoFar, chars, ref posn);
#else
if (throwOnInvalid)
throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
// We had left-over bytes that didn't make up
// a complete UTF-8 character sequence.
#if NET_2_0
- Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex, chars, ref posn);
+ Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex - leftSoFar, leftSoFar, chars, ref posn);
#else
if (throwOnInvalid)
throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
}
#if NET_2_0
- [MonoTODO]
- public override int GetByteCount (string s)
+ public override int GetByteCount (string chars)
{
// hmm, does this override make any sense?
- return base.GetByteCount (s);
+ return base.GetByteCount (chars);
}
- [MonoTODO]
[ComVisible (false)]
public override string GetString (byte [] bytes, int index, int count)
{
[Serializable]
private class UTF8Encoder : Encoder
{
- private bool emitIdentifier;
+// private bool emitIdentifier;
private char leftOverForCount;
private char leftOverForConv;
// Constructor.
public UTF8Encoder (bool emitIdentifier)
{
- this.emitIdentifier = emitIdentifier;
+// this.emitIdentifier = emitIdentifier;
leftOverForCount = '\0';
leftOverForConv = '\0';
}
{
int result;
result = InternalGetBytes (chars, charIndex, charCount, bytes, byteIndex, ref leftOverForConv, flush);
- emitIdentifier = false;
+// emitIdentifier = false;
return result;
}
{
int result;
result = InternalGetBytes (chars, charCount, bytes, byteCount, ref leftOverForConv, flush);
- emitIdentifier = false;
+// emitIdentifier = false;
return result;
}
#endif