using System.IO;
using System.Text;
-#if NET_2_0
using DecoderException = System.Text.DecoderFallbackException;
-#else
-using DecoderException = System.ArgumentException;
-#endif
using AssertType = NUnit.Framework.Assert;
}
[Test]
-#if NET_2_0
- [Category ("NotWorking")]
-#endif
public void TestMaxCharCount()
{
UTF8Encoding UTF8enc = new UTF8Encoding ();
-#if NET_2_0
- // hmm, where is this extra 1 coming from?
+ // Second encoding is constructed with the BOM (preamble) flag set; it is
+ // expected to report the same maximum char count as the default one.
+ Encoding UTF8encWithBOM = new UTF8Encoding(true);
Assert.AreEqual (51, UTF8enc.GetMaxCharCount(50), "UTF #1");
-#else
- Assert.AreEqual (50, UTF8enc.GetMaxCharCount(50), "UTF #1");
-#endif
+ // Compare char counts here (not byte counts): the byte-count comparison
+ // is already covered by TestMaxByteCount.
+ Assert.AreEqual (UTF8enc.GetMaxCharCount(50), UTF8encWithBOM.GetMaxCharCount(50), "UTF #2");
+ }
+
+ [Test]
+ public void TestMaxCharCountWithCustomFallback()
+ {
+ // Both fallbacks substitute a two-character replacement string; the test
+ // pins the maximum char count reported for 50 input bytes at 102.
+ var encoderFallback = new EncoderReplacementFallback("\u2047\u2047");
+ var decoderFallback = new DecoderReplacementFallback("\u2047\u2047");
+ Encoding encoding = Encoding.GetEncoding("utf-8", encoderFallback, decoderFallback);
+ int maxChars = encoding.GetMaxCharCount(50);
+ Assert.AreEqual (102, maxChars, "UTF #1");
}
[Test]
-#if NET_2_0
- [Category ("NotWorking")]
-#endif
public void TestMaxByteCount()
{
UTF8Encoding UTF8enc = new UTF8Encoding ();
-#if NET_2_0
- // maybe under .NET 2.0 insufficient surrogate pair is
- // just not handled, and 3 is Preamble size.
+ Encoding UTF8encWithBOM = new UTF8Encoding(true); // same encoding, but constructed with the BOM (preamble) flag set
+ // The literal 153 below pins the default encoding's maximum byte count for 50 chars.
Assert.AreEqual (153, UTF8enc.GetMaxByteCount(50), "UTF #1");
-#else
- Assert.AreEqual (200, UTF8enc.GetMaxByteCount(50), "UTF #1");
-#endif
+ Assert.AreEqual (UTF8enc.GetMaxByteCount(50), UTF8encWithBOM.GetMaxByteCount(50), "UTF #2"); // the BOM flag must not change the reported maximum
+ }
+
+ [Test]
+ public void TestMaxByteCountWithCustomFallback()
+ {
+ // The encoder fallback substitutes a two-character replacement string;
+ // the test pins the maximum byte count reported for 50 chars at 306.
+ var encoderFallback = new EncoderReplacementFallback("\u2047\u2047");
+ var decoderFallback = new DecoderReplacementFallback("?");
+ Encoding encoding = Encoding.GetEncoding("utf-8", encoderFallback, decoderFallback);
+ int maxBytes = encoding.GetMaxByteCount(50);
+ Assert.AreEqual (306, maxBytes, "UTF #1");
}
// regression for bug #59648
UTF8Encoding u = new UTF8Encoding (true, false);
byte[] data = new byte [] { 0xC0, 0xAF };
-#if NET_2_0
Assert.AreEqual (2, u.GetCharCount (data), "#A0");
string s = u.GetString (data);
Assert.AreEqual ("\uFFFD\uFFFD", s, "#A1");
-#else
- Assert.AreEqual (0, u.GetCharCount (data), "#A0");
- string s = u.GetString (data);
- Assert.AreEqual (String.Empty, s, "#A1");
-#endif
data = new byte [] { 0x30, 0x31, 0xC0, 0xAF, 0x30, 0x32 };
s = u.GetString (data);
-#if NET_2_0
Assert.AreEqual (6, s.Length, "#B1");
Assert.AreEqual (0x30, (int) s [0], "#B2");
Assert.AreEqual (0x31, (int) s [1], "#B3");
Assert.AreEqual (0xFFFD, (int) s [3], "#B5");
Assert.AreEqual (0x30, (int) s [4], "#B6");
Assert.AreEqual (0x32, (int) s [5], "#B7");
-#else
- Assert.AreEqual (4, s.Length, "#B1");
- Assert.AreEqual (0x30, (int) s [0], "#B2");
- Assert.AreEqual (0x31, (int) s [1], "#B3");
- Assert.AreEqual (0x30, (int) s [2], "#B4");
- Assert.AreEqual (0x32, (int) s [3], "#B5");
-#endif
}
// UTF8 decoding tests from http://www.cl.cam.ac.uk/~mgk25/
}
[Test]
-#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
-#else
-// MS Fx 1.1 accept this
- [Category ("NotDotNet")]
- [ExpectedException (typeof (DecoderException))]
-#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_511 ()
{
byte[] data = { 0xED, 0xA0, 0x80 };
}
[Test]
-#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
-#else
-// MS Fx 1.1 accept this
- [Category ("NotDotNet")]
- [ExpectedException (typeof (DecoderException))]
-#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_512 ()
{
byte[] data = { 0xED, 0xAD, 0xBF };
}
[Test]
-#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
-#else
-// MS Fx 1.1 accept this
- [Category ("NotDotNet")]
- [ExpectedException (typeof (DecoderException))]
-#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_513 ()
{
byte[] data = { 0xED, 0xAE, 0x80 };
}
[Test]
-#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
-#else
-// MS Fx 1.1 accept this
- [Category ("NotDotNet")]
- [ExpectedException (typeof (DecoderException))]
-#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_514 ()
{
byte[] data = { 0xED, 0xAF, 0xBF };
}
[Test]
-#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
-#else
-// MS Fx 1.1 accept this
- [Category ("NotDotNet")]
- [ExpectedException (typeof (DecoderException))]
-#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_515 ()
{
byte[] data = { 0xED, 0xB0, 0x80 };
}
[Test]
-#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
-#else
-// MS Fx 1.1 accept this
- [Category ("NotDotNet")]
- [ExpectedException (typeof (DecoderException))]
-#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_516 ()
{
byte[] data = { 0xED, 0xBE, 0x80 };
}
[Test]
-#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
-#else
-// MS Fx 1.1 accept this
- [Category ("NotDotNet")]
- [ExpectedException (typeof (DecoderException))]
-#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_517 ()
{
byte[] data = { 0xED, 0xBF, 0xBF };
}
[Test]
-#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
-#else
-// MS Fx 1.1 accept this
- [Category ("NotDotNet")]
- [ExpectedException (typeof (DecoderException))]
-#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_521 ()
{
byte[] data = { 0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80 };
}
[Test]
-#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
-#else
-// MS Fx 1.1 accept this
- [Category ("NotDotNet")]
- [ExpectedException (typeof (DecoderException))]
-#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_522 ()
{
byte[] data = { 0xED, 0xA0, 0x80, 0xED, 0xBF, 0xBF };
}
[Test]
-#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
-#else
-// MS Fx 1.1 accept this
- [Category ("NotDotNet")]
- [ExpectedException (typeof (DecoderException))]
-#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_523 ()
{
byte[] data = { 0xED, 0xAD, 0xBF, 0xED, 0xB0, 0x80 };
}
[Test]
-#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
-#else
-// MS Fx 1.1 accept this
- [Category ("NotDotNet")]
- [ExpectedException (typeof (DecoderException))]
-#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_524 ()
{
byte[] data = { 0xED, 0xAD, 0xBF, 0xED, 0xBF, 0xBF };
}
[Test]
-#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
-#else
-// MS Fx 1.1 accept this
- [Category ("NotDotNet")]
- [ExpectedException (typeof (DecoderException))]
-#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_525 ()
{
byte[] data = { 0xED, 0xAE, 0x80, 0xED, 0xB0, 0x80 };
}
[Test]
-#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
-#else
-// MS Fx 1.1 accept this
- [Category ("NotDotNet")]
- [ExpectedException (typeof (DecoderException))]
-#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_526 ()
{
byte[] data = { 0xED, 0xAE, 0x80, 0xED, 0xBF, 0x8F };
}
[Test]
-#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
-#else
-// MS Fx 1.1 accept this
- [Category ("NotDotNet")]
- [ExpectedException (typeof (DecoderException))]
-#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_527 ()
{
byte[] data = { 0xED, 0xAF, 0xBF, 0xED, 0xB0, 0x80 };
}
[Test]
-#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
-#else
-// MS Fx 1.1 accept this
- [Category ("NotDotNet")]
- [ExpectedException (typeof (DecoderException))]
-#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_528 ()
{
byte[] data = { 0xED, 0xAF, 0xBF, 0xED, 0xBF, 0xBF };
Assert.AreEqual ('A', chars [1], "#2");
}
-#if NET_2_0
[Test]
public void CloneNotReadOnly ()
{
Assert.AreEqual (false, e.IsReadOnly);
e.EncoderFallback = new EncoderExceptionFallback ();
}
-#endif
[Test]
-#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
-#else
- [ExpectedException (typeof (ArgumentException))]
- [Category ("NotDotNet")] // MS Bug
-#endif
public void Bug77315 ()
{
new UTF8Encoding (false, true).GetString (
e.GetBytes (chars, 0, 1, bytes, 0, false);
try {
int ret = e.GetBytes (chars, 1, 0, bytes, 0, true);
-#if NET_2_0
Assert.AreEqual (0, ret, "drop insufficient char in 2.0: char[]");
-#else
- Assert.Fail ("ArgumentException is expected: char[]");
-#endif
- } catch (ArgumentException ae) {
-#if ! NET_2_0
- throw ae;
-#endif
+ } catch (ArgumentException) {
}
string s = "\uD800";
try {
int ret = Encoding.UTF8.GetBytes (s, 0, 1, bytes, 0);
-#if NET_2_0
Assert.AreEqual (0, ret, "drop insufficient char in 2.0: string");
-#else
- Assert.Fail ("ArgumentException is expected: string");
-#endif
- } catch (ArgumentException ae) {
-#if ! NET_2_0
- throw ae;
-#endif
+ } catch (ArgumentException) {
}
}
}
}
-#if NET_2_0
[Test] // bug #77550
public void DecoderFallbackSimple ()
{
Encoding.UTF8.GetString(buf);
}
}
-#endif
[Test]
[ExpectedException (typeof (ArgumentException))]
Assert.AreEqual (0, charactersWritten, "#3");
}
- // DecoderFallbackExceptionTest
- // This struct describes a DecoderFallbackExceptions test. It
- // contains the expected indexes (eindex) and bad-bytes lengths
- // (elen) delivered by the first and subsequent
- // DecoderFallbackException throwed when the utf8 conversion routines
- // are exposed by the array of bytes (bytes) contained in this test.
- // It also has a nice description (description) for documentation and
- // debugging.
- //
- // The hardcoded 'eindex' and 'elen' info is the output that you will
- // got if you run this strings on the MS.NET platform.
- struct DecoderFallbackExceptionTest
- {
- public string description;
- public byte [] bytes;
- public int [] eindex;
- public int [] elen;
- public DecoderFallbackExceptionTest (
- string description,
- int [] eindex,
- int [] elen,
- byte [] bytes)
- {
- this.description = description;
- this.bytes = bytes;
- if (eindex.Length != elen.Length)
- throw new ApplicationException ("eindex.Length != elen.Length in test '" + description + "'");
- this.eindex = eindex;
- this.elen = elen;
+ [Test]
+ public void EncodingFallback ()
+ {
+ /* Legal UTF-8 Byte Sequences
+ * 1st 2nd 3rd 4th
+ * 00..7F
+ * C2..DF 80..BF
+ * E0 A0..BF 80..BF
+ * E1..EF 80..BF 80..BF
+ * F0 90..BF 80..BF 80..BF
+ * F1..F3 80..BF 80..BF 80..BF
+ * F4 80..8F 80..BF 80..BF
+ */
+
+ var t = new EncodingTester ("utf-8");
+ byte [] data;
+
+ // Invalid 1st byte
+ for (byte b = 0x80; b <= 0xC1; b++) {
+ data = new byte [] { b };
+ t.TestDecoderFallback (data, "?", new byte [] { b });
}
- }
- // try to convert the all current test's bytes with Getchars()
- // in only one step
- private void DecoderFallbackExceptions_GetChars (
- char [] chars,
- int testno,
- Decoder dec,
- DecoderFallbackExceptionTest t)
- {
- try {
- dec.GetChars (t.bytes, 0, t.bytes.Length, chars, 0, true);
- Assert.IsTrue (
- t.eindex.Length == 0,
- String.Format (
- "test#{0}-1: UNEXPECTED SUCCESS",
- testno));
- } catch(DecoderFallbackException ex) {
- Assert.IsTrue (
- t.eindex.Length > 0,
- String.Format (
- "test#{0}-1: UNEXPECTED FAIL",
- testno));
- Assert.IsTrue (
- ex.Index == t.eindex[0],
- String.Format (
- "test#{0}-1: Expected exception at {1} not {2}.",
- testno,
- t.eindex[0],
- ex.Index));
- Assert.IsTrue (
- ex.BytesUnknown.Length == t.elen[0],
- String.Format (
- "test#{0}-1: Expected BytesUnknown.Length of {1} not {2}.",
- testno,
- t.elen[0],
- ex.BytesUnknown.Length));
- for (int i = 0; i < ex.BytesUnknown.Length; i++)
- Assert.IsTrue (
- ex.BytesUnknown[i] == t.bytes[ex.Index + i],
- String.Format (
- "test#{0}-1: expected byte {1:X} not {2:X} at {3}.",
- testno,
- t.bytes[ex.Index + i],
- ex.BytesUnknown[i],
- ex.Index + i));
- dec.Reset ();
+ // Invalid 2nd byte
+ // C2..DF 80..BF
+ for (byte b = 0xC2; b <= 0xDF; b++) {
+ data = new byte [] { b, 0x61 };
+ t.TestDecoderFallback (data, "?a", new byte [] { b });
}
- }
- // convert bytes to string using a fixed blocksize.
- // If something bad happens, try to recover using the
- // DecoderFallbackException info.
- private void DecoderFallbackExceptions_Convert (
- char [] chars,
- int testno,
- Decoder dec,
- DecoderFallbackExceptionTest t,
- int block_size)
- {
- int charsUsed, bytesUsed;
- bool completed;
-
- int ce = 0; // current exception
- for (int c = 0; c < t.bytes.Length; ) {
- try {
- int bu = c + block_size > t.bytes.Length
- ? t.bytes.Length - c
- : block_size;
- dec.Convert (
- t.bytes, c, bu,
- chars, 0, chars.Length,
- c + bu >= t.bytes.Length,
- out bytesUsed, out charsUsed,
- out completed);
- c += bytesUsed;
- } catch(DecoderFallbackException ex) {
- Assert.IsTrue (
- t.eindex.Length > ce,
- String.Format (
- "test#{0}-2-{1}#{2}: UNEXPECTED FAIL (c={3}, eIndex={4}, eBytesUnknwon={5})",
- testno, block_size, ce, c,
- ex.Index,
- ex.BytesUnknown.Length));
- Assert.IsTrue (
- ex.Index + c == t.eindex[ce],
- String.Format (
- "test#{0}-2-{1}#{2}: Expected at {3} not {4}.",
- testno, block_size, ce,
- t.eindex[ce],
- ex.Index + c));
- Assert.IsTrue (
- ex.BytesUnknown.Length == t.elen[ce],
- String.Format (
- "test#{0}-2-{1}#{2}: Expected BytesUnknown.Length of {3} not {4} @{5}.",
- testno, block_size, ce,
- t.elen[0], ex.BytesUnknown.Length, c));
- for (int i = 0; i < ex.BytesUnknown.Length; i++)
- Assert.IsTrue (
- ex.BytesUnknown[i] == t.bytes[ex.Index + i + c],
- String.Format (
- "test#{0}-2-{1}#{2}: Expected byte {3:X} not {4:X} at {5}.",
- testno, block_size, ce,
- t.bytes[ex.Index + i + c],
- ex.BytesUnknown[i],
- ex.Index + i));
- c += ex.BytesUnknown.Length + ex.Index;
- ce++;
- dec.Reset ();
- continue;
- }
+ // E0 A0..BF
+ data = new byte [] { 0xE0, 0x99};
+ t.TestDecoderFallback (data, "?", new byte [] { 0xE0, 0x99});
+
+ // E1..EF 80..BF
+ for (byte b = 0xE1; b <= 0xEF; b++) {
+ data = new byte [] { b, 0x61 };
+ t.TestDecoderFallback (data, "?a", new byte [] { b });
+ }
+
+ // F0 90..BF
+ data = new byte [] { 0xF0, 0x8F};
+ t.TestDecoderFallback (data, "?", new byte [] { 0xF0, 0x8F });
+
+ // F1..F4 80..XX
+ for (byte b = 0xF1; b <= 0xF4; b++) {
+ data = new byte [] { b, 0x61 };
+ t.TestDecoderFallback (data, "?a", new byte [] { b });
}
- Assert.IsTrue (
- t.eindex.Length <= ce,
- String.Format (
- "test#{0}-2-{1}: UNEXPECTED SUCCESS",
- testno, block_size));
- }
-
- [Test]
- public void DecoderFallbackExceptions ()
- {
-
- DecoderFallbackExceptionTest [] tests = new DecoderFallbackExceptionTest []
- {
- /* #1 */
- new DecoderFallbackExceptionTest (
- "Greek word 'kosme'",
- new int [] { },
- new int [] { },
- new byte [] {
- 0xce, 0xba, 0xe1, 0xbd, 0xb9, 0xcf,
- 0x83, 0xce, 0xbc, 0xce, 0xb5 }),
- /* #2 */
- new DecoderFallbackExceptionTest (
- "First possible sequence of 1 byte",
- new int [] { },
- new int [] { },
- new byte [] { 0x00 }),
- /* #3 */
- new DecoderFallbackExceptionTest (
- "First possible sequence of 2 bytes",
- new int [] { },
- new int [] { },
- new byte [] { 0xc2, 0x80 }),
- /* #4 */
- new DecoderFallbackExceptionTest (
- "First possible sequence of 3 bytes",
- new int [] { },
- new int [] { },
- new byte [] { 0xe0, 0xa0, 0x80 }),
- /* #5 */
- new DecoderFallbackExceptionTest (
- "First possible sequence of 4 bytes",
- new int [] { },
- new int [] { },
- new byte [] { 0xf0, 0x90, 0x80, 0x80 }),
- /* #6 */
- new DecoderFallbackExceptionTest (
- "First possible sequence of 5 bytes",
- new int [] { 0, 1, 2, 3, 4 },
- new int [] { 1, 1, 1, 1, 1 },
- new byte [] { 0xf8, 0x88, 0x80, 0x80, 0x80 }),
- /* #7 */
- new DecoderFallbackExceptionTest (
- "First possible sequence of 6 bytes",
- new int [] { 0, 1, 2, 3, 4, 5 },
- new int [] { 1, 1, 1, 1, 1, 1 },
- new byte [] {
- 0xfc, 0x84, 0x80, 0x80, 0x80, 0x80 }),
- /* #8 */
- new DecoderFallbackExceptionTest (
- "Last possible sequence of 1 byte",
- new int [] { },
- new int [] { },
- new byte [] { 0x7f }),
- /* #9 */
- new DecoderFallbackExceptionTest (
- "Last possible sequence of 2 bytes",
- new int [] { },
- new int [] { },
- new byte [] { 0xdf, 0xbf }),
- /* #10 */
- new DecoderFallbackExceptionTest (
- "Last possible sequence of 3 bytes",
- new int [] { },
- new int [] { },
- new byte [] { 0xef, 0xbf, 0xbf }),
- /* #11 */
- new DecoderFallbackExceptionTest (
- "Last possible sequence of 4 bytes",
- new int [] { 0, 1, 2, 3 },
- new int [] { 1, 1, 1, 1 },
- new byte [] { 0xf7, 0xbf, 0xbf, 0xbf }),
- /* #12 */
- new DecoderFallbackExceptionTest (
- "Last possible sequence of 5 bytes",
- new int [] { 0, 1, 2, 3, 4 },
- new int [] { 1, 1, 1, 1, 1 },
- new byte [] { 0xfb, 0xbf, 0xbf, 0xbf, 0xbf }),
- /* #13 */
- new DecoderFallbackExceptionTest (
- "Last possible sequence of 6 bytes",
- new int [] { 0, 1, 2, 3, 4, 5 },
- new int [] { 1, 1, 1, 1, 1, 1 },
- new byte [] { 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf }),
- /* #14 */
- new DecoderFallbackExceptionTest (
- "U-0000D7FF = ed 9f bf",
- new int [] { },
- new int [] { },
- new byte [] { 0xed, 0x9f, 0xbf }),
- /* #15 */
- new DecoderFallbackExceptionTest (
- "U-0000E000 = ee 80 80",
- new int [] { },
- new int [] { },
- new byte [] { 0xee, 0x80, 0x80 }),
- /* #16 */
- new DecoderFallbackExceptionTest (
- "U-0000FFFD = ef bf bd",
- new int [] { },
- new int [] { },
- new byte [] { 0xef, 0xbf, 0xbd }),
- /* #17 */
- new DecoderFallbackExceptionTest (
- "U-0010FFFF = f4 8f bf bf",
- new int [] { },
- new int [] { },
- new byte [] { 0xf4, 0x8f, 0xbf, 0xbf }),
- /* #18 */
- new DecoderFallbackExceptionTest (
- "U-00110000 = f4 90 80 80",
- new int [] { 0, 2, 3 },
- new int [] { 2, 1, 1 },
- new byte [] { 0xf4, 0x90, 0x80, 0x80 }),
- /* #19 */
- new DecoderFallbackExceptionTest (
- "First continuation byte 0x80",
- new int [] { 0 },
- new int [] { 1 },
- new byte [] { 0x80 }),
- /* #20 */
- new DecoderFallbackExceptionTest (
- "Last continuation byte 0xbf",
- new int [] { 0 },
- new int [] { 1 },
- new byte [] { 0xbf }),
- /* #21 */
- new DecoderFallbackExceptionTest (
- "2 continuation bytes",
- new int [] { 0, 1 },
- new int [] { 1, 1 },
- new byte [] { 0x80, 0xbf }),
- /* #22 */
- new DecoderFallbackExceptionTest (
- "3 continuation bytes",
- new int [] { 0, 1, 2 },
- new int [] { 1, 1, 1 },
- new byte [] { 0x80, 0xbf, 0x80 }),
- /* #23 */
- new DecoderFallbackExceptionTest (
- "4 continuation bytes",
- new int [] { 0, 1, 2, 3 },
- new int [] { 1, 1, 1, 1 },
- new byte [] { 0x80, 0xbf, 0x80, 0xbf }),
- /* #24 */
- new DecoderFallbackExceptionTest (
- "5 continuation bytes",
- new int [] { 0, 1, 2, 3, 4 },
- new int [] { 1, 1, 1, 1, 1 },
- new byte [] { 0x80, 0xbf, 0x80, 0xbf, 0x80 }),
- /* #25 */
- new DecoderFallbackExceptionTest (
- "6 continuation bytes",
- new int [] { 0, 1, 2, 3, 4, 5 },
- new int [] { 1, 1, 1, 1, 1, 1 },
- new byte [] {
- 0x80, 0xbf, 0x80, 0xbf, 0x80, 0xbf }),
- /* #26 */
- new DecoderFallbackExceptionTest (
- "7 continuation bytes",
- new int [] { 0, 1, 2, 3, 4, 5, 6 },
- new int [] { 1, 1, 1, 1, 1, 1, 1 },
- new byte [] {
- 0x80, 0xbf, 0x80, 0xbf, 0x80, 0xbf,
- 0x80 }),
- /* #27 */
- new DecoderFallbackExceptionTest (
- "Sequence of all 64 continuation bytes",
- new int [] {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
- 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
- 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
- 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
- 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63 },
- new int [] {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1 },
- new byte [] {
- 0x80, 0x81, 0x82, 0x83, 0x84, 0x85,
- 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b,
- 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91,
- 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
- 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d,
- 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3,
- 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9,
- 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
- 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5,
- 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb,
- 0xbc, 0xbd, 0xbe, 0xbf }),
- /* #28 */
- new DecoderFallbackExceptionTest (
- "All 32 first bytes of 2-byte sequences (0xc0-0xdf), each followed by a space character",
- new int [] {
- 0, 2, 4, 6, 8,
- 10, 12, 14, 16, 18,
- 20, 22, 24, 26, 28,
- 30, 32, 34, 36, 38,
- 40, 42, 44, 46, 48,
- 50, 52, 54, 56, 58,
- 60, 62 },
- new int [] {
- 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1,
- 1, 1 },
- new byte [] {
- 0xc0, 0x20, 0xc1, 0x20, 0xc2, 0x20,
- 0xc3, 0x20, 0xc4, 0x20, 0xc5, 0x20,
- 0xc6, 0x20, 0xc7, 0x20, 0xc8, 0x20,
- 0xc9, 0x20, 0xca, 0x20, 0xcb, 0x20,
- 0xcc, 0x20, 0xcd, 0x20, 0xce, 0x20,
- 0xcf, 0x20, 0xd0, 0x20, 0xd1, 0x20,
- 0xd2, 0x20, 0xd3, 0x20, 0xd4, 0x20,
- 0xd5, 0x20, 0xd6, 0x20, 0xd7, 0x20,
- 0xd8, 0x20, 0xd9, 0x20, 0xda, 0x20,
- 0xdb, 0x20, 0xdc, 0x20, 0xdd, 0x20,
- 0xde, 0x20, 0xdf, 0x20 }),
- /* #29 */
- new DecoderFallbackExceptionTest (
- "All 16 first bytes of 3-byte sequences (0xe0-0xef), each followed by a space character",
- new int [] {
- 0, 2, 4, 6, 8,
- 10, 12, 14, 16, 18,
- 20, 22, 24, 26, 28,
- 30 },
- new int [] {
- 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1,
- 1 },
- new byte [] {
- 0xe0, 0x20, 0xe1, 0x20, 0xe2, 0x20,
- 0xe3, 0x20, 0xe4, 0x20, 0xe5, 0x20,
- 0xe6, 0x20, 0xe7, 0x20, 0xe8, 0x20,
- 0xe9, 0x20, 0xea, 0x20, 0xeb, 0x20,
- 0xec, 0x20, 0xed, 0x20, 0xee, 0x20,
- 0xef, 0x20 }),
- /* #30 */
- new DecoderFallbackExceptionTest (
- "All 8 first bytes of 4-byte sequences (0xf0-0xf7), each followed by a space character",
- new int [] { 0, 2, 4, 6, 8, 10, 12, 14 },
- new int [] { 1, 1, 1, 1, 1, 1, 1, 1 },
- new byte [] {
- 0xf0, 0x20, 0xf1, 0x20, 0xf2, 0x20,
- 0xf3, 0x20, 0xf4, 0x20, 0xf5, 0x20,
- 0xf6, 0x20, 0xf7, 0x20 }),
- /* #31 */
- new DecoderFallbackExceptionTest (
- "All 4 first bytes of 5-byte sequences (0xf8-0xfb), each followed by a space character",
- new int [] { 0, 2, 4, 6 },
- new int [] { 1, 1, 1, 1 },
- new byte [] {
- 0xf8, 0x20, 0xf9, 0x20, 0xfa, 0x20,
- 0xfb, 0x20 }),
- /* #32 */
- new DecoderFallbackExceptionTest (
- "All 2 first bytes of 6-byte sequences (0xfc-0xfd), each followed by a space character",
- new int [] { 0, 2 },
- new int [] { 1, 1 },
- new byte [] { 0xfc, 0x20, 0xfd, 0x20 }),
- /* #33 */
- new DecoderFallbackExceptionTest (
- "2-byte sequence with last byte missing",
- new int [] { 0 },
- new int [] { 1 },
- new byte [] { 0xc0 }),
- /* #34 */
- new DecoderFallbackExceptionTest (
- "3-byte sequence with last byte missing",
- new int [] { 0 },
- new int [] { 2 },
- new byte [] { 0xe0, 0x80 }),
- /* #35 */
- new DecoderFallbackExceptionTest (
- "4-byte sequence with last byte missing",
- new int [] { 0, 2 },
- new int [] { 2, 1 },
- new byte [] { 0xf0, 0x80, 0x80 }),
- /* #36 */
- new DecoderFallbackExceptionTest (
- "5-byte sequence with last byte missing",
- new int [] { 0, 1, 2, 3 },
- new int [] { 1, 1, 1, 1 },
- new byte [] { 0xf8, 0x80, 0x80, 0x80 }),
- /* #37 */
- new DecoderFallbackExceptionTest (
- "6-byte sequence with last byte missing",
- new int [] { 0, 1, 2, 3, 4 },
- new int [] { 1, 1, 1, 1, 1 },
- new byte [] { 0xfc, 0x80, 0x80, 0x80, 0x80 }),
- /* #38 */
- new DecoderFallbackExceptionTest (
- "2-byte sequence with last byte missing",
- new int [] { 0 },
- new int [] { 1 },
- new byte [] { 0xdf }),
- /* #39 */
- new DecoderFallbackExceptionTest (
- "3-byte sequence with last byte missing",
- new int [] { 0 },
- new int [] { 2 },
- new byte [] { 0xef, 0xbf }),
- /* #40 */
- new DecoderFallbackExceptionTest (
- "4-byte sequence with last byte missing",
- new int [] { 0, 1, 2 },
- new int [] { 1, 1, 1 },
- new byte [] { 0xf7, 0xbf, 0xbf }),
- /* #41 */
- new DecoderFallbackExceptionTest (
- "5-byte sequence with last byte missing",
- new int [] { 0, 1, 2, 3 },
- new int [] { 1, 1, 1, 1 },
- new byte [] { 0xfb, 0xbf, 0xbf, 0xbf }),
- /* #42 */
- new DecoderFallbackExceptionTest (
- "6-byte sequence with last byte missing",
- new int [] { 0, 1, 2, 3, 4 },
- new int [] { 1, 1, 1, 1, 1 },
- new byte [] { 0xfd, 0xbf, 0xbf, 0xbf, 0xbf }),
- /* #43 */
- new DecoderFallbackExceptionTest (
- "All the 10 sequences of 3.3 concatenated",
- new int [] {
- 0, 1, 3,
- 5, 6, 7, 8, 9,
- 10, 11, 12, 13, 14,
- 15, 16, 18, 19,
- 20, 21, 22, 23, 24,
- 25, 26, 27, 28, 29 },
- new int [] {
- 1, 2, 2,
- 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1,
- 1, 2, 1, 1,
- 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1 },
- new byte [] {
- 0xc0, 0xe0, 0x80, 0xf0, 0x80, 0x80,
- 0xf8, 0x80, 0x80, 0x80, 0xfc, 0x80,
- 0x80, 0x80, 0x80, 0xdf, 0xef, 0xbf,
- 0xf7, 0xbf, 0xbf, 0xfb, 0xbf, 0xbf,
- 0xbf, 0xfd, 0xbf, 0xbf, 0xbf, 0xbf }),
- /* #44 */
- new DecoderFallbackExceptionTest (
- "Bad chars fe",
- new int [] { 0 },
- new int [] { 1 },
- new byte [] { 0xfe }),
- /* #45 */
- new DecoderFallbackExceptionTest (
- "Bad chars ff",
- new int [] { 0 },
- new int [] { 1 },
- new byte [] { 0xff }),
- /* #46 */
- new DecoderFallbackExceptionTest (
- "Bad chars fe fe ff ff",
- new int [] { 0, 1, 2, 3 },
- new int [] { 1, 1, 1, 1 },
- new byte [] { 0xfe, 0xfe, 0xff, 0xff }),
- /* #47 */
- new DecoderFallbackExceptionTest (
- "Overlong U+002F = c0 af",
- new int [] { 0, 1 },
- new int [] { 1, 1 },
- new byte [] { 0xc0, 0xaf }),
- /* #48 */
- new DecoderFallbackExceptionTest (
- "Overlong U+002F = e0 80 af",
- new int [] { 0, 2 },
- new int [] { 2, 1 },
- new byte [] { 0xe0, 0x80, 0xaf }),
- /* #49 */
- new DecoderFallbackExceptionTest (
- "Overlong U+002F = f0 80 80 af",
- new int [] { 0, 2, 3 },
- new int [] { 2, 1, 1 },
- new byte [] { 0xf0, 0x80, 0x80, 0xaf }),
- /* #50 */
- new DecoderFallbackExceptionTest (
- "Overlong U+002F = f8 80 80 80 af",
- new int [] { 0, 1, 2, 3, 4 },
- new int [] { 1, 1, 1, 1, 1 },
- new byte [] { 0xf8, 0x80, 0x80, 0x80, 0xaf }),
- /* #51 */
- new DecoderFallbackExceptionTest (
- "Overlong U+002F = fc 80 80 80 80 af",
- new int [] { 0, 1, 2, 3, 4, 5 },
- new int [] { 1, 1, 1, 1, 1, 1 },
- new byte [] {
- 0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf }),
- /* #52 */
- new DecoderFallbackExceptionTest (
- "Maximum overlong U-0000007F",
- new int [] { 0, 1 },
- new int [] { 1, 1 },
- new byte [] { 0xc1, 0xbf }),
- /* #53 */
- new DecoderFallbackExceptionTest (
- "Maximum overlong U-000007FF",
- new int [] { 0, 2 },
- new int [] { 2, 1, },
- new byte [] { 0xe0, 0x9f, 0xbf }),
- /* #54 */
- new DecoderFallbackExceptionTest (
- "Maximum overlong U-0000FFFF",
- new int [] { 0, 2, 3 },
- new int [] { 2, 1, 1 },
- new byte [] { 0xf0, 0x8f, 0xbf, 0xbf }),
- /* #55 */
- new DecoderFallbackExceptionTest (
- "Maximum overlong U-001FFFFF",
- new int [] { 0, 1, 2, 3, 4 },
- new int [] { 1, 1, 1, 1, 1 },
- new byte [] { 0xf8, 0x87, 0xbf, 0xbf, 0xbf }),
- /* #56 */
- new DecoderFallbackExceptionTest (
- "Maximum overlong U-03FFFFFF",
- new int [] { 0, 1, 2, 3, 4, 5 },
- new int [] { 1, 1, 1, 1, 1, 1 },
- new byte [] {
- 0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf }),
- /* #57 */
- new DecoderFallbackExceptionTest (
- "Null overlong c0 80",
- new int [] { 0, 1 },
- new int [] { 1, 1 },
- new byte [] { 0xc0, 0x80, 0x22 }),
- /* #58 */
- new DecoderFallbackExceptionTest (
- "Null overlong e0 80 80",
- new int [] { 0, 2 },
- new int [] { 2, 1 },
- new byte [] { 0xe0, 0x80, 0x80 }),
- /* #59 */
- new DecoderFallbackExceptionTest (
- "Null overlong f0 80 80 80",
- new int [] { 0, 2, 3 },
- new int [] { 2, 1, 1 },
- new byte [] { 0xf0, 0x80, 0x80, 0x80 }),
- /* #60 */
- new DecoderFallbackExceptionTest (
- "Null overlong f8 80 80 80 80",
- new int [] { 0, 1, 2, 3, 4 },
- new int [] { 1, 1, 1, 1, 1 },
- new byte [] { 0xf8, 0x80, 0x80, 0x80, 0x80 }),
- /* #61 */
- new DecoderFallbackExceptionTest (
- "Null overlong fc 80 80 80 80 80",
- new int [] { 0, 1, 2, 3, 4, 5 },
- new int [] { 1, 1, 1, 1, 1, 1 },
- new byte [] {
- 0xfc, 0x80, 0x80, 0x80, 0x80, 0x80 }),
- /* #62 */
- new DecoderFallbackExceptionTest (
- "Single UTF-16 surrogate U+D800",
- new int [] { 0, 2 },
- new int [] { 2, 1 },
- new byte [] { 0xed, 0xa0, 0x80 }),
- /* #63 */
- new DecoderFallbackExceptionTest (
- "Single UTF-16 surrogate U+DB7F",
- new int [] { 0, 2 },
- new int [] { 2, 1 },
- new byte [] { 0xed, 0xad, 0xbf }),
- /* #64 */
- new DecoderFallbackExceptionTest (
- "Single UTF-16 surrogate U+DB80",
- new int [] { 0, 2 },
- new int [] { 2, 1 },
- new byte [] { 0xed, 0xae, 0x80 }),
- /* #65 */
- new DecoderFallbackExceptionTest (
- "Single UTF-16 surrogate U+DBFF",
- new int [] { 0, 2 },
- new int [] { 2, 1 },
- new byte [] { 0xed, 0xaf, 0xbf }),
- /* #66 */
- new DecoderFallbackExceptionTest (
- "Single UTF-16 surrogate U+DC00",
- new int [] { 0, 2 },
- new int [] { 2, 1 },
- new byte [] { 0xed, 0xb0, 0x80 }),
- /* #67 */
- new DecoderFallbackExceptionTest (
- "Single UTF-16 surrogate U+DF80",
- new int [] { 0, 2 },
- new int [] { 2, 1 },
- new byte [] { 0xed, 0xbe, 0x80 }),
- /* #68 */
- new DecoderFallbackExceptionTest (
- "Single UTF-16 surrogate U+DFFF",
- new int [] { 0, 2 },
- new int [] { 2, 1 },
- new byte [] { 0xed, 0xbf, 0xbf }),
- /* #69 */
- new DecoderFallbackExceptionTest (
- "Paired UTF-16 surrogate U+D800 U+DC00",
- new int [] { 0, 2, 3, 5 },
- new int [] { 2, 1, 2, 1 },
- new byte [] {
- 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80 }),
- /* #70 */
- new DecoderFallbackExceptionTest (
- "Paired UTF-16 surrogate U+D800 U+DFFF",
- new int [] { 0, 2, 3, 5 },
- new int [] { 2, 1, 2, 1 },
- new byte [] {
- 0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf }),
- /* #71 */
- new DecoderFallbackExceptionTest (
- "Paired UTF-16 surrogate U+DB7F U+DC00",
- new int [] { 0, 2, 3, 5 },
- new int [] { 2, 1, 2, 1 },
- new byte [] {
- 0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80 }),
- /* #72 */
- new DecoderFallbackExceptionTest (
- "Paired UTF-16 surrogate U+DB7F U+DFFF",
- new int [] { 0, 2, 3, 5 },
- new int [] { 2, 1, 2, 1 },
- new byte [] {
- 0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf }),
- /* #73 */
- new DecoderFallbackExceptionTest (
- "Paired UTF-16 surrogate U+DB80 U+DC00",
- new int [] { 0, 2, 3, 5 },
- new int [] { 2, 1, 2, 1 },
- new byte [] {
- 0xed, 0xae, 0x80, 0xed, 0xb0, 0x80 }),
- /* #74 */
- new DecoderFallbackExceptionTest (
- "Paired UTF-16 surrogate U+DB80 U+DFFF",
- new int [] { 0, 2, 3, 5 },
- new int [] { 2, 1, 2, 1 },
- new byte [] {
- 0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf }),
- /* #75 */
- new DecoderFallbackExceptionTest (
- "Paired UTF-16 surrogate U+DBFF U+DC00",
- new int [] { 0, 2, 3, 5 },
- new int [] { 2, 1, 2, 1 },
- new byte [] {
- 0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80 }),
- /* #76 */
- new DecoderFallbackExceptionTest (
- "Paired UTF-16 surrogate U+DBFF U+DFFF",
- new int [] { 0, 2, 3, 5 },
- new int [] { 2, 1, 2, 1 },
- new byte [] {
- 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf }),
- /* #77 */
- new DecoderFallbackExceptionTest (
- "Illegal code position U+FFFE",
- new int [] { },
- new int [] { },
- new byte [] { 0xef, 0xbf, 0xbe }),
- /* #78 */
- new DecoderFallbackExceptionTest (
- "Illegal code position U+FFFF",
- new int [] { },
- new int [] { },
- new byte [] { 0xef, 0xbf, 0xbf }),
- };
- Encoding utf8 = Encoding.GetEncoding (
- "utf-8",
- new EncoderExceptionFallback(),
- new DecoderExceptionFallback());
- Decoder dec = utf8.GetDecoder ();
- char [] chars;
-
- for(int t = 0; t < tests.Length; t++) {
- chars = new char [utf8.GetMaxCharCount (tests[t].bytes.Length)];
-
- // #1 complete conversion
- DecoderFallbackExceptions_GetChars (chars, t, dec, tests[t]);
-
- // #2 convert with several block_sizes
- for (int bs = 1; bs < tests[t].bytes.Length; bs++)
- DecoderFallbackExceptions_Convert (chars, t, dec, tests[t], bs);
+
+ // C2..F3 XX..BF
+ for (byte b = 0xC2; b <= 0xF3; b++) {
+ data = new byte [] { b, 0xC0 };
+ t.TestDecoderFallback (data, "??", new byte [] { b }, new byte [] { 0xC0 });
+ }
+
+ // Invalid 3rd byte
+ // E0..F3 90..BF 80..BF
+ for (byte b = 0xE0; b <= 0xF3; b++) {
+ data = new byte [] { b, 0xB0, 0x61 };
+ t.TestDecoderFallback (data, "?a", new byte [] { b, 0xB0 });
+ data = new byte [] { b, 0xB0, 0xC0 };
+ t.TestDecoderFallback (data, "??", new byte [] { b, 0xB0 }, new byte [] { 0xC0 });
+ }
+
+ // F4 80..8F 80..BF
+ data = new byte [] { 0xF4, 0x8F, 0xC0 };
+ t.TestDecoderFallback (data, "??", new byte [] { 0xF4, 0x8F }, new byte [] { 0xC0 });
+
+ // Invalid 4th byte
+ // F0..F3 90..BF 80..BF 80..BF
+ for (byte b = 0xF0; b <= 0xF3; b++) {
+ data = new byte [] { b, 0xB0, 0xB0, 0x61 };
+ t.TestDecoderFallback (data, "?a", new byte [] { b, 0xB0, 0xB0 });
+ data = new byte [] { b, 0xB0, 0xB0, 0xC0 };
+ t.TestDecoderFallback (data, "??", new byte [] { b, 0xB0, 0xB0 }, new byte [] { 0xC0 });
+ }
+
+ // F4 80..8F 80..BF 80..BF
+ data = new byte [] { 0xF4, 0x8F, 0xB0, 0xC0 };
+ t.TestDecoderFallback (data, "??", new byte [] { 0xF4, 0x8F, 0xB0 }, new byte [] { 0xC0 });
+ }
+
+ [Test]
+ public void DecoderBug23771 ()
+ {
+ // Regression test: feed the UTF-8 encoding of a single character to a
+ // stateful decoder one byte at a time (flush = false) and check that the
+ // pieces are reassembled into the original character.
+ const string expected = "\u733F"; // 'monkey' in Japanese; 3 bytes in UTF-8, 1 char in UTF-16.
+ byte [] utf8 = Encoding.UTF8.GetBytes (expected);
+ Decoder streamingDecoder = Encoding.UTF8.GetDecoder ();
+ char [] output = new char [10]; // More than enough room for the decoded text.
+ byte [] chunk = new byte [1]; // Simulates input arriving in one-byte chunks.
+ var decoded = new StringBuilder ();
+
+ // Hand the encoded bytes to the decoder separately, one per call.
+ for (int i = 0; i < utf8.Length; i++) {
+ chunk [0] = utf8 [i];
+ int consumed, produced;
+ bool done;
+ streamingDecoder.Convert (chunk, 0, chunk.Length, output, 0, output.Length, false, out consumed, out produced, out done);
+ decoded.Append (output, 0, produced);
+ // The per-call values expected here are listed at the bottom of this method.
+ //Debug.Print ("bytesUsed:{0}, charsUsed:{1}, completed:{2}, result:'{3}'", consumed, produced, done, decoded);
+ }
+
+ // Expected: no assertion failure.
+ Assert.AreEqual (expected, decoded.ToString (), "#1");
+
+ /*
+ * Expected debug output, one line per Convert call:
+ * bytesUsed:1, charsUsed:0, completed:True, result:''
+ * bytesUsed:1, charsUsed:0, completed:True, result:''
+ * bytesUsed:1, charsUsed:1, completed:True, result:'<U+733F>'
+ *
+ * Output observed when the bug was reported:
+ * bytesUsed:3, charsUsed:1, completed:False, result:'<U+FFFD>'
+ * bytesUsed:3, charsUsed:1, completed:False, result:'<U+FFFD><U+FFFD>'
+ * bytesUsed:3, charsUsed:1, completed:False, result:'<U+FFFD><U+FFFD><U+FFFD>'
+ *
+ * None of the output parameters matched the expected values.
+ * U+FFFD is the decoder's replacement (fallback) character.
+ */
}
}
}