2 // UTF8EncodingTest.cs - NUnit Test Cases for System.Text.UTF8Encoding
5 // Patrick Kalkman kalkman@cistron.nl
6 // Sebastien Pouliot (spouliot@motus.com)
8 // (C) 2003 Patrick Kalkman
9 // (C) 2004 Novell (http://www.novell.com)
12 using NUnit.Framework;
18 using DecoderException = System.Text.DecoderFallbackException;
20 using DecoderException = System.ArgumentException;
23 using AssertType = NUnit.Framework.Assert;
25 namespace MonoTests.System.Text
28 public class UTF8EncodingTest
30 private UTF8Encoding utf8;
35 utf8 = new UTF8Encoding (true, true);
// The shared utf8 encoding must report IsBrowserDisplay as true.
39 public void IsBrowserDisplay ()
41 Assert.IsTrue (utf8.IsBrowserDisplay);
// The shared utf8 encoding must report IsBrowserSave as true.
45 public void IsBrowserSave ()
47 Assert.IsTrue (utf8.IsBrowserSave);
// The shared utf8 encoding must report IsMailNewsDisplay as true.
51 public void IsMailNewsDisplay ()
53 Assert.IsTrue (utf8.IsMailNewsDisplay);
// The shared utf8 encoding must report IsMailNewsSave as true.
57 public void IsMailNewsSave ()
59 Assert.IsTrue (utf8.IsMailNewsSave);
// Two default-constructed UTF8Encoding instances must compare equal through Equals.
63 public void TestCompat ()
65 Assert.IsTrue (new UTF8Encoding ().Equals (new UTF8Encoding ()));
// Encodes a four-character string with GetBytes (string) on a default
// UTF8Encoding and checks each of the first seven output bytes.
// The length of the returned array is not asserted here.
69 public void TestEncodingGetBytes1()
71 UTF8Encoding utf8Enc = new UTF8Encoding ();
72 string UniCode = "\u0041\u2262\u0391\u002E";
74 // "A<NOT IDENTICAL TO><ALPHA>." may be encoded as 41 E2 89 A2 CE 91 2E
76 byte[] utf8Bytes = utf8Enc.GetBytes (UniCode);
78 Assert.AreEqual (0x41, utf8Bytes [0], "UTF #1");
79 Assert.AreEqual (0xE2, utf8Bytes [1], "UTF #2");
80 Assert.AreEqual (0x89, utf8Bytes [2], "UTF #3");
81 Assert.AreEqual (0xA2, utf8Bytes [3], "UTF #4");
82 Assert.AreEqual (0xCE, utf8Bytes [4], "UTF #5");
83 Assert.AreEqual (0x91, utf8Bytes [5], "UTF #6");
84 Assert.AreEqual (0x2E, utf8Bytes [6], "UTF #7");
// Encodes a nine-character string into a caller-supplied 11-byte buffer using
// the GetBytes (char[], int, int, byte[], int) overload, then checks the
// returned byte count and every byte written.
88 public void TestEncodingGetBytes2()
90 UTF8Encoding utf8Enc = new UTF8Encoding ();
91 string UniCode = "\u0048\u0069\u0020\u004D\u006F\u006D\u0020\u263A\u0021";
93 // "Hi Mom <WHITE SMILING FACE>!" may be encoded as 48 69 20 4D 6F 6D 20 E2 98 BA 21
95 byte[] utf8Bytes = new byte [11];
97 int ByteCnt = utf8Enc.GetBytes (UniCode.ToCharArray(), 0, UniCode.Length, utf8Bytes, 0);
98 Assert.AreEqual (11, ByteCnt, "UTF #1");
99 Assert.AreEqual (0x48, utf8Bytes [0], "UTF #2");
100 Assert.AreEqual (0x69, utf8Bytes [1], "UTF #3");
101 Assert.AreEqual (0x20, utf8Bytes [2], "UTF #4");
102 Assert.AreEqual (0x4D, utf8Bytes [3], "UTF #5");
103 Assert.AreEqual (0x6F, utf8Bytes [4], "UTF #6");
104 Assert.AreEqual (0x6D, utf8Bytes [5], "UTF #7");
105 Assert.AreEqual (0x20, utf8Bytes [6], "UTF #8");
106 Assert.AreEqual (0xE2, utf8Bytes [7], "UTF #9");
107 Assert.AreEqual (0x98, utf8Bytes [8], "UTF #10");
108 Assert.AreEqual (0xBA, utf8Bytes [9], "UTF #11");
109 Assert.AreEqual (0x21, utf8Bytes [10], "UTF #12");
// Decodes seven bytes with GetChars (byte[]) on a default UTF8Encoding and
// checks the first four resulting characters (1-, 3-, 2- and 1-byte sequences).
113 public void TestDecodingGetChars1()
115 UTF8Encoding utf8Enc = new UTF8Encoding ();
116 // 41 E2 89 A2 CE 91 2E may be decoded as "A<NOT IDENTICAL TO><ALPHA>."
118 byte[] utf8Bytes = new byte [] {0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E};
119 char[] UniCodeChars = utf8Enc.GetChars(utf8Bytes);
121 Assert.AreEqual (0x0041, UniCodeChars [0], "UTF #1");
122 Assert.AreEqual (0x2262, UniCodeChars [1], "UTF #2");
123 Assert.AreEqual (0x0391, UniCodeChars [2], "UTF #3");
124 Assert.AreEqual (0x002E, UniCodeChars [3], "UTF #4");
// Checks GetMaxCharCount (50) on a default UTF8Encoding.
// NOTE(review): the two assertions expect different values (51 and 50) for the
// same call, so they look like alternatives of a conditional-compilation block
// whose directives are not visible here — confirm.
129 [Category ("NotWorking")]
131 public void TestMaxCharCount()
133 UTF8Encoding UTF8enc = new UTF8Encoding ();
135 // hmm, where is this extra 1 coming from?
136 Assert.AreEqual (51, UTF8enc.GetMaxCharCount(50), "UTF #1");
138 Assert.AreEqual (50, UTF8enc.GetMaxCharCount(50), "UTF #1");
// Checks GetMaxByteCount (50) on a default UTF8Encoding.
// NOTE(review): the two assertions expect different values (153 and 200) for
// the same call, so they look like alternatives of a conditional-compilation
// block whose directives are not visible here — confirm.
144 [Category ("NotWorking")]
146 public void TestMaxByteCount()
148 UTF8Encoding UTF8enc = new UTF8Encoding ();
150 // maybe under .NET 2.0 insufficient surrogate pair is
151 // just not handled, and 3 is Preamble size.
152 Assert.AreEqual (153, UTF8enc.GetMaxByteCount(50), "UTF #1");
154 Assert.AreEqual (200, UTF8enc.GetMaxByteCount(50), "UTF #1");
158 // regression for bug #59648
// Uses a UTF8Encoding built with throwOnInvalidBytes = false and decodes the
// invalid pair C0 AF, first alone and then embedded between ASCII digits.
// NOTE(review): each check appears twice with different expectations
// (U+FFFD replacement characters versus dropped bytes) and "string s" is
// declared twice, so these look like alternatives of a conditional-compilation
// block whose directives are not visible here — confirm.
160 public void TestThrowOnInvalid ()
162 UTF8Encoding u = new UTF8Encoding (true, false);
164 byte[] data = new byte [] { 0xC0, 0xAF };
// Variant in which each invalid byte becomes one U+FFFD.
166 Assert.AreEqual (2, u.GetCharCount (data), "#A0");
167 string s = u.GetString (data);
168 Assert.AreEqual ("\uFFFD\uFFFD", s, "#A1");
// Variant in which the invalid bytes produce no output.
170 Assert.AreEqual (0, u.GetCharCount (data), "#A0");
171 string s = u.GetString (data);
172 Assert.AreEqual (String.Empty, s, "#A1");
175 data = new byte [] { 0x30, 0x31, 0xC0, 0xAF, 0x30, 0x32 };
176 s = u.GetString (data);
// Replacement variant: "01", two U+FFFD, "02".
178 Assert.AreEqual (6, s.Length, "#B1");
179 Assert.AreEqual (0x30, (int) s [0], "#B2");
180 Assert.AreEqual (0x31, (int) s [1], "#B3");
181 Assert.AreEqual (0xFFFD, (int) s [2], "#B4");
182 Assert.AreEqual (0xFFFD, (int) s [3], "#B5");
183 Assert.AreEqual (0x30, (int) s [4], "#B6");
184 Assert.AreEqual (0x32, (int) s [5], "#B7");
// Dropping variant: only the four ASCII digits remain.
186 Assert.AreEqual (4, s.Length, "#B1");
187 Assert.AreEqual (0x30, (int) s [0], "#B2");
188 Assert.AreEqual (0x31, (int) s [1], "#B3");
189 Assert.AreEqual (0x30, (int) s [2], "#B4");
190 Assert.AreEqual (0x32, (int) s [3], "#B5");
194 // UTF8 decoding tests from http://www.cl.cam.ac.uk/~mgk25/
// Round-trip check: decodes an 11-byte valid UTF-8 sequence with the shared
// utf8 encoding and requires that re-encoding yields the same bytes.
197 public void T1_Correct_GreekWord_kosme ()
199 byte[] data = { 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5 };
200 string s = utf8.GetString (data);
201 // cute but saving source code in unicode can be problematic
202 // so we just ensure we can re-encode this
203 Assert.AreEqual (BitConverter.ToString (data), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted");
// First possible sequence of each length from 1 to 4 bytes: every input must
// decode to the expected UTF-16 code unit(s) and re-encode to the same bytes.
207 public void T2_Boundary_1_FirstPossibleSequence_Pass ()
209 byte[] data211 = { 0x00 };
210 string s = utf8.GetString (data211);
211 Assert.AreEqual ("\0", s, "1 byte (U-00000000)");
212 Assert.AreEqual (BitConverter.ToString (data211), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-1");
214 byte[] data212 = { 0xC2, 0x80 };
215 s = utf8.GetString (data212);
216 Assert.AreEqual (128, s [0], "2 bytes (U-00000080)");
217 Assert.AreEqual (BitConverter.ToString (data212), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-2");
219 byte[] data213 = { 0xE0, 0xA0, 0x80 };
220 s = utf8.GetString (data213);
221 Assert.AreEqual (2048, s [0], "3 bytes (U-00000800)");
222 Assert.AreEqual (BitConverter.ToString (data213), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-3");
224 byte[] data214 = { 0xF0, 0x90, 0x80, 0x80 };
225 s = utf8.GetString (data214);
// U+10000 decodes to the surrogate pair D800 (55296) / DC00 (56320).
226 Assert.AreEqual (55296, s [0], "4 bytes (U-00010000)-0");
227 Assert.AreEqual (56320, s [1], "4 bytes (U-00010000)-1");
228 Assert.AreEqual (BitConverter.ToString (data214), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-4");
// First possible 5-byte sequence (F8 88 80 80 80): decoding is expected to
// raise DecoderException. The assertions after GetString execute only if
// no exception was thrown.
233 [ExpectedException (typeof (DecoderException))]
234 public void T2_Boundary_1_FirstPossibleSequence_Fail_5 ()
236 byte[] data215 = { 0xF8, 0x88, 0x80, 0x80, 0x80 };
237 string s = utf8.GetString (data215);
238 Assert.IsNull (s, "5 bytes (U-00200000)");
239 Assert.AreEqual (BitConverter.ToString (data215), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-5");
// First possible 6-byte sequence (FC 84 80 80 80 80): decoding is expected to
// raise DecoderException. The assertions after GetString execute only if
// no exception was thrown.
244 [ExpectedException (typeof (DecoderException))]
245 public void T2_Boundary_1_FirstPossibleSequence_Fail_6 ()
247 byte[] data216 = { 0xFC, 0x84, 0x80, 0x80, 0x80, 0x80 };
248 string s = utf8.GetString (data216);
249 Assert.IsNull (s, "6 bytes (U-04000000)");
250 Assert.AreEqual (BitConverter.ToString (data216), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-6");
// Last possible sequence of each length from 1 to 3 bytes: every input must
// decode to the expected character and re-encode to the same bytes.
254 public void T2_Boundary_2_LastPossibleSequence_Pass ()
256 byte[] data221 = { 0x7F };
257 string s = utf8.GetString (data221);
258 Assert.AreEqual (127, s [0], "1 byte (U-0000007F)");
259 Assert.AreEqual (BitConverter.ToString (data221), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-1");
261 byte[] data222 = { 0xDF, 0xBF };
262 s = utf8.GetString (data222);
263 Assert.AreEqual (2047, s [0], "2 bytes (U-000007FF)");
264 Assert.AreEqual (BitConverter.ToString (data222), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-2");
266 byte[] data223 = { 0xEF, 0xBF, 0xBF };
267 s = utf8.GetString (data223);
268 Assert.AreEqual (65535, s [0], "3 bytes (U-0000FFFF)");
269 Assert.AreEqual (BitConverter.ToString (data223), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-3");
// Last possible 4-byte sequence, F7 BF BF BF (U-001FFFFF). The value lies
// above U+10FFFF, so decoding with the shared utf8 encoding is expected to
// raise DecoderException.
275 [ExpectedException (typeof (DecoderException))]
276 public void T2_Boundary_2_LastPossibleSequence_Fail_4 ()
// The lead byte must be 0xF7 (a 4-byte lead). With 0x7F the input would be
// ASCII DEL followed by stray continuation bytes, which is a different
// failure from the one named in the assertion message below.
278 byte[] data224 = { 0xF7, 0xBF, 0xBF, 0xBF };
279 string s = utf8.GetString (data224);
// Reached only when GetString does not throw.
280 Assert.IsNull (s, "4 bytes (U-001FFFFF)");
281 Assert.AreEqual (BitConverter.ToString (data224), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-4");
// Last possible 5-byte sequence (FB BF BF BF BF): decoding is expected to
// raise DecoderException. The assertions after GetString execute only if
// no exception was thrown.
286 [ExpectedException (typeof (DecoderException))]
287 public void T2_Boundary_2_LastPossibleSequence_Fail_5 ()
289 byte[] data225 = { 0xFB, 0xBF, 0xBF, 0xBF, 0xBF };
290 string s = utf8.GetString (data225);
291 Assert.IsNull (s, "5 bytes (U-03FFFFFF)");
292 Assert.AreEqual (BitConverter.ToString (data225), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-5");
// Last possible 6-byte sequence (FD BF BF BF BF BF): decoding is expected to
// raise DecoderException. The assertions after GetString execute only if
// no exception was thrown.
297 [ExpectedException (typeof (DecoderException))]
298 public void T2_Boundary_2_LastPossibleSequence_Fail_6 ()
300 byte[] data226 = { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF, 0xBF };
301 string s = utf8.GetString (data226);
302 Assert.IsNull (s, "6 bytes (U-7FFFFFFF)");
303 Assert.AreEqual (BitConverter.ToString (data226), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-6");
// Other boundary values (U+D7FF, U+E000, U+FFFD, U+10FFFF): each must decode
// to the expected UTF-16 code unit(s) and re-encode to the same bytes.
307 public void T2_Boundary_3_Other_Pass ()
309 byte[] data231 = { 0xED, 0x9F, 0xBF };
310 string s = utf8.GetString (data231);
311 Assert.AreEqual (55295, s [0], "U-0000D7FF");
312 Assert.AreEqual (BitConverter.ToString (data231), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-1");
314 byte[] data232 = { 0xEE, 0x80, 0x80 };
315 s = utf8.GetString (data232);
316 Assert.AreEqual (57344, s [0], "U-0000E000");
317 Assert.AreEqual (BitConverter.ToString (data232), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-2");
319 byte[] data233 = { 0xEF, 0xBF, 0xBD };
320 s = utf8.GetString (data233);
321 Assert.AreEqual (65533, s [0], "U-0000FFFD");
322 Assert.AreEqual (BitConverter.ToString (data233), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-3");
324 byte[] data234 = { 0xF4, 0x8F, 0xBF, 0xBF };
325 s = utf8.GetString (data234);
// U+10FFFF decodes to the surrogate pair DBFF (56319) / DFFF (57343).
326 Assert.AreEqual (56319, s [0], "U-0010FFFF-0");
327 Assert.AreEqual (57343, s [1], "U-0010FFFF-1");
328 Assert.AreEqual (BitConverter.ToString (data234), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-4");
// F4 90 80 80 encodes U-00110000, one past U+10FFFF: decoding is expected to
// raise DecoderException. The assertions after GetString execute only if
// no exception was thrown.
333 [ExpectedException (typeof (DecoderException))]
334 public void T2_Boundary_3_Other_Fail_5 ()
336 byte[] data235 = { 0xF4, 0x90, 0x80, 0x80 };
337 string s = utf8.GetString (data235);
338 Assert.IsNull (s, "U-00110000");
339 Assert.AreEqual (BitConverter.ToString (data235), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-5");
// A lone continuation byte 0x80 must be rejected with DecoderException.
343 [ExpectedException (typeof (DecoderException))]
344 public void T3_Malformed_1_UnexpectedContinuation_311 ()
346 byte[] data = { 0x80 };
347 string s = utf8.GetString (data);
348 // exception is "really" expected here
// A lone continuation byte 0xBF must be rejected with DecoderException.
352 [ExpectedException (typeof (DecoderException))]
353 public void T3_Malformed_1_UnexpectedContinuation_312 ()
355 byte[] data = { 0xBF };
356 string s = utf8.GetString (data);
357 // exception is "really" expected here
// Two continuation bytes with no lead byte must be rejected with DecoderException.
361 [ExpectedException (typeof (DecoderException))]
362 public void T3_Malformed_1_UnexpectedContinuation_313 ()
364 byte[] data = { 0x80, 0xBF };
365 string s = utf8.GetString (data);
366 // exception is "really" expected here
// Three continuation bytes with no lead byte must be rejected with DecoderException.
370 [ExpectedException (typeof (DecoderException))]
371 public void T3_Malformed_1_UnexpectedContinuation_314 ()
373 byte[] data = { 0x80, 0xBF, 0x80 };
374 string s = utf8.GetString (data);
375 // exception is "really" expected here
// Four continuation bytes with no lead byte must be rejected with DecoderException.
379 [ExpectedException (typeof (DecoderException))]
380 public void T3_Malformed_1_UnexpectedContinuation_315 ()
382 byte[] data = { 0x80, 0xBF, 0x80, 0xBF };
383 string s = utf8.GetString (data);
384 // exception is "really" expected here
// Five continuation bytes with no lead byte must be rejected with DecoderException.
388 [ExpectedException (typeof (DecoderException))]
389 public void T3_Malformed_1_UnexpectedContinuation_316 ()
391 byte[] data = { 0x80, 0xBF, 0x80, 0xBF, 0x80 };
392 string s = utf8.GetString (data);
393 // exception is "really" expected here
// Six continuation bytes with no lead byte must be rejected with DecoderException.
397 [ExpectedException (typeof (DecoderException))]
398 public void T3_Malformed_1_UnexpectedContinuation_317 ()
400 byte[] data = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF };
401 string s = utf8.GetString (data);
402 // exception is "really" expected here
// Seven continuation bytes with no lead byte must be rejected with DecoderException.
406 [ExpectedException (typeof (DecoderException))]
407 public void T3_Malformed_1_UnexpectedContinuation_318 ()
409 byte[] data = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 };
410 string s = utf8.GetString (data);
411 // exception is "really" expected here
// The whole continuation-byte range 0x80-0xBF in order, with no lead byte,
// must be rejected with DecoderException.
// NOTE(review): the line that opens the "data" array initializer is not
// visible in this listing.
415 [ExpectedException (typeof (DecoderException))]
416 public void T3_Malformed_1_UnexpectedContinuation_319 ()
418 // 64 different continuation characters
420 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
421 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
422 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
423 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF };
424 string s = utf8.GetString (data);
425 // exception is "really" expected here
// Every first byte of a 2-byte sequence (0xC0-0xDF), each followed by a space
// (0x20) instead of a continuation byte, must be rejected with DecoderException.
// NOTE(review): the line that opens the "data" array initializer is not
// visible in this listing.
429 [ExpectedException (typeof (DecoderException))]
430 public void T3_Malformed_2_LonelyStart_321 ()
433 0xC0, 0x20, 0xC1, 0x20, 0xC2, 0x20, 0xC3, 0x20, 0xC4, 0x20, 0xC5, 0x20, 0xC6, 0x20, 0xC7, 0x20,
434 0xC8, 0x20, 0xC9, 0x20, 0xCA, 0x20, 0xCB, 0x20, 0xCC, 0x20, 0xCD, 0x20, 0xCE, 0x20, 0xCF, 0x20,
435 0xD0, 0x20, 0xD1, 0x20, 0xD2, 0x20, 0xD3, 0x20, 0xD4, 0x20, 0xD5, 0x20, 0xD6, 0x20, 0xD7, 0x20,
436 0xD8, 0x20, 0xD9, 0x20, 0xDA, 0x20, 0xDB, 0x20, 0xDC, 0x20, 0xDD, 0x20, 0xDE, 0x20, 0xDF, 0x20 };
437 string s = utf8.GetString (data);
438 // exception is "really" expected here
// Every first byte of a 3-byte sequence (0xE0-0xEF), each followed by a space
// (0x20) instead of continuation bytes, must be rejected with DecoderException.
// NOTE(review): the line that opens the "data" array initializer is not
// visible in this listing.
442 [ExpectedException (typeof (DecoderException))]
443 public void T3_Malformed_2_LonelyStart_322 ()
446 0xE0, 0x20, 0xE1, 0x20, 0xE2, 0x20, 0xE3, 0x20, 0xE4, 0x20, 0xE5, 0x20, 0xE6, 0x20, 0xE7, 0x20,
447 0xE8, 0x20, 0xE9, 0x20, 0xEA, 0x20, 0xEB, 0x20, 0xEC, 0x20, 0xED, 0x20, 0xEE, 0x20, 0xEF, 0x20 };
448 string s = utf8.GetString (data);
449 // exception is "really" expected here
// Every first byte of a 4-byte sequence (0xF0-0xF7), each followed by a space
// (0x20) instead of continuation bytes, must be rejected with DecoderException.
453 [ExpectedException (typeof (DecoderException))]
454 public void T3_Malformed_2_LonelyStart_323 ()
456 byte[] data = { 0xF0, 0x20, 0xF1, 0x20, 0xF2, 0x20, 0xF3, 0x20, 0xF4, 0x20, 0xF5, 0x20, 0xF6, 0x20, 0xF7, 0x20 };
457 string s = utf8.GetString (data);
458 // exception is "really" expected here
// Every first byte of a 5-byte sequence (0xF8-0xFB), each followed by a space
// (0x20) instead of continuation bytes, must be rejected with DecoderException.
462 [ExpectedException (typeof (DecoderException))]
463 public void T3_Malformed_2_LonelyStart_324 ()
// Uses the 5-byte lead range so that this case is distinct from
// T3_Malformed_2_LonelyStart_323, which already covers 0xF0-0xF7.
465 byte[] data = { 0xF8, 0x20, 0xF9, 0x20, 0xFA, 0x20, 0xFB, 0x20 };
466 string s = utf8.GetString (data);
467 // exception is "really" expected here
// Both first bytes of a 6-byte sequence (0xFC, 0xFD), each followed by a space
// (0x20) instead of continuation bytes, must be rejected with DecoderException.
471 [ExpectedException (typeof (DecoderException))]
472 public void T3_Malformed_2_LonelyStart_325 ()
474 byte[] data = { 0xFC, 0x20, 0xFD, 0x20 };
475 string s = utf8.GetString (data);
476 // exception is "really" expected here
// A 2-byte lead (0xC0) with its continuation byte missing must be rejected.
480 [ExpectedException (typeof (DecoderException))]
481 public void T3_Malformed_3_LastContinuationMissing_331 ()
483 byte[] data = { 0xC0 };
484 string s = utf8.GetString (data);
485 // exception is "really" expected here
// A 3-byte sequence (lead 0xE0) with its last continuation byte missing must be rejected.
489 [ExpectedException (typeof (DecoderException))]
490 public void T3_Malformed_3_LastContinuationMissing_332 ()
492 byte[] data = { 0xE0, 0x80 };
493 string s = utf8.GetString (data);
494 // exception is "really" expected here
// A 4-byte sequence (lead 0xF0) with its last continuation byte missing must be rejected.
498 [ExpectedException (typeof (DecoderException))]
499 public void T3_Malformed_3_LastContinuationMissing_333 ()
501 byte[] data = { 0xF0, 0x80, 0x80 };
502 string s = utf8.GetString (data);
503 // exception is "really" expected here
// A 5-byte sequence (lead 0xF8) with its last continuation byte missing must be rejected.
507 [ExpectedException (typeof (DecoderException))]
508 public void T3_Malformed_3_LastContinuationMissing_334 ()
510 byte[] data = { 0xF8, 0x80, 0x80, 0x80 };
511 string s = utf8.GetString (data);
512 // exception is "really" expected here
// A 6-byte sequence (lead 0xFC) with its last continuation byte missing must be rejected.
516 [ExpectedException (typeof (DecoderException))]
517 public void T3_Malformed_3_LastContinuationMissing_335 ()
519 byte[] data = { 0xFC, 0x80, 0x80, 0x80, 0x80 };
520 string s = utf8.GetString (data);
521 // exception is "really" expected here
// A 2-byte lead (0xDF) with its continuation byte missing. The test tolerates
// two outcomes: an empty string result, or a DecoderException that is caught.
// NOTE(review): the opening of the try block that pairs with the catch below
// is not visible in this listing.
525 // MS Fx 1.1 accepts this
526 // [ExpectedException (typeof (DecoderException))]
527 public void T3_Malformed_3_LastContinuationMissing_336 ()
529 byte[] data = { 0xDF };
531 string s = utf8.GetString (data);
532 // exception is "really" expected here
533 Assert.AreEqual (String.Empty, s, "MS FX 1.1 behaviour");
535 catch (DecoderException) {
536 // but Mono doesn't - better stick to the standard
// A 3-byte sequence (EF BF) with its last continuation byte missing. The test
// tolerates two outcomes: an empty string result, or a DecoderException that
// is caught.
// NOTE(review): the opening of the try block that pairs with the catch below
// is not visible in this listing.
541 // MS Fx 1.1 accepts this
542 // [ExpectedException (typeof (DecoderException))]
543 public void T3_Malformed_3_LastContinuationMissing_337 ()
545 byte[] data = { 0xEF, 0xBF };
547 string s = utf8.GetString (data);
548 // exception is "really" expected here
549 Assert.AreEqual (String.Empty, s, "MS FX 1.1 behaviour");
551 catch (DecoderException) {
552 // but Mono doesn't - better stick to the standard
// A 4-byte sequence (F7 BF BF) with its last continuation byte missing must be rejected.
557 [ExpectedException (typeof (DecoderException))]
558 public void T3_Malformed_3_LastContinuationMissing_338 ()
560 byte[] data = { 0xF7, 0xBF, 0xBF };
561 string s = utf8.GetString (data);
562 // exception is "really" expected here
// A 5-byte sequence (FB BF BF BF) with its last continuation byte missing
// must be rejected with DecoderException.
566 [ExpectedException (typeof (DecoderException))]
567 public void T3_Malformed_3_LastContinuationMissing_339 ()
// The lead byte must be 0xFB (a 5-byte lead). A first byte of 0x0F would be a
// plain ASCII control character followed by stray continuation bytes, which
// is not the truncated-sequence case this test is named for.
569 byte[] data = { 0xFB, 0xBF, 0xBF, 0xBF };
570 string s = utf8.GetString (data);
571 // exception is "really" expected here
// A 6-byte sequence (FD BF BF BF BF) with its last continuation byte missing must be rejected.
575 [ExpectedException (typeof (DecoderException))]
576 public void T3_Malformed_3_LastContinuationMissing_3310 ()
578 byte[] data = { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF };
579 string s = utf8.GetString (data);
580 // exception is "really" expected here
// Concatenation of the ten truncated sequences used by the
// T3_Malformed_3_LastContinuationMissing_* cases (C0, E0 80, F0 80 80, ...,
// FD BF BF BF BF); decoding the combined input must raise DecoderException.
// NOTE(review): the line that opens the "data" array initializer is not
// visible in this listing.
584 [ExpectedException (typeof (DecoderException))]
585 public void T3_Malformed_4_ConcatenationImcomplete ()
588 0xC0, 0xE0, 0x80, 0xF0, 0x80, 0x80, 0xF8, 0x80, 0x80, 0x80, 0xFC, 0x80, 0x80, 0x80, 0x80, 0xDF,
589 0xEF, 0xBF, 0xF7, 0xBF, 0xBF, 0xFB, 0xBF, 0xBF, 0xBF, 0xFD, 0xBF, 0xBF, 0xBF, 0xBF };
590 string s = utf8.GetString (data);
591 // exception is "really" expected here
// The byte 0xFE never appears in UTF-8; decoding it must raise DecoderException.
595 [ExpectedException (typeof (DecoderException))]
596 public void T3_Malformed_5_ImpossibleBytes_351 ()
598 byte[] data = { 0xFE };
599 string s = utf8.GetString (data);
600 // exception is "really" expected here
// The byte 0xFF never appears in UTF-8; decoding it must raise DecoderException.
604 [ExpectedException (typeof (DecoderException))]
605 public void T3_Malformed_5_ImpossibleBytes_352 ()
607 byte[] data = { 0xFF };
608 string s = utf8.GetString (data);
609 // exception is "really" expected here
// A run of the impossible bytes FE FE FF FF must raise DecoderException.
613 [ExpectedException (typeof (DecoderException))]
614 public void T3_Malformed_5_ImpossibleBytes_353 ()
616 byte[] data = { 0xFE, 0xFE, 0xFF, 0xFF };
617 string s = utf8.GetString (data);
618 // exception is "really" expected here
621 // Overlong == dangerous -> a "safe" decoder should reject them
// Overlong 2-byte encoding of '/' (U+002F), C0 AF, must be rejected.
624 [ExpectedException (typeof (DecoderException))]
625 public void T4_Overlong_1_ASCII_Slash_411 ()
627 byte[] data = { 0xC0, 0xAF };
628 string s = utf8.GetString (data);
629 // exception is "really" expected here
// Overlong 3-byte encoding of '/' (U+002F), E0 80 AF, must be rejected.
633 [ExpectedException (typeof (DecoderException))]
634 public void T4_Overlong_1_ASCII_Slash_412 ()
636 byte[] data = { 0xE0, 0x80, 0xAF };
637 string s = utf8.GetString (data);
638 // exception is "really" expected here
// Overlong 4-byte encoding of '/' (U+002F), F0 80 80 AF, must be rejected.
642 [ExpectedException (typeof (DecoderException))]
643 public void T4_Overlong_1_ASCII_Slash_413 ()
645 byte[] data = { 0xF0, 0x80, 0x80, 0xAF };
646 string s = utf8.GetString (data);
647 // exception is "really" expected here
// Overlong 5-byte encoding of '/' (U+002F), F8 80 80 80 AF, must be rejected.
651 [ExpectedException (typeof (DecoderException))]
652 public void T4_Overlong_1_ASCII_Slash_414 ()
654 byte[] data = { 0xF8, 0x80, 0x80, 0x80, 0xAF };
655 string s = utf8.GetString (data);
656 // exception is "really" expected here
// Overlong 6-byte encoding of '/' (U+002F), FC 80 80 80 80 AF, must be rejected.
660 [ExpectedException (typeof (DecoderException))]
661 public void T4_Overlong_1_ASCII_Slash_415 ()
663 byte[] data = { 0xFC, 0x80, 0x80, 0x80, 0x80, 0xAF };
664 string s = utf8.GetString (data);
665 // exception is "really" expected here
// Highest value still expressible as an overlong 2-byte form: C1 BF (U+007F) must be rejected.
669 [ExpectedException (typeof (DecoderException))]
670 public void T4_Overlong_2_MaximumBoundary_421 ()
672 byte[] data = { 0xC1, 0xBF };
673 string s = utf8.GetString (data);
674 // exception is "really" expected here
// Highest value still expressible as an overlong 3-byte form: E0 9F BF (U+07FF) must be rejected.
678 [ExpectedException (typeof (DecoderException))]
679 public void T4_Overlong_2_MaximumBoundary_422 ()
681 byte[] data = { 0xE0, 0x9F, 0xBF };
682 string s = utf8.GetString (data);
683 // exception is "really" expected here
// Highest value still expressible as an overlong 4-byte form: F0 8F BF BF (U+FFFF) must be rejected.
687 [ExpectedException (typeof (DecoderException))]
688 public void T4_Overlong_2_MaximumBoundary_423 ()
690 byte[] data = { 0xF0, 0x8F, 0xBF, 0xBF };
691 string s = utf8.GetString (data);
692 // exception is "really" expected here
// Highest value still expressible as an overlong 5-byte form: F8 87 BF BF BF must be rejected.
696 [ExpectedException (typeof (DecoderException))]
697 public void T4_Overlong_2_MaximumBoundary_424 ()
699 byte[] data = { 0xF8, 0x87, 0xBF, 0xBF, 0xBF };
700 string s = utf8.GetString (data);
701 // exception is "really" expected here
// Highest value still expressible as an overlong 6-byte form: FC 83 BF BF BF BF must be rejected.
705 [ExpectedException (typeof (DecoderException))]
706 public void T4_Overlong_2_MaximumBoundary_425 ()
708 byte[] data = { 0xFC, 0x83, 0xBF, 0xBF, 0xBF, 0xBF };
709 string s = utf8.GetString (data);
710 // exception is "really" expected here
// Overlong 2-byte encoding of NUL (U+0000), C0 80, must be rejected.
714 [ExpectedException (typeof (DecoderException))]
715 public void T4_Overlong_3_NUL_431 ()
717 byte[] data = { 0xC0, 0x80 };
718 string s = utf8.GetString (data);
719 // exception is "really" expected here
// Overlong 3-byte encoding of NUL (U+0000), E0 80 80, must be rejected.
723 [ExpectedException (typeof (DecoderException))]
724 public void T4_Overlong_3_NUL_432 ()
726 byte[] data = { 0xE0, 0x80, 0x80 };
727 string s = utf8.GetString (data);
728 // exception is "really" expected here
// Overlong 4-byte encoding of NUL (U+0000), F0 80 80 80, must be rejected.
732 [ExpectedException (typeof (DecoderException))]
733 public void T4_Overlong_3_NUL_433 ()
735 byte[] data = { 0xF0, 0x80, 0x80, 0x80 };
736 string s = utf8.GetString (data);
737 // exception is "really" expected here
// Overlong 5-byte encoding of NUL (U+0000), F8 80 80 80 80, must be rejected.
741 [ExpectedException (typeof (DecoderException))]
742 public void T4_Overlong_3_NUL_434 ()
744 byte[] data = { 0xF8, 0x80, 0x80, 0x80, 0x80 };
745 string s = utf8.GetString (data);
746 // exception is "really" expected here
// Overlong 6-byte encoding of NUL (U+0000), FC 80 80 80 80 80, must be rejected.
750 [ExpectedException (typeof (DecoderException))]
751 public void T4_Overlong_3_NUL_435 ()
753 byte[] data = { 0xFC, 0x80, 0x80, 0x80, 0x80, 0x80 };
754 string s = utf8.GetString (data);
755 // exception is "really" expected here
// Decodes ED A0 80, the 3-byte form of the UTF-16 surrogate U+D800 (55296).
// NOTE(review): the paired ExpectedException attributes and the trailing assert look like
// conditional-compilation alternatives whose directives are not visible here — confirm.
760 [ExpectedException (typeof (DecoderFallbackException))]
762 // MS Fx 1.1 accepts this
763 [Category ("NotDotNet")]
764 [ExpectedException (typeof (DecoderException))]
766 public void T5_IllegalCodePosition_1_UTF16Surrogates_511 ()
768 byte[] data = { 0xED, 0xA0, 0x80 };
769 string s = utf8.GetString (data);
770 // exception is "really" expected here
771 Assert.AreEqual (55296, s [0], "MS FX 1.1 behaviour");
// Decodes ED AD BF, the 3-byte form of the UTF-16 surrogate U+DB7F (56191).
// NOTE(review): the paired ExpectedException attributes and the trailing assert look like
// conditional-compilation alternatives whose directives are not visible here — confirm.
776 [ExpectedException (typeof (DecoderFallbackException))]
778 // MS Fx 1.1 accepts this
779 [Category ("NotDotNet")]
780 [ExpectedException (typeof (DecoderException))]
782 public void T5_IllegalCodePosition_1_UTF16Surrogates_512 ()
784 byte[] data = { 0xED, 0xAD, 0xBF };
785 string s = utf8.GetString (data);
786 // exception is "really" expected here
787 Assert.AreEqual (56191, s [0], "MS FX 1.1 behaviour");
// Decodes ED AE 80, the 3-byte form of the UTF-16 surrogate U+DB80 (56192).
// NOTE(review): the paired ExpectedException attributes and the trailing assert look like
// conditional-compilation alternatives whose directives are not visible here — confirm.
792 [ExpectedException (typeof (DecoderFallbackException))]
794 // MS Fx 1.1 accepts this
795 [Category ("NotDotNet")]
796 [ExpectedException (typeof (DecoderException))]
798 public void T5_IllegalCodePosition_1_UTF16Surrogates_513 ()
800 byte[] data = { 0xED, 0xAE, 0x80 };
801 string s = utf8.GetString (data);
802 // exception is "really" expected here
803 Assert.AreEqual (56192, s [0], "MS FX 1.1 behaviour");
// Decodes ED AF BF, the 3-byte form of the UTF-16 surrogate U+DBFF (56319).
// NOTE(review): the paired ExpectedException attributes and the trailing assert look like
// conditional-compilation alternatives whose directives are not visible here — confirm.
808 [ExpectedException (typeof (DecoderFallbackException))]
810 // MS Fx 1.1 accepts this
811 [Category ("NotDotNet")]
812 [ExpectedException (typeof (DecoderException))]
814 public void T5_IllegalCodePosition_1_UTF16Surrogates_514 ()
816 byte[] data = { 0xED, 0xAF, 0xBF };
817 string s = utf8.GetString (data);
818 // exception is "really" expected here
819 Assert.AreEqual (56319, s [0], "MS FX 1.1 behaviour");
// Decodes ED B0 80, the 3-byte form of the UTF-16 surrogate U+DC00 (56320).
// NOTE(review): the paired ExpectedException attributes and the trailing assert look like
// conditional-compilation alternatives whose directives are not visible here — confirm.
824 [ExpectedException (typeof (DecoderFallbackException))]
826 // MS Fx 1.1 accepts this
827 [Category ("NotDotNet")]
828 [ExpectedException (typeof (DecoderException))]
830 public void T5_IllegalCodePosition_1_UTF16Surrogates_515 ()
832 byte[] data = { 0xED, 0xB0, 0x80 };
833 string s = utf8.GetString (data);
834 // exception is "really" expected here
835 Assert.AreEqual (56320, s [0], "MS FX 1.1 behaviour");
// Decodes ED BE 80, the 3-byte form of the UTF-16 surrogate U+DF80 (57216).
// NOTE(review): the paired ExpectedException attributes and the trailing assert look like
// conditional-compilation alternatives whose directives are not visible here — confirm.
840 [ExpectedException (typeof (DecoderFallbackException))]
842 // MS Fx 1.1 accepts this
843 [Category ("NotDotNet")]
844 [ExpectedException (typeof (DecoderException))]
846 public void T5_IllegalCodePosition_1_UTF16Surrogates_516 ()
848 byte[] data = { 0xED, 0xBE, 0x80 };
849 string s = utf8.GetString (data);
850 // exception is "really" expected here
851 Assert.AreEqual (57216, s [0], "MS FX 1.1 behaviour");
// Decodes ED BF BF, the 3-byte form of the UTF-16 surrogate U+DFFF (57343).
// NOTE(review): the paired ExpectedException attributes and the trailing assert look like
// conditional-compilation alternatives whose directives are not visible here — confirm.
856 [ExpectedException (typeof (DecoderFallbackException))]
858 // MS Fx 1.1 accepts this
859 [Category ("NotDotNet")]
860 [ExpectedException (typeof (DecoderException))]
862 public void T5_IllegalCodePosition_1_UTF16Surrogates_517 ()
864 byte[] data = { 0xED, 0xBF, 0xBF };
865 string s = utf8.GetString (data);
866 // exception is "really" expected here
867 Assert.AreEqual (57343, s [0], "MS FX 1.1 behaviour");
// Decodes a surrogate pair written as two 3-byte sequences: U+D800 (55296) then U+DC00 (56320).
// NOTE(review): the paired ExpectedException attributes and the trailing asserts look like
// conditional-compilation alternatives whose directives are not visible here — confirm.
872 [ExpectedException (typeof (DecoderFallbackException))]
874 // MS Fx 1.1 accepts this
875 [Category ("NotDotNet")]
876 [ExpectedException (typeof (DecoderException))]
878 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_521 ()
880 byte[] data = { 0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80 };
881 string s = utf8.GetString (data);
882 // exception is "really" expected here
883 Assert.AreEqual (55296, s [0], "MS FX 1.1 behaviour");
884 Assert.AreEqual (56320, s [1], "MS FX 1.1 behaviour");
// Decodes a surrogate pair written as two 3-byte sequences: U+D800 (55296) then U+DFFF (57343).
// NOTE(review): the paired ExpectedException attributes and the trailing asserts look like
// conditional-compilation alternatives whose directives are not visible here — confirm.
889 [ExpectedException (typeof (DecoderFallbackException))]
891 // MS Fx 1.1 accepts this
892 [Category ("NotDotNet")]
893 [ExpectedException (typeof (DecoderException))]
895 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_522 ()
897 byte[] data = { 0xED, 0xA0, 0x80, 0xED, 0xBF, 0xBF };
898 string s = utf8.GetString (data);
899 // exception is "really" expected here
900 Assert.AreEqual (55296, s [0], "MS FX 1.1 behaviour");
901 Assert.AreEqual (57343, s [1], "MS FX 1.1 behaviour");
// Decodes a surrogate pair written as two 3-byte sequences: U+DB7F (56191) then U+DC00 (56320).
// NOTE(review): the paired ExpectedException attributes and the trailing asserts look like
// conditional-compilation alternatives whose directives are not visible here — confirm.
906 [ExpectedException (typeof (DecoderFallbackException))]
908 // MS Fx 1.1 accepts this
909 [Category ("NotDotNet")]
910 [ExpectedException (typeof (DecoderException))]
912 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_523 ()
914 byte[] data = { 0xED, 0xAD, 0xBF, 0xED, 0xB0, 0x80 };
915 string s = utf8.GetString (data);
916 // exception is "really" expected here
917 Assert.AreEqual (56191, s [0], "MS FX 1.1 behaviour");
918 Assert.AreEqual (56320, s [1], "MS FX 1.1 behaviour");
// Decodes a surrogate pair written as two 3-byte sequences: U+DB7F (56191) then U+DFFF (57343).
// NOTE(review): the paired ExpectedException attributes and the trailing asserts look like
// conditional-compilation alternatives whose directives are not visible here — confirm.
923 [ExpectedException (typeof (DecoderFallbackException))]
925 // MS Fx 1.1 accepts this
926 [Category ("NotDotNet")]
927 [ExpectedException (typeof (DecoderException))]
929 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_524 ()
931 byte[] data = { 0xED, 0xAD, 0xBF, 0xED, 0xBF, 0xBF };
932 string s = utf8.GetString (data);
933 // exception is "really" expected here
934 Assert.AreEqual (56191, s [0], "MS FX 1.1 behaviour");
935 Assert.AreEqual (57343, s [1], "MS FX 1.1 behaviour");
// Decodes a surrogate pair written as two 3-byte sequences: U+DB80 (56192) then U+DC00 (56320).
// NOTE(review): the paired ExpectedException attributes and the trailing asserts look like
// conditional-compilation alternatives whose directives are not visible here — confirm.
940 [ExpectedException (typeof (DecoderFallbackException))]
942 // MS Fx 1.1 accepts this
943 [Category ("NotDotNet")]
944 [ExpectedException (typeof (DecoderException))]
946 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_525 ()
948 byte[] data = { 0xED, 0xAE, 0x80, 0xED, 0xB0, 0x80 };
949 string s = utf8.GetString (data);
950 // exception is "really" expected here
951 Assert.AreEqual (56192, s [0], "MS FX 1.1 behaviour");
952 Assert.AreEqual (56320, s [1], "MS FX 1.1 behaviour");
// Decodes a surrogate pair written as two 3-byte sequences: U+DB80 (56192) then U+DFCF (57295).
// NOTE(review): the sibling cases pair each high surrogate with DC00 or DFFF (ED BF BF);
// the ED BF 8F / 57295 used here is self-consistent but may have been meant as DFFF — confirm.
// NOTE(review): the paired ExpectedException attributes and the trailing asserts look like
// conditional-compilation alternatives whose directives are not visible here — confirm.
957 [ExpectedException (typeof (DecoderFallbackException))]
959 // MS Fx 1.1 accepts this
960 [Category ("NotDotNet")]
961 [ExpectedException (typeof (DecoderException))]
963 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_526 ()
965 byte[] data = { 0xED, 0xAE, 0x80, 0xED, 0xBF, 0x8F };
966 string s = utf8.GetString (data);
967 // exception is "really" expected here
968 Assert.AreEqual (56192, s [0], "MS FX 1.1 behaviour");
969 Assert.AreEqual (57295, s [1], "MS FX 1.1 behaviour");
// Decodes a surrogate pair written as two 3-byte sequences: U+DBFF (56319) then U+DC00 (56320).
// NOTE(review): the paired ExpectedException attributes and the trailing asserts look like
// conditional-compilation alternatives whose directives are not visible here — confirm.
974 [ExpectedException (typeof (DecoderFallbackException))]
976 // MS Fx 1.1 accepts this
977 [Category ("NotDotNet")]
978 [ExpectedException (typeof (DecoderException))]
980 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_527 ()
982 byte[] data = { 0xED, 0xAF, 0xBF, 0xED, 0xB0, 0x80 };
983 string s = utf8.GetString (data);
984 // exception is "really" expected here
985 Assert.AreEqual (56319, s [0], "MS FX 1.1 behaviour");
986 Assert.AreEqual (56320, s [1], "MS FX 1.1 behaviour");
// Decodes a surrogate pair written as two 3-byte sequences: U+DBFF (56319) then U+DFFF (57343).
// NOTE(review): the paired ExpectedException attributes and the trailing asserts look like
// conditional-compilation alternatives whose directives are not visible here — confirm.
991 [ExpectedException (typeof (DecoderFallbackException))]
993 // MS Fx 1.1 accepts this
994 [Category ("NotDotNet")]
995 [ExpectedException (typeof (DecoderException))]
997 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_528 ()
999 byte[] data = { 0xED, 0xAF, 0xBF, 0xED, 0xBF, 0xBF };
1000 string s = utf8.GetString (data);
1001 // exception is "really" expected here
1002 Assert.AreEqual (56319, s [0], "MS FX 1.1 behaviour");
1003 Assert.AreEqual (57343, s [1], "MS FX 1.1 behaviour");
// EF BF BE (U+FFFE) is expected to decode without an exception and yield the
// single code unit 65534; the ExpectedException attribute is commented out.
1007 // MS Fx 1.1 accepts this
1008 // [ExpectedException (typeof (DecoderException))]
1009 public void T5_IllegalCodePosition_3_Other_531 ()
1011 byte[] data = { 0xEF, 0xBF, 0xBE };
1012 string s = utf8.GetString (data);
1013 // exception is "really" expected here
1014 Assert.AreEqual (65534, s [0], "MS FX 1.1 behaviour");
// EF BF BF (U+FFFF) is expected to decode without an exception and yield the
// single code unit 65535; the ExpectedException attribute is commented out.
1018 // MS Fx 1.1 accepts this
1019 // [ExpectedException (typeof (DecoderException))]
1020 public void T5_IllegalCodePosition_3_Other_532 ()
1022 byte[] data = { 0xEF, 0xBF, 0xBF };
1023 string s = utf8.GetString (data);
1024 // exception is "really" expected here
1025 Assert.AreEqual (65535, s [0], "MS FX 1.1 behaviour");
1029 // bug #75065 and #73086.
// U+FEFF must survive decoding as an ordinary character rather than being
// consumed as a byte-order mark: first through GetString on the bare bytes
// EF BB BF, then through a GetBytes/GetChars round trip of { U+FEFF, 'A' }.
1030 public void GetCharsFEFF ()
1032 byte [] data = new byte [] {0xEF, 0xBB, 0xBF};
1033 Encoding enc = new UTF8Encoding (false, true);
1034 string s = enc.GetString (data);
// Assert.AreEqual takes (expected, actual); this order keeps the failure
// message labels correct.
1035 Assert.AreEqual ("\uFEFF", s);
1037 Encoding utf = Encoding.UTF8;
1038 char[] testChars = {'\uFEFF','A'};
1040 byte[] bytes = utf.GetBytes(testChars);
1041 char[] chars = utf.GetChars(bytes);
1042 Assert.AreEqual ('\uFEFF', chars [0], "#1");
1043 Assert.AreEqual ('A', chars [1], "#2");
// A clone of the code page 65001 encoding must report IsReadOnly == false and
// must accept a new EncoderFallback.
// NOTE(review): the declaration of "e" is not terminated on its visible line;
// the rest of that statement is not shown in this listing.
1048 public void CloneNotReadOnly ()
1050 Encoding e = Encoding.GetEncoding (65001).Clone ()
1052 Assert.AreEqual (false, e.IsReadOnly);
1053 e.EncoderFallback = new EncoderExceptionFallback ();
// Decoding ED A2 8C with a UTF8Encoding built as (false, true) is expected to throw.
// NOTE(review): the two ExpectedException attributes name different exception types and look like
// conditional-compilation alternatives whose directives are not visible here — confirm.
1059 [ExpectedException (typeof (DecoderFallbackException))]
1061 [ExpectedException (typeof (ArgumentException))]
1062 [Category ("NotDotNet")] // MS Bug
1064 public void Bug77315 ()
1066 new UTF8Encoding (false, true).GetString (
1067 new byte [] {0xED, 0xA2, 0x8C});
// Encodes a lone high surrogate (U+D800) into a zero-length byte array, once
// through an Encoder fed a char[] and once through Encoding.UTF8.GetBytes on
// a string.
// NOTE(review): each half contains both an "expect 0 bytes" assertion and an
// "ArgumentException is expected" failure, and "int ret" is declared twice, so
// these look like conditional-compilation alternatives; the try openers and
// the catch bodies are also not visible in this listing — confirm.
1071 public void SufficientByteArray ()
1073 Encoder e = Encoding.UTF8.GetEncoder ();
1074 byte [] bytes = new byte [0];
1076 char [] chars = new char [] {'\uD800'};
// First call passes flush = false for the single surrogate char.
1077 e.GetBytes (chars, 0, 1, bytes, 0, false);
// Second call passes no new chars (count 0) with flush = true.
1079 int ret = e.GetBytes (chars, 1, 0, bytes, 0, true);
1081 Assert.AreEqual (0, ret, "drop insufficient char in 2.0: char[]");
1083 Assert.Fail ("ArgumentException is expected: char[]");
1085 } catch (ArgumentException ae) {
1091 string s = "\uD800";
1093 int ret = Encoding.UTF8.GetBytes (s, 0, 1, bytes, 0);
1095 Assert.AreEqual (0, ret, "drop insufficient char in 2.0: string");
1097 Assert.Fail ("ArgumentException is expected: string");
1099 } catch (ArgumentException ae) {
// Checks the byte counts Encoding.UTF8 reports and produces for U+FFFD, for
// lone surrogates (3 bytes each) and for a valid surrogate pair (4 bytes),
// then checks that a UTF8Encoding built as (false, true) throws
// EncoderFallbackException for a low surrogate followed by a high surrogate.
// NOTE(review): the declaration of "bIn", the remainder of the loop body and
// the try opener are not visible in this listing.
1106 [Test] // bug #565129
1107 public void SufficientByteArray2 ()
1109 var u = Encoding.UTF8;
1110 Assert.AreEqual (3, u.GetByteCount ("\uFFFD"), "#1-1");
1111 Assert.AreEqual (3, u.GetByteCount ("\uD800"), "#1-2");
1112 Assert.AreEqual (3, u.GetByteCount ("\uDC00"), "#1-3");
1113 Assert.AreEqual (4, u.GetByteCount ("\uD800\uDC00"), "#1-4");
1114 byte [] bytes = new byte [10];
1115 Assert.AreEqual (3, u.GetBytes ("\uDC00", 0, 1, bytes, 0), "#1-5"); // was bogus
1117 Assert.AreEqual (3, u.GetBytes ("\uFFFD").Length, "#2-1");
1118 Assert.AreEqual (3, u.GetBytes ("\uD800").Length, "#2-2");
1119 Assert.AreEqual (3, u.GetBytes ("\uDC00").Length, "#2-3");
1120 Assert.AreEqual (4, u.GetBytes ("\uD800\uDC00").Length, "#2-4");
// The condition is c < char.MaxValue, so U+FFFF itself is not visited.
1122 for (char c = char.MinValue; c < char.MaxValue; c++) {
1124 bIn = u.GetBytes (c.ToString ());
1128 new UTF8Encoding (false, true).GetBytes (new char [] {'\uDF45', '\uD808'}, 0, 2);
1129 Assert.Fail ("EncoderFallbackException is expected");
1130 } catch (EncoderFallbackException) {
// Decodes the single byte 183 (0xB7) with a UTF8Encoding constructed as
// (false, false) and expects exactly one output char from each of
// Decoder.GetCharCount, Decoder.GetChars and Encoding.GetString.
// 0xB7 appears among the continuation bytes listed in the
// DecoderFallbackExceptions test data, so on its own it is invalid input.
// NOTE(review): the trailing arguments of each assertion (output buffer,
// messages) are not visible in this listing.
1135 [Test] // bug #77550
1136 public void DecoderFallbackSimple ()
1138 UTF8Encoding e = new UTF8Encoding (false, false);
1139 AssertType.AreEqual (1, e.GetDecoder ().GetCharCount (
1140 new byte [] {(byte) 183}, 0, 1),
1142 AssertType.AreEqual (1, e.GetDecoder().GetChars (
1143 new byte [] {(byte) 183}, 0, 1,
1146 AssertType.AreEqual (1, e.GetString (new byte [] {(byte) 183}).Length,
// Creates a fallback buffer from Encoding.UTF8.DecoderFallback, casts it
// to DecoderReplacementFallbackBuffer and checks its behaviour after one
// Fallback call: Fallback returns true, MovePrevious returns false, one
// char remains, and that char is U+FFFD.
1151 public void FallbackDefaultEncodingUTF8 ()
1153 DecoderReplacementFallbackBuffer b =
1154 Encoding.UTF8.DecoderFallback.CreateFallbackBuffer ()
// 'as' yields null if the buffer has a different type; the first use of
// b below would then fail with a NullReferenceException.
1155 as DecoderReplacementFallbackBuffer;
1156 AssertType.IsTrue (b.Fallback (new byte [] {}, 0), "#1");
1157 AssertType.IsFalse (b.MovePrevious (), "#2");
1158 AssertType.AreEqual (1, b.Remaining, "#3");
1159 AssertType.AreEqual ('\uFFFD', b.GetNextChar (), "#4");
// Regression test (bug 415628, per the method name): reads up to 8000
// bytes from Test/resources/415628.bin and decodes them with
// Encoding.UTF8.GetString. The decoded string is discarded and no
// assertion is visible here, so the visible check is only that decoding
// completes without throwing.
1163 [Category ("MobileNotWorking")]
1164 public void Bug415628 ()
1166 using (var f = File.Open ("Test/resources/415628.bin", FileMode.Open)) {
1167 BinaryReader br = new BinaryReader (f);
1168 byte [] buf = br.ReadBytes (8000);
1169 Encoding.UTF8.GetString(buf);
// Regression test (bug 10788, per the method name): decoding 4096 input
// bytes into a 10-char array starting at char index 9 leaves room for a
// single char, so GetChars is expected to throw ArgumentException.
1175 [ExpectedException (typeof (ArgumentException))]
1176 public void Bug10788()
// new byte[4096] is zero-filled.
1178 byte[] bytes = new byte[4096];
1179 char[] chars = new char[10];
1181 Encoding.UTF8.GetDecoder ().GetChars (bytes, 0, 4096, chars, 9, false);
// Regression test (bug 10789, per the method name): boundary behaviour
// of Decoder.GetChars when charIndex is at or beyond the end of the
// destination array.
1185 public void Bug10789()
1187 byte[] bytes = new byte[4096];
1188 char[] chars = new char[10];
// #1: charIndex == chars.Length with one input byte leaves no room for
// output, so ArgumentException is expected.
1191 Encoding.UTF8.GetDecoder ().GetChars (bytes, 0, 1, chars, 10, false);
1192 Assert.Fail ("ArgumentException is expected #1");
1193 } catch (ArgumentException) {
// #2: charIndex beyond chars.Length is an out-of-range index, so
// ArgumentOutOfRangeException is expected.
1197 Encoding.UTF8.GetDecoder ().GetChars (bytes, 0, 1, chars, 11, false);
1198 Assert.Fail ("ArgumentOutOfRangeException is expected #2");
1199 } catch (ArgumentOutOfRangeException) {
// #3: with zero input bytes, charIndex == chars.Length is accepted and
// zero chars are written.
1202 int charactersWritten = Encoding.UTF8.GetDecoder ().GetChars (bytes, 0, 0, chars, 10, false);
1203 Assert.AreEqual (0, charactersWritten, "#3");
1206 // DecoderFallbackExceptionTest
1207 // This struct describes a DecoderFallbackExceptions test. It
1208 // contains the expected indexes (eindex) and bad-bytes lengths
1209 // (elen) delivered by the first and subsequent
1210 // DecoderFallbackExceptions thrown when the UTF-8 conversion routines
1211 // are exposed to the array of bytes (bytes) contained in this test.
1212 // It also has a nice description (description) for documentation and
1215 // The hardcoded 'eindex' and 'elen' info is the output that you will
1216 // get if you run these byte sequences on the MS.NET platform.
// One test case for the decoder-fallback-exception tests: an input byte
// sequence plus the exceptions expected while decoding it.
// NOTE(review): the constructor's parameter list and some members are
// not visible in this listing.
1217 struct DecoderFallbackExceptionTest
// Human-readable label; also embedded in the constructor's error message.
1219 public string description;
// Input bytes handed to the decoder under test.
1220 public byte [] bytes;
// Expected DecoderFallbackException.Index values, in order of occurrence.
1221 public int [] eindex;
1223 public DecoderFallbackExceptionTest (
1229 this.description = description;
// eindex and elen are parallel arrays (one entry per expected
// exception); reject a test definition whose lengths disagree.
1231 if (eindex.Length != elen.Length)
1232 throw new ApplicationException ("eindex.Length != elen.Length in test '" + description + "'");
1233 this.eindex = eindex;
1238 // try to convert all of the current test's bytes with GetChars()
// Decodes all of t.bytes in a single flushing GetChars call and checks
// the outcome against the test's first expected exception:
//  - no exception is acceptable only when t.eindex is empty;
//  - otherwise the DecoderFallbackException must report Index ==
//    t.eindex[0], BytesUnknown.Length == t.elen[0], and BytesUnknown
//    must equal the input bytes starting at that index.
// NOTE(review): several parameters and the Assert/String.Format call
// heads are not visible in this listing.
1240 private void DecoderFallbackExceptions_GetChars (
1244 DecoderFallbackExceptionTest t)
// flush == true: the whole input is presented as one complete sequence.
1247 dec.GetChars (t.bytes, 0, t.bytes.Length, chars, 0, true);
// Reaching this point means decoding succeeded; only valid for tests
// that expect no exception.
1249 t.eindex.Length == 0,
1251 "test#{0}-1: UNEXPECTED SUCCESS",
1253 } catch(DecoderFallbackException ex) {
// An exception is acceptable only if the test expects at least one.
1255 t.eindex.Length > 0,
1257 "test#{0}-1: UNEXPECTED FAIL",
// Position of the first bad byte sequence.
1260 ex.Index == t.eindex[0],
1262 "test#{0}-1: Expected exception at {1} not {2}.",
// Number of bytes reported as undecodable.
1267 ex.BytesUnknown.Length == t.elen[0],
1269 "test#{0}-1: Expected BytesUnknown.Length of {1} not {2}.",
1272 ex.BytesUnknown.Length));
// The reported bytes must match the input at the reported position.
1273 for (int i = 0; i < ex.BytesUnknown.Length; i++)
1275 ex.BytesUnknown[i] == t.bytes[ex.Index + i],
1277 "test#{0}-1: expected byte {1:X} not {2:X} at {3}.",
1279 t.bytes[ex.Index + i],
1286 // convert bytes to string using a fixed blocksize.
1287 // If something bad happens, try to recover using the
1288 // DecoderFallbackException info.
// Decodes t.bytes in chunks of block_size bytes and checks every
// DecoderFallbackException against the test's expected (eindex, elen)
// entries in order; 'ce' counts the exceptions seen so far. After each
// exception the position 'c' is advanced past the reported bad bytes so
// decoding can continue with the rest of the input.
// NOTE(review): several parameters, the conversion call head and parts
// of the loop body are not visible in this listing.
1289 private void DecoderFallbackExceptions_Convert (
1293 DecoderFallbackExceptionTest t,
1296 int charsUsed, bytesUsed;
1299 int ce = 0; // current exception
// 'c' is the offset of the current block within t.bytes; it is advanced
// inside the loop body rather than in the for-header.
1300 for (int c = 0; c < t.bytes.Length; ) {
// bu: bytes offered in this call, clipped to what remains of the input.
1302 int bu = c + block_size > t.bytes.Length
1303 ? t.bytes.Length - c
1307 chars, 0, chars.Length,
// True once this block reaches the end of the input (presumably the
// flush argument of the conversion call - confirm against the full file).
1308 c + bu >= t.bytes.Length,
1309 out bytesUsed, out charsUsed,
1312 } catch(DecoderFallbackException ex) {
// More exceptions than the test expects is a failure.
1314 t.eindex.Length > ce,
1316 "test#{0}-2-{1}#{2}: UNEXPECTED FAIL (c={3}, eIndex={4}, eBytesUnknwon={5})",
1317 testno, block_size, ce, c,
1319 ex.BytesUnknown.Length));
// ex.Index is added to c before comparing, i.e. it is treated as
// relative to the start of the current block.
1321 ex.Index + c == t.eindex[ce],
1323 "test#{0}-2-{1}#{2}: Expected at {3} not {4}.",
1324 testno, block_size, ce,
1328 ex.BytesUnknown.Length == t.elen[ce],
1330 "test#{0}-2-{1}#{2}: Expected BytesUnknown.Length of {3} not {4} @{5}.",
1331 testno, block_size, ce,
// Report the expectation for the current exception (elen[ce]), the same
// value the assertion above compares against.
1332 t.elen[ce], ex.BytesUnknown.Length, c));
// The reported bytes must match the input at the absolute position.
1333 for (int i = 0; i < ex.BytesUnknown.Length; i++)
1335 ex.BytesUnknown[i] == t.bytes[ex.Index + i + c],
1337 "test#{0}-2-{1}#{2}: Expected byte {3:X} not {4:X} at {5}.",
1338 testno, block_size, ce,
1339 t.bytes[ex.Index + i + c],
// Resume just past the bad bytes.
1342 c += ex.BytesUnknown.Length + ex.Index;
// After the loop: every expected exception must have been observed.
1349 t.eindex.Length <= ce,
1351 "test#{0}-2-{1}: UNEXPECTED SUCCESS",
1352 testno, block_size));
1356 public void DecoderFallbackExceptions ()
1359 DecoderFallbackExceptionTest [] tests = new DecoderFallbackExceptionTest []
1362 new DecoderFallbackExceptionTest (
1363 "Greek word 'kosme'",
1367 0xce, 0xba, 0xe1, 0xbd, 0xb9, 0xcf,
1368 0x83, 0xce, 0xbc, 0xce, 0xb5 }),
1370 new DecoderFallbackExceptionTest (
1371 "First possible sequence of 1 byte",
1374 new byte [] { 0x00 }),
1376 new DecoderFallbackExceptionTest (
1377 "First possible sequence of 2 bytes",
1380 new byte [] { 0xc2, 0x80 }),
1382 new DecoderFallbackExceptionTest (
1383 "First possible sequence of 3 bytes",
1386 new byte [] { 0xe0, 0xa0, 0x80 }),
1388 new DecoderFallbackExceptionTest (
1389 "First possible sequence of 4 bytes",
1392 new byte [] { 0xf0, 0x90, 0x80, 0x80 }),
1394 new DecoderFallbackExceptionTest (
1395 "First possible sequence of 5 bytes",
1396 new int [] { 0, 1, 2, 3, 4 },
1397 new int [] { 1, 1, 1, 1, 1 },
1398 new byte [] { 0xf8, 0x88, 0x80, 0x80, 0x80 }),
1400 new DecoderFallbackExceptionTest (
1401 "First possible sequence of 6 bytes",
1402 new int [] { 0, 1, 2, 3, 4, 5 },
1403 new int [] { 1, 1, 1, 1, 1, 1 },
1405 0xfc, 0x84, 0x80, 0x80, 0x80, 0x80 }),
1407 new DecoderFallbackExceptionTest (
1408 "Last possible sequence of 1 byte",
1411 new byte [] { 0x7f }),
1413 new DecoderFallbackExceptionTest (
1414 "Last possible sequence of 2 bytes",
1417 new byte [] { 0xdf, 0xbf }),
1419 new DecoderFallbackExceptionTest (
1420 "Last possible sequence of 3 bytes",
1423 new byte [] { 0xef, 0xbf, 0xbf }),
1425 new DecoderFallbackExceptionTest (
1426 "Last possible sequence of 4 bytes",
1427 new int [] { 0, 1, 2, 3 },
1428 new int [] { 1, 1, 1, 1 },
1429 new byte [] { 0xf7, 0xbf, 0xbf, 0xbf }),
1431 new DecoderFallbackExceptionTest (
1432 "Last possible sequence of 5 bytes",
1433 new int [] { 0, 1, 2, 3, 4 },
1434 new int [] { 1, 1, 1, 1, 1 },
1435 new byte [] { 0xfb, 0xbf, 0xbf, 0xbf, 0xbf }),
1437 new DecoderFallbackExceptionTest (
1438 "Last possible sequence of 6 bytes",
1439 new int [] { 0, 1, 2, 3, 4, 5 },
1440 new int [] { 1, 1, 1, 1, 1, 1 },
1441 new byte [] { 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf }),
1443 new DecoderFallbackExceptionTest (
1444 "U-0000D7FF = ed 9f bf",
1447 new byte [] { 0xed, 0x9f, 0xbf }),
1449 new DecoderFallbackExceptionTest (
1450 "U-0000E000 = ee 80 80",
1453 new byte [] { 0xee, 0x80, 0x80 }),
1455 new DecoderFallbackExceptionTest (
1456 "U-0000FFFD = ef bf bd",
1459 new byte [] { 0xef, 0xbf, 0xbd }),
1461 new DecoderFallbackExceptionTest (
1462 "U-0010FFFF = f4 8f bf bf",
1465 new byte [] { 0xf4, 0x8f, 0xbf, 0xbf }),
1467 new DecoderFallbackExceptionTest (
1468 "U-00110000 = f4 90 80 80",
1469 new int [] { 0, 2, 3 },
1470 new int [] { 2, 1, 1 },
1471 new byte [] { 0xf4, 0x90, 0x80, 0x80 }),
1473 new DecoderFallbackExceptionTest (
1474 "First continuation byte 0x80",
1477 new byte [] { 0x80 }),
1479 new DecoderFallbackExceptionTest (
1480 "Last continuation byte 0xbf",
1483 new byte [] { 0xbf }),
1485 new DecoderFallbackExceptionTest (
1486 "2 continuation bytes",
1487 new int [] { 0, 1 },
1488 new int [] { 1, 1 },
1489 new byte [] { 0x80, 0xbf }),
1491 new DecoderFallbackExceptionTest (
1492 "3 continuation bytes",
1493 new int [] { 0, 1, 2 },
1494 new int [] { 1, 1, 1 },
1495 new byte [] { 0x80, 0xbf, 0x80 }),
1497 new DecoderFallbackExceptionTest (
1498 "4 continuation bytes",
1499 new int [] { 0, 1, 2, 3 },
1500 new int [] { 1, 1, 1, 1 },
1501 new byte [] { 0x80, 0xbf, 0x80, 0xbf }),
1503 new DecoderFallbackExceptionTest (
1504 "5 continuation bytes",
1505 new int [] { 0, 1, 2, 3, 4 },
1506 new int [] { 1, 1, 1, 1, 1 },
1507 new byte [] { 0x80, 0xbf, 0x80, 0xbf, 0x80 }),
1509 new DecoderFallbackExceptionTest (
1510 "6 continuation bytes",
1511 new int [] { 0, 1, 2, 3, 4, 5 },
1512 new int [] { 1, 1, 1, 1, 1, 1 },
1514 0x80, 0xbf, 0x80, 0xbf, 0x80, 0xbf }),
1516 new DecoderFallbackExceptionTest (
1517 "7 continuation bytes",
1518 new int [] { 0, 1, 2, 3, 4, 5, 6 },
1519 new int [] { 1, 1, 1, 1, 1, 1, 1 },
1521 0x80, 0xbf, 0x80, 0xbf, 0x80, 0xbf,
1524 new DecoderFallbackExceptionTest (
1525 "Sequence of all 64 continuation bytes",
1527 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
1528 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
1529 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
1530 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
1531 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
1532 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
1535 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1536 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1537 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1538 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1539 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1540 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1543 0x80, 0x81, 0x82, 0x83, 0x84, 0x85,
1544 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b,
1545 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91,
1546 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
1547 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d,
1548 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3,
1549 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9,
1550 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
1551 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5,
1552 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb,
1553 0xbc, 0xbd, 0xbe, 0xbf }),
1555 new DecoderFallbackExceptionTest (
1556 "All 32 first bytes of 2-byte sequences (0xc0-0xdf), each followed by a space character",
1574 0xc0, 0x20, 0xc1, 0x20, 0xc2, 0x20,
1575 0xc3, 0x20, 0xc4, 0x20, 0xc5, 0x20,
1576 0xc6, 0x20, 0xc7, 0x20, 0xc8, 0x20,
1577 0xc9, 0x20, 0xca, 0x20, 0xcb, 0x20,
1578 0xcc, 0x20, 0xcd, 0x20, 0xce, 0x20,
1579 0xcf, 0x20, 0xd0, 0x20, 0xd1, 0x20,
1580 0xd2, 0x20, 0xd3, 0x20, 0xd4, 0x20,
1581 0xd5, 0x20, 0xd6, 0x20, 0xd7, 0x20,
1582 0xd8, 0x20, 0xd9, 0x20, 0xda, 0x20,
1583 0xdb, 0x20, 0xdc, 0x20, 0xdd, 0x20,
1584 0xde, 0x20, 0xdf, 0x20 }),
1586 new DecoderFallbackExceptionTest (
1587 "All 16 first bytes of 3-byte sequences (0xe0-0xef), each followed by a space character",
1599 0xe0, 0x20, 0xe1, 0x20, 0xe2, 0x20,
1600 0xe3, 0x20, 0xe4, 0x20, 0xe5, 0x20,
1601 0xe6, 0x20, 0xe7, 0x20, 0xe8, 0x20,
1602 0xe9, 0x20, 0xea, 0x20, 0xeb, 0x20,
1603 0xec, 0x20, 0xed, 0x20, 0xee, 0x20,
1606 new DecoderFallbackExceptionTest (
1607 "All 8 first bytes of 4-byte sequences (0xf0-0xf7), each followed by a space character",
1608 new int [] { 0, 2, 4, 6, 8, 10, 12, 14 },
1609 new int [] { 1, 1, 1, 1, 1, 1, 1, 1 },
1611 0xf0, 0x20, 0xf1, 0x20, 0xf2, 0x20,
1612 0xf3, 0x20, 0xf4, 0x20, 0xf5, 0x20,
1613 0xf6, 0x20, 0xf7, 0x20 }),
1615 new DecoderFallbackExceptionTest (
1616 "All 4 first bytes of 5-byte sequences (0xf8-0xfb), each followed by a space character",
1617 new int [] { 0, 2, 4, 6 },
1618 new int [] { 1, 1, 1, 1 },
1620 0xf8, 0x20, 0xf9, 0x20, 0xfa, 0x20,
1623 new DecoderFallbackExceptionTest (
1624 "All 2 first bytes of 6-byte sequences (0xfc-0xfd), each followed by a space character",
1625 new int [] { 0, 2 },
1626 new int [] { 1, 1 },
1627 new byte [] { 0xfc, 0x20, 0xfd, 0x20 }),
1629 new DecoderFallbackExceptionTest (
1630 "2-byte sequence with last byte missing",
1633 new byte [] { 0xc0 }),
1635 new DecoderFallbackExceptionTest (
1636 "3-byte sequence with last byte missing",
1639 new byte [] { 0xe0, 0x80 }),
1641 new DecoderFallbackExceptionTest (
1642 "4-byte sequence with last byte missing",
1643 new int [] { 0, 2 },
1644 new int [] { 2, 1 },
1645 new byte [] { 0xf0, 0x80, 0x80 }),
1647 new DecoderFallbackExceptionTest (
1648 "5-byte sequence with last byte missing",
1649 new int [] { 0, 1, 2, 3 },
1650 new int [] { 1, 1, 1, 1 },
1651 new byte [] { 0xf8, 0x80, 0x80, 0x80 }),
1653 new DecoderFallbackExceptionTest (
1654 "6-byte sequence with last byte missing",
1655 new int [] { 0, 1, 2, 3, 4 },
1656 new int [] { 1, 1, 1, 1, 1 },
1657 new byte [] { 0xfc, 0x80, 0x80, 0x80, 0x80 }),
1659 new DecoderFallbackExceptionTest (
1660 "2-byte sequence with last byte missing",
1663 new byte [] { 0xdf }),
1665 new DecoderFallbackExceptionTest (
1666 "3-byte sequence with last byte missing",
1669 new byte [] { 0xef, 0xbf }),
1671 new DecoderFallbackExceptionTest (
1672 "4-byte sequence with last byte missing",
1673 new int [] { 0, 1, 2 },
1674 new int [] { 1, 1, 1 },
1675 new byte [] { 0xf7, 0xbf, 0xbf }),
1677 new DecoderFallbackExceptionTest (
1678 "5-byte sequence with last byte missing",
1679 new int [] { 0, 1, 2, 3 },
1680 new int [] { 1, 1, 1, 1 },
1681 new byte [] { 0xfb, 0xbf, 0xbf, 0xbf }),
1683 new DecoderFallbackExceptionTest (
1684 "6-byte sequence with last byte missing",
1685 new int [] { 0, 1, 2, 3, 4 },
1686 new int [] { 1, 1, 1, 1, 1 },
1687 new byte [] { 0xfd, 0xbf, 0xbf, 0xbf, 0xbf }),
1689 new DecoderFallbackExceptionTest (
1690 "All the 10 sequences of 3.3 concatenated",
1697 25, 26, 27, 28, 29 },
1706 0xc0, 0xe0, 0x80, 0xf0, 0x80, 0x80,
1707 0xf8, 0x80, 0x80, 0x80, 0xfc, 0x80,
1708 0x80, 0x80, 0x80, 0xdf, 0xef, 0xbf,
1709 0xf7, 0xbf, 0xbf, 0xfb, 0xbf, 0xbf,
1710 0xbf, 0xfd, 0xbf, 0xbf, 0xbf, 0xbf }),
1712 new DecoderFallbackExceptionTest (
1716 new byte [] { 0xfe }),
1718 new DecoderFallbackExceptionTest (
1722 new byte [] { 0xff }),
1724 new DecoderFallbackExceptionTest (
1725 "Bad chars fe fe ff ff",
1726 new int [] { 0, 1, 2, 3 },
1727 new int [] { 1, 1, 1, 1 },
1728 new byte [] { 0xfe, 0xfe, 0xff, 0xff }),
1730 new DecoderFallbackExceptionTest (
1731 "Overlong U+002F = c0 af",
1732 new int [] { 0, 1 },
1733 new int [] { 1, 1 },
1734 new byte [] { 0xc0, 0xaf }),
1736 new DecoderFallbackExceptionTest (
1737 "Overlong U+002F = e0 80 af",
1738 new int [] { 0, 2 },
1739 new int [] { 2, 1 },
1740 new byte [] { 0xe0, 0x80, 0xaf }),
1742 new DecoderFallbackExceptionTest (
1743 "Overlong U+002F = f0 80 80 af",
1744 new int [] { 0, 2, 3 },
1745 new int [] { 2, 1, 1 },
1746 new byte [] { 0xf0, 0x80, 0x80, 0xaf }),
1748 new DecoderFallbackExceptionTest (
1749 "Overlong U+002F = f8 80 80 80 af",
1750 new int [] { 0, 1, 2, 3, 4 },
1751 new int [] { 1, 1, 1, 1, 1 },
1752 new byte [] { 0xf8, 0x80, 0x80, 0x80, 0xaf }),
1754 new DecoderFallbackExceptionTest (
1755 "Overlong U+002F = fc 80 80 80 80 af",
1756 new int [] { 0, 1, 2, 3, 4, 5 },
1757 new int [] { 1, 1, 1, 1, 1, 1 },
1759 0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf }),
1761 new DecoderFallbackExceptionTest (
1762 "Maximum overlong U-0000007F",
1763 new int [] { 0, 1 },
1764 new int [] { 1, 1 },
1765 new byte [] { 0xc1, 0xbf }),
1767 new DecoderFallbackExceptionTest (
1768 "Maximum overlong U-000007FF",
1769 new int [] { 0, 2 },
1770 new int [] { 2, 1, },
1771 new byte [] { 0xe0, 0x9f, 0xbf }),
1773 new DecoderFallbackExceptionTest (
1774 "Maximum overlong U-0000FFFF",
1775 new int [] { 0, 2, 3 },
1776 new int [] { 2, 1, 1 },
1777 new byte [] { 0xf0, 0x8f, 0xbf, 0xbf }),
1779 new DecoderFallbackExceptionTest (
1780 "Maximum overlong U-001FFFFF",
1781 new int [] { 0, 1, 2, 3, 4 },
1782 new int [] { 1, 1, 1, 1, 1 },
1783 new byte [] { 0xf8, 0x87, 0xbf, 0xbf, 0xbf }),
1785 new DecoderFallbackExceptionTest (
1786 "Maximum overlong U-03FFFFFF",
1787 new int [] { 0, 1, 2, 3, 4, 5 },
1788 new int [] { 1, 1, 1, 1, 1, 1 },
1790 0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf }),
1792 new DecoderFallbackExceptionTest (
1793 "Null overlong c0 80",
1794 new int [] { 0, 1 },
1795 new int [] { 1, 1 },
1796 new byte [] { 0xc0, 0x80, 0x22 }),
1798 new DecoderFallbackExceptionTest (
1799 "Null overlong e0 80 80",
1800 new int [] { 0, 2 },
1801 new int [] { 2, 1 },
1802 new byte [] { 0xe0, 0x80, 0x80 }),
1804 new DecoderFallbackExceptionTest (
1805 "Null overlong f0 80 80 80",
1806 new int [] { 0, 2, 3 },
1807 new int [] { 2, 1, 1 },
1808 new byte [] { 0xf0, 0x80, 0x80, 0x80 }),
1810 new DecoderFallbackExceptionTest (
1811 "Null overlong f8 80 80 80 80",
1812 new int [] { 0, 1, 2, 3, 4 },
1813 new int [] { 1, 1, 1, 1, 1 },
1814 new byte [] { 0xf8, 0x80, 0x80, 0x80, 0x80 }),
1816 new DecoderFallbackExceptionTest (
1817 "Null overlong fc 80 80 80 80 80",
1818 new int [] { 0, 1, 2, 3, 4, 5 },
1819 new int [] { 1, 1, 1, 1, 1, 1 },
1821 0xfc, 0x80, 0x80, 0x80, 0x80, 0x80 }),
1823 new DecoderFallbackExceptionTest (
1824 "Single UTF-16 surrogate U+D800",
1825 new int [] { 0, 2 },
1826 new int [] { 2, 1 },
1827 new byte [] { 0xed, 0xa0, 0x80 }),
1829 new DecoderFallbackExceptionTest (
1830 "Single UTF-16 surrogate U+DB7F",
1831 new int [] { 0, 2 },
1832 new int [] { 2, 1 },
1833 new byte [] { 0xed, 0xad, 0xbf }),
1835 new DecoderFallbackExceptionTest (
1836 "Single UTF-16 surrogate U+DB80",
1837 new int [] { 0, 2 },
1838 new int [] { 2, 1 },
1839 new byte [] { 0xed, 0xae, 0x80 }),
1841 new DecoderFallbackExceptionTest (
1842 "Single UTF-16 surrogate U+DBFF",
1843 new int [] { 0, 2 },
1844 new int [] { 2, 1 },
1845 new byte [] { 0xed, 0xaf, 0xbf }),
1847 new DecoderFallbackExceptionTest (
1848 "Single UTF-16 surrogate U+DC00",
1849 new int [] { 0, 2 },
1850 new int [] { 2, 1 },
1851 new byte [] { 0xed, 0xb0, 0x80 }),
1853 new DecoderFallbackExceptionTest (
1854 "Single UTF-16 surrogate U+DF80",
1855 new int [] { 0, 2 },
1856 new int [] { 2, 1 },
1857 new byte [] { 0xed, 0xbe, 0x80 }),
1859 new DecoderFallbackExceptionTest (
1860 "Single UTF-16 surrogate U+DFFF",
1861 new int [] { 0, 2 },
1862 new int [] { 2, 1 },
1863 new byte [] { 0xed, 0xbf, 0xbf }),
1865 new DecoderFallbackExceptionTest (
1866 "Paired UTF-16 surrogate U+D800 U+DC00",
1867 new int [] { 0, 2, 3, 5 },
1868 new int [] { 2, 1, 2, 1 },
1870 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80 }),
1872 new DecoderFallbackExceptionTest (
1873 "Paired UTF-16 surrogate U+D800 U+DFFF",
1874 new int [] { 0, 2, 3, 5 },
1875 new int [] { 2, 1, 2, 1 },
1877 0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf }),
1879 new DecoderFallbackExceptionTest (
1880 "Paired UTF-16 surrogate U+DB7F U+DC00",
1881 new int [] { 0, 2, 3, 5 },
1882 new int [] { 2, 1, 2, 1 },
1884 0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80 }),
1886 new DecoderFallbackExceptionTest (
1887 "Paired UTF-16 surrogate U+DB7F U+DFFF",
1888 new int [] { 0, 2, 3, 5 },
1889 new int [] { 2, 1, 2, 1 },
1891 0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf }),
1893 new DecoderFallbackExceptionTest (
1894 "Paired UTF-16 surrogate U+DB80 U+DC00",
1895 new int [] { 0, 2, 3, 5 },
1896 new int [] { 2, 1, 2, 1 },
1898 0xed, 0xae, 0x80, 0xed, 0xb0, 0x80 }),
1900 new DecoderFallbackExceptionTest (
1901 "Paired UTF-16 surrogate U+DB80 U+DFFF",
1902 new int [] { 0, 2, 3, 5 },
1903 new int [] { 2, 1, 2, 1 },
1905 0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf }),
1907 new DecoderFallbackExceptionTest (
1908 "Paired UTF-16 surrogate U+DBFF U+DC00",
1909 new int [] { 0, 2, 3, 5 },
1910 new int [] { 2, 1, 2, 1 },
1912 0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80 }),
1914 new DecoderFallbackExceptionTest (
1915 "Paired UTF-16 surrogate U+DBFF U+DFFF",
1916 new int [] { 0, 2, 3, 5 },
1917 new int [] { 2, 1, 2, 1 },
1919 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf }),
1921 new DecoderFallbackExceptionTest (
1922 "Illegal code position U+FFFE",
1925 new byte [] { 0xef, 0xbf, 0xbe }),
1927 new DecoderFallbackExceptionTest (
1928 "Illegal code position U+FFFF",
1931 new byte [] { 0xef, 0xbf, 0xbf }),
1933 Encoding utf8 = Encoding.GetEncoding (
1935 new EncoderExceptionFallback(),
1936 new DecoderExceptionFallback());
1937 Decoder dec = utf8.GetDecoder ();
1940 for(int t = 0; t < tests.Length; t++) {
1941 chars = new char [utf8.GetMaxCharCount (tests[t].bytes.Length)];
1943 // #1 complete conversion
1944 DecoderFallbackExceptions_GetChars (chars, t, dec, tests[t]);
1946 // #2 convert with several block_sizes
1947 for (int bs = 1; bs < tests[t].bytes.Length; bs++)
1948 DecoderFallbackExceptions_Convert (chars, t, dec, tests[t], bs);