2 // UTF8EncodingTest.cs - NUnit Test Cases for System.Text.UTF8Encoding
5 // Patrick Kalkman kalkman@cistron.nl
6 // Sebastien Pouliot (spouliot@motus.com)
8 // (C) 2003 Patrick Kalkman
9 // (C) 2004 Novell (http://www.novell.com)
12 using NUnit.Framework;
18 using DecoderException = System.Text.DecoderFallbackException;
20 using DecoderException = System.ArgumentException;
23 using AssertType = NUnit.Framework.Assert;
25 namespace MonoTests.System.Text
28 public class UTF8EncodingTest
30 private UTF8Encoding utf8;
35 utf8 = new UTF8Encoding (true, true);
// The shared strict UTF-8 encoding is expected to report IsBrowserDisplay as true.
[Test]
public void IsBrowserDisplay ()
{
	bool browserDisplay = utf8.IsBrowserDisplay;
	Assert.IsTrue (browserDisplay);
}
// The shared strict UTF-8 encoding is expected to report IsBrowserSave as true.
[Test]
public void IsBrowserSave ()
{
	bool browserSave = utf8.IsBrowserSave;
	Assert.IsTrue (browserSave);
}
// The shared strict UTF-8 encoding is expected to report IsMailNewsDisplay as true.
[Test]
public void IsMailNewsDisplay ()
{
	bool mailNewsDisplay = utf8.IsMailNewsDisplay;
	Assert.IsTrue (mailNewsDisplay);
}
// The shared strict UTF-8 encoding is expected to report IsMailNewsSave as true.
[Test]
public void IsMailNewsSave ()
{
	bool mailNewsSave = utf8.IsMailNewsSave;
	Assert.IsTrue (mailNewsSave);
}
// Two default-constructed UTF8Encoding instances must compare equal.
[Test]
public void TestCompat ()
{
	UTF8Encoding first = new UTF8Encoding ();
	UTF8Encoding second = new UTF8Encoding ();
	Assert.IsTrue (first.Equals (second));
}
// Encodes a short string with GetBytes (string) and checks every produced byte.
[Test]
public void TestEncodingGetBytes1()
{
	// "A<NOT IDENTICAL TO><ALPHA>." may be encoded as 41 E2 89 A2 CE 91 2E
	int[] expected = { 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E };
	byte[] encoded = new UTF8Encoding ().GetBytes ("\u0041\u2262\u0391\u002E");

	// Byte i is reported under the label "UTF #(i + 1)".
	for (int i = 0; i < expected.Length; i++) {
		Assert.AreEqual (expected [i], encoded [i], "UTF #" + (i + 1));
	}
}
// Encodes a char array into a caller-supplied buffer and checks both the
// returned byte count and every produced byte.
[Test]
public void TestEncodingGetBytes2()
{
	// "Hi Mom <WHITE SMILING FACE>!" may be encoded as 48 69 20 4D 6F 6D 20 E2 98 BA 21
	int[] expected = { 0x48, 0x69, 0x20, 0x4D, 0x6F, 0x6D, 0x20, 0xE2, 0x98, 0xBA, 0x21 };
	char[] source = "\u0048\u0069\u0020\u004D\u006F\u006D\u0020\u263A\u0021".ToCharArray ();
	byte[] buffer = new byte [11];

	int written = new UTF8Encoding ().GetBytes (source, 0, source.Length, buffer, 0);
	Assert.AreEqual (11, written, "UTF #1");

	// Label #1 is the byte count, so byte i is reported as "UTF #(i + 2)".
	for (int i = 0; i < expected.Length; i++) {
		Assert.AreEqual (expected [i], buffer [i], "UTF #" + (i + 2));
	}
}
// Decodes a short byte sequence with GetChars (byte[]) and checks every char.
[Test]
public void TestDecodingGetChars1()
{
	// 41 E2 89 A2 CE 91 2E may be decoded as "A<NOT IDENTICAL TO><ALPHA>."
	byte[] encoded = new byte [] {0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E};
	int[] expected = { 0x0041, 0x2262, 0x0391, 0x002E };

	char[] decoded = new UTF8Encoding ().GetChars (encoded);

	// Char i is reported under the label "UTF #(i + 1)".
	for (int i = 0; i < expected.Length; i++) {
		Assert.AreEqual (expected [i], decoded [i], "UTF #" + (i + 1));
	}
}
129 [Category ("NotWorking")]
131 public void TestMaxCharCount()
133 UTF8Encoding UTF8enc = new UTF8Encoding ();
135 // hmm, where is this extra 1 coming from?
136 Assert.AreEqual (51, UTF8enc.GetMaxCharCount(50), "UTF #1");
138 Assert.AreEqual (50, UTF8enc.GetMaxCharCount(50), "UTF #1");
144 [Category ("NotWorking")]
146 public void TestMaxByteCount()
148 UTF8Encoding UTF8enc = new UTF8Encoding ();
150 // maybe under .NET 2.0 insufficient surrogate pair is
151 // just not handled, and 3 is Preamble size.
152 Assert.AreEqual (153, UTF8enc.GetMaxByteCount(50), "UTF #1");
154 Assert.AreEqual (200, UTF8enc.GetMaxByteCount(50), "UTF #1");
158 // regression for bug #59648
160 public void TestThrowOnInvalid ()
162 UTF8Encoding u = new UTF8Encoding (true, false);
164 byte[] data = new byte [] { 0xC0, 0xAF };
166 Assert.AreEqual (2, u.GetCharCount (data), "#A0");
167 string s = u.GetString (data);
168 Assert.AreEqual ("\uFFFD\uFFFD", s, "#A1");
170 Assert.AreEqual (0, u.GetCharCount (data), "#A0");
171 string s = u.GetString (data);
172 Assert.AreEqual (String.Empty, s, "#A1");
175 data = new byte [] { 0x30, 0x31, 0xC0, 0xAF, 0x30, 0x32 };
176 s = u.GetString (data);
178 Assert.AreEqual (6, s.Length, "#B1");
179 Assert.AreEqual (0x30, (int) s [0], "#B2");
180 Assert.AreEqual (0x31, (int) s [1], "#B3");
181 Assert.AreEqual (0xFFFD, (int) s [2], "#B4");
182 Assert.AreEqual (0xFFFD, (int) s [3], "#B5");
183 Assert.AreEqual (0x30, (int) s [4], "#B6");
184 Assert.AreEqual (0x32, (int) s [5], "#B7");
186 Assert.AreEqual (4, s.Length, "#B1");
187 Assert.AreEqual (0x30, (int) s [0], "#B2");
188 Assert.AreEqual (0x31, (int) s [1], "#B3");
189 Assert.AreEqual (0x30, (int) s [2], "#B4");
190 Assert.AreEqual (0x32, (int) s [3], "#B5");
194 // UTF8 decoding tests from http://www.cl.cam.ac.uk/~mgk25/
// Decodes a well-formed multi-byte sequence and verifies that re-encoding
// the decoded text reproduces the original bytes.
[Test]
public void T1_Correct_GreekWord_kosme ()
{
	byte[] original = { 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5 };
	string decoded = utf8.GetString (original);

	// Embedding the expected text would require saving this source file in
	// Unicode, so only the round trip is compared (as hex dumps).
	string expectedHex = BitConverter.ToString (original);
	string actualHex = BitConverter.ToString (utf8.GetBytes (decoded));
	Assert.AreEqual (expectedHex, actualHex, "Reconverted");
}
207 public void T2_Boundary_1_FirstPossibleSequence_Pass ()
209 byte[] data211 = { 0x00 };
210 string s = utf8.GetString (data211);
211 Assert.AreEqual ("\0", s, "1 byte (U-00000000)");
212 Assert.AreEqual (BitConverter.ToString (data211), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-1");
214 byte[] data212 = { 0xC2, 0x80 };
215 s = utf8.GetString (data212);
216 Assert.AreEqual (128, s [0], "2 bytes (U-00000080)");
217 Assert.AreEqual (BitConverter.ToString (data212), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-2");
219 byte[] data213 = { 0xE0, 0xA0, 0x80 };
220 s = utf8.GetString (data213);
221 Assert.AreEqual (2048, s [0], "3 bytes (U-00000800)");
222 Assert.AreEqual (BitConverter.ToString (data213), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-3");
224 byte[] data214 = { 0xF0, 0x90, 0x80, 0x80 };
225 s = utf8.GetString (data214);
226 Assert.AreEqual (55296, s [0], "4 bytes (U-00010000)-0");
227 Assert.AreEqual (56320, s [1], "4 bytes (U-00010000)-1");
228 Assert.AreEqual (BitConverter.ToString (data214), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-4");
233 [ExpectedException (typeof (DecoderException))]
234 public void T2_Boundary_1_FirstPossibleSequence_Fail_5 ()
236 byte[] data215 = { 0xF8, 0x88, 0x80, 0x80, 0x80 };
237 string s = utf8.GetString (data215);
238 Assert.IsNull (s, "5 bytes (U-00200000)");
239 Assert.AreEqual (BitConverter.ToString (data215), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-5");
244 [ExpectedException (typeof (DecoderException))]
245 public void T2_Boundary_1_FirstPossibleSequence_Fail_6 ()
247 byte[] data216 = { 0xFC, 0x84, 0x80, 0x80, 0x80, 0x80 };
248 string s = utf8.GetString (data216);
249 Assert.IsNull (s, "6 bytes (U-04000000)");
250 Assert.AreEqual (BitConverter.ToString (data216), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-6");
254 public void T2_Boundary_2_LastPossibleSequence_Pass ()
256 byte[] data221 = { 0x7F };
257 string s = utf8.GetString (data221);
258 Assert.AreEqual (127, s [0], "1 byte (U-0000007F)");
259 Assert.AreEqual (BitConverter.ToString (data221), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-1");
261 byte[] data222 = { 0xDF, 0xBF };
262 s = utf8.GetString (data222);
263 Assert.AreEqual (2047, s [0], "2 bytes (U-000007FF)");
264 Assert.AreEqual (BitConverter.ToString (data222), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-2");
266 byte[] data223 = { 0xEF, 0xBF, 0xBF };
267 s = utf8.GetString (data223);
268 Assert.AreEqual (65535, s [0], "3 bytes (U-0000FFFF)");
269 Assert.AreEqual (BitConverter.ToString (data223), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-3");
// Boundary test: the last possible 4-byte sequence, F7 BF BF BF (U-001FFFFF),
// lies beyond the Unicode range and must be rejected by the strict decoder.
[Test]
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_2_LastPossibleSequence_Fail_4 ()
{
	// The lead byte must be 0xF7 (4-byte lead with all payload bits set).
	// The previous value 0x7F is plain ASCII DEL, so the test only exercised
	// an unexpected continuation byte instead of the intended boundary.
	byte[] data224 = { 0xF7, 0xBF, 0xBF, 0xBF };
	string s = utf8.GetString (data224);
	// Only reached if the decoder wrongly accepted the sequence.
	Assert.IsNull (s, "4 bytes (U-001FFFFF)");
	Assert.AreEqual (BitConverter.ToString (data224), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-4");
}
286 [ExpectedException (typeof (DecoderException))]
287 public void T2_Boundary_2_LastPossibleSequence_Fail_5 ()
289 byte[] data225 = { 0xFB, 0xBF, 0xBF, 0xBF, 0xBF };
290 string s = utf8.GetString (data225);
291 Assert.IsNull (s, "5 bytes (U-03FFFFFF)");
292 Assert.AreEqual (BitConverter.ToString (data225), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-5");
297 [ExpectedException (typeof (DecoderException))]
298 public void T2_Boundary_2_LastPossibleSequence_Fail_6 ()
300 byte[] data226 = { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF, 0xBF };
301 string s = utf8.GetString (data226);
302 Assert.IsNull (s, "6 bytes (U-7FFFFFFF)");
303 Assert.AreEqual (BitConverter.ToString (data226), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-6");
307 public void T2_Boundary_3_Other_Pass ()
309 byte[] data231 = { 0xED, 0x9F, 0xBF };
310 string s = utf8.GetString (data231);
311 Assert.AreEqual (55295, s [0], "U-0000D7FF");
312 Assert.AreEqual (BitConverter.ToString (data231), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-1");
314 byte[] data232 = { 0xEE, 0x80, 0x80 };
315 s = utf8.GetString (data232);
316 Assert.AreEqual (57344, s [0], "U-0000E000");
317 Assert.AreEqual (BitConverter.ToString (data232), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-2");
319 byte[] data233 = { 0xEF, 0xBF, 0xBD };
320 s = utf8.GetString (data233);
321 Assert.AreEqual (65533, s [0], "U-0000FFFD");
322 Assert.AreEqual (BitConverter.ToString (data233), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-3");
324 byte[] data234 = { 0xF4, 0x8F, 0xBF, 0xBF };
325 s = utf8.GetString (data234);
326 Assert.AreEqual (56319, s [0], "U-0010FFFF-0");
327 Assert.AreEqual (57343, s [1], "U-0010FFFF-1");
328 Assert.AreEqual (BitConverter.ToString (data234), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-4");
333 [ExpectedException (typeof (DecoderException))]
334 public void T2_Boundary_3_Other_Fail_5 ()
336 byte[] data235 = { 0xF4, 0x90, 0x80, 0x80 };
337 string s = utf8.GetString (data235);
338 Assert.IsNull (s, "U-00110000");
339 Assert.AreEqual (BitConverter.ToString (data235), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-5");
343 [ExpectedException (typeof (DecoderException))]
344 public void T3_Malformed_1_UnexpectedContinuation_311 ()
346 byte[] data = { 0x80 };
347 string s = utf8.GetString (data);
348 // exception is "really" expected here
352 [ExpectedException (typeof (DecoderException))]
353 public void T3_Malformed_1_UnexpectedContinuation_312 ()
355 byte[] data = { 0xBF };
356 string s = utf8.GetString (data);
357 // exception is "really" expected here
361 [ExpectedException (typeof (DecoderException))]
362 public void T3_Malformed_1_UnexpectedContinuation_313 ()
364 byte[] data = { 0x80, 0xBF };
365 string s = utf8.GetString (data);
366 // exception is "really" expected here
370 [ExpectedException (typeof (DecoderException))]
371 public void T3_Malformed_1_UnexpectedContinuation_314 ()
373 byte[] data = { 0x80, 0xBF, 0x80 };
374 string s = utf8.GetString (data);
375 // exception is "really" expected here
379 [ExpectedException (typeof (DecoderException))]
380 public void T3_Malformed_1_UnexpectedContinuation_315 ()
382 byte[] data = { 0x80, 0xBF, 0x80, 0xBF };
383 string s = utf8.GetString (data);
384 // exception is "really" expected here
388 [ExpectedException (typeof (DecoderException))]
389 public void T3_Malformed_1_UnexpectedContinuation_316 ()
391 byte[] data = { 0x80, 0xBF, 0x80, 0xBF, 0x80 };
392 string s = utf8.GetString (data);
393 // exception is "really" expected here
397 [ExpectedException (typeof (DecoderException))]
398 public void T3_Malformed_1_UnexpectedContinuation_317 ()
400 byte[] data = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF };
401 string s = utf8.GetString (data);
402 // exception is "really" expected here
406 [ExpectedException (typeof (DecoderException))]
407 public void T3_Malformed_1_UnexpectedContinuation_318 ()
409 byte[] data = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 };
410 string s = utf8.GetString (data);
411 // exception is "really" expected here
415 [ExpectedException (typeof (DecoderException))]
416 public void T3_Malformed_1_UnexpectedContinuation_319 ()
418 // 64 different continuation characters
420 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
421 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
422 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
423 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF };
424 string s = utf8.GetString (data);
425 // exception is "really" expected here
429 [ExpectedException (typeof (DecoderException))]
430 public void T3_Malformed_2_LonelyStart_321 ()
433 0xC0, 0x20, 0xC1, 0x20, 0xC2, 0x20, 0xC3, 0x20, 0xC4, 0x20, 0xC5, 0x20, 0xC6, 0x20, 0xC7, 0x20,
434 0xC8, 0x20, 0xC9, 0x20, 0xCA, 0x20, 0xCB, 0x20, 0xCC, 0x20, 0xCD, 0x20, 0xCE, 0x20, 0xCF, 0x20,
435 0xD0, 0x20, 0xD1, 0x20, 0xD2, 0x20, 0xD3, 0x20, 0xD4, 0x20, 0xD5, 0x20, 0xD6, 0x20, 0xD7, 0x20,
436 0xD8, 0x20, 0xD9, 0x20, 0xDA, 0x20, 0xDB, 0x20, 0xDC, 0x20, 0xDD, 0x20, 0xDE, 0x20, 0xDF, 0x20 };
437 string s = utf8.GetString (data);
438 // exception is "really" expected here
442 [ExpectedException (typeof (DecoderException))]
443 public void T3_Malformed_2_LonelyStart_322 ()
446 0xE0, 0x20, 0xE1, 0x20, 0xE2, 0x20, 0xE3, 0x20, 0xE4, 0x20, 0xE5, 0x20, 0xE6, 0x20, 0xE7, 0x20,
447 0xE8, 0x20, 0xE9, 0x20, 0xEA, 0x20, 0xEB, 0x20, 0xEC, 0x20, 0xED, 0x20, 0xEE, 0x20, 0xEF, 0x20 };
448 string s = utf8.GetString (data);
449 // exception is "really" expected here
453 [ExpectedException (typeof (DecoderException))]
454 public void T3_Malformed_2_LonelyStart_323 ()
456 byte[] data = { 0xF0, 0x20, 0xF1, 0x20, 0xF2, 0x20, 0xF3, 0x20, 0xF4, 0x20, 0xF5, 0x20, 0xF6, 0x20, 0xF7, 0x20 };
457 string s = utf8.GetString (data);
458 // exception is "really" expected here
// Malformed input: every lead byte of a 5-byte sequence (0xF8-0xFB), each
// followed by a space instead of continuation bytes, must be rejected.
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_324 ()
{
	// Previously this array was a copy of the 4-byte lead bytes (0xF0-0xF7)
	// used by test 323, so the 5-byte lead bytes were never exercised.
	byte[] data = { 0xF8, 0x20, 0xF9, 0x20, 0xFA, 0x20, 0xFB, 0x20 };
	utf8.GetString (data);
	// exception is "really" expected here
}
471 [ExpectedException (typeof (DecoderException))]
472 public void T3_Malformed_2_LonelyStart_325 ()
474 byte[] data = { 0xFC, 0x20, 0xFD, 0x20 };
475 string s = utf8.GetString (data);
476 // exception is "really" expected here
480 [ExpectedException (typeof (DecoderException))]
481 public void T3_Malformed_3_LastContinuationMissing_331 ()
483 byte[] data = { 0xC0 };
484 string s = utf8.GetString (data);
485 // exception is "really" expected here
489 [ExpectedException (typeof (DecoderException))]
490 public void T3_Malformed_3_LastContinuationMissing_332 ()
492 byte[] data = { 0xE0, 0x80 };
493 string s = utf8.GetString (data);
494 // exception is "really" expected here
498 [ExpectedException (typeof (DecoderException))]
499 public void T3_Malformed_3_LastContinuationMissing_333 ()
501 byte[] data = { 0xF0, 0x80, 0x80 };
502 string s = utf8.GetString (data);
503 // exception is "really" expected here
507 [ExpectedException (typeof (DecoderException))]
508 public void T3_Malformed_3_LastContinuationMissing_334 ()
510 byte[] data = { 0xF8, 0x80, 0x80, 0x80 };
511 string s = utf8.GetString (data);
512 // exception is "really" expected here
516 [ExpectedException (typeof (DecoderException))]
517 public void T3_Malformed_3_LastContinuationMissing_335 ()
519 byte[] data = { 0xFC, 0x80, 0x80, 0x80, 0x80 };
520 string s = utf8.GetString (data);
521 // exception is "really" expected here
525 // MS Fx 1.1 accepts this
526 // [ExpectedException (typeof (DecoderException))]
527 public void T3_Malformed_3_LastContinuationMissing_336 ()
529 byte[] data = { 0xDF };
531 string s = utf8.GetString (data);
532 // exception is "really" expected here
533 Assert.AreEqual (String.Empty, s, "MS FX 1.1 behaviour");
535 catch (DecoderException) {
536 // but Mono doesn't - better stick to the standard
541 // MS Fx 1.1 accepts this
542 // [ExpectedException (typeof (DecoderException))]
543 public void T3_Malformed_3_LastContinuationMissing_337 ()
545 byte[] data = { 0xEF, 0xBF };
547 string s = utf8.GetString (data);
548 // exception is "really" expected here
549 Assert.AreEqual (String.Empty, s, "MS FX 1.1 behaviour");
551 catch (DecoderException) {
552 // but Mono doesn't - better stick to the standard
557 [ExpectedException (typeof (DecoderException))]
558 public void T3_Malformed_3_LastContinuationMissing_338 ()
560 byte[] data = { 0xF7, 0xBF, 0xBF };
561 string s = utf8.GetString (data);
562 // exception is "really" expected here
// Malformed input: a 5-byte sequence (lead byte 0xFB) whose last continuation
// byte is missing must be rejected by the strict decoder.
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_339 ()
{
	// The lead byte must be 0xFB. The previous value 0xF (0x0F) is an ASCII
	// control character, so the test only exercised stray continuation bytes
	// rather than a truncated 5-byte sequence.
	byte[] data = { 0xFB, 0xBF, 0xBF, 0xBF };
	utf8.GetString (data);
	// exception is "really" expected here
}
575 [ExpectedException (typeof (DecoderException))]
576 public void T3_Malformed_3_LastContinuationMissing_3310 ()
578 byte[] data = { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF };
579 string s = utf8.GetString (data);
580 // exception is "really" expected here
584 [ExpectedException (typeof (DecoderException))]
585 public void T3_Malformed_4_ConcatenationImcomplete ()
588 0xC0, 0xE0, 0x80, 0xF0, 0x80, 0x80, 0xF8, 0x80, 0x80, 0x80, 0xFC, 0x80, 0x80, 0x80, 0x80, 0xDF,
589 0xEF, 0xBF, 0xF7, 0xBF, 0xBF, 0xFB, 0xBF, 0xBF, 0xBF, 0xFD, 0xBF, 0xBF, 0xBF, 0xBF };
590 string s = utf8.GetString (data);
591 // exception is "really" expected here
595 [ExpectedException (typeof (DecoderException))]
596 public void T3_Malformed_5_ImpossibleBytes_351 ()
598 byte[] data = { 0xFE };
599 string s = utf8.GetString (data);
600 // exception is "really" expected here
604 [ExpectedException (typeof (DecoderException))]
605 public void T3_Malformed_5_ImpossibleBytes_352 ()
607 byte[] data = { 0xFF };
608 string s = utf8.GetString (data);
609 // exception is "really" expected here
613 [ExpectedException (typeof (DecoderException))]
614 public void T3_Malformed_5_ImpossibleBytes_353 ()
616 byte[] data = { 0xFE, 0xFE, 0xFF, 0xFF };
617 string s = utf8.GetString (data);
618 // exception is "really" expected here
621 // Overlong == dangerous -> "safe" decoder should reject them
624 [ExpectedException (typeof (DecoderException))]
625 public void T4_Overlong_1_ASCII_Slash_411 ()
627 byte[] data = { 0xC0, 0xAF };
628 string s = utf8.GetString (data);
629 // exception is "really" expected here
633 [ExpectedException (typeof (DecoderException))]
634 public void T4_Overlong_1_ASCII_Slash_412 ()
636 byte[] data = { 0xE0, 0x80, 0xAF };
637 string s = utf8.GetString (data);
638 // exception is "really" expected here
642 [ExpectedException (typeof (DecoderException))]
643 public void T4_Overlong_1_ASCII_Slash_413 ()
645 byte[] data = { 0xF0, 0x80, 0x80, 0xAF };
646 string s = utf8.GetString (data);
647 // exception is "really" expected here
651 [ExpectedException (typeof (DecoderException))]
652 public void T4_Overlong_1_ASCII_Slash_414 ()
654 byte[] data = { 0xF8, 0x80, 0x80, 0x80, 0xAF };
655 string s = utf8.GetString (data);
656 // exception is "really" expected here
660 [ExpectedException (typeof (DecoderException))]
661 public void T4_Overlong_1_ASCII_Slash_415 ()
663 byte[] data = { 0xFC, 0x80, 0x80, 0x80, 0x80, 0xAF };
664 string s = utf8.GetString (data);
665 // exception is "really" expected here
669 [ExpectedException (typeof (DecoderException))]
670 public void T4_Overlong_2_MaximumBoundary_421 ()
672 byte[] data = { 0xC1, 0xBF };
673 string s = utf8.GetString (data);
674 // exception is "really" expected here
678 [ExpectedException (typeof (DecoderException))]
679 public void T4_Overlong_2_MaximumBoundary_422 ()
681 byte[] data = { 0xE0, 0x9F, 0xBF };
682 string s = utf8.GetString (data);
683 // exception is "really" expected here
687 [ExpectedException (typeof (DecoderException))]
688 public void T4_Overlong_2_MaximumBoundary_423 ()
690 byte[] data = { 0xF0, 0x8F, 0xBF, 0xBF };
691 string s = utf8.GetString (data);
692 // exception is "really" expected here
696 [ExpectedException (typeof (DecoderException))]
697 public void T4_Overlong_2_MaximumBoundary_424 ()
699 byte[] data = { 0xF8, 0x87, 0xBF, 0xBF, 0xBF };
700 string s = utf8.GetString (data);
701 // exception is "really" expected here
705 [ExpectedException (typeof (DecoderException))]
706 public void T4_Overlong_2_MaximumBoundary_425 ()
708 byte[] data = { 0xFC, 0x83, 0xBF, 0xBF, 0xBF, 0xBF };
709 string s = utf8.GetString (data);
710 // exception is "really" expected here
714 [ExpectedException (typeof (DecoderException))]
715 public void T4_Overlong_3_NUL_431 ()
717 byte[] data = { 0xC0, 0x80 };
718 string s = utf8.GetString (data);
719 // exception is "really" expected here
723 [ExpectedException (typeof (DecoderException))]
724 public void T4_Overlong_3_NUL_432 ()
726 byte[] data = { 0xE0, 0x80, 0x80 };
727 string s = utf8.GetString (data);
728 // exception is "really" expected here
732 [ExpectedException (typeof (DecoderException))]
733 public void T4_Overlong_3_NUL_433 ()
735 byte[] data = { 0xF0, 0x80, 0x80, 0x80 };
736 string s = utf8.GetString (data);
737 // exception is "really" expected here
741 [ExpectedException (typeof (DecoderException))]
742 public void T4_Overlong_3_NUL_434 ()
744 byte[] data = { 0xF8, 0x80, 0x80, 0x80, 0x80 };
745 string s = utf8.GetString (data);
746 // exception is "really" expected here
750 [ExpectedException (typeof (DecoderException))]
751 public void T4_Overlong_3_NUL_435 ()
753 byte[] data = { 0xFC, 0x80, 0x80, 0x80, 0x80, 0x80 };
754 string s = utf8.GetString (data);
755 // exception is "really" expected here
760 [ExpectedException (typeof (DecoderFallbackException))]
762 // MS Fx 1.1 accept this
763 [Category ("NotDotNet")]
764 [ExpectedException (typeof (DecoderException))]
766 public void T5_IllegalCodePosition_1_UTF16Surrogates_511 ()
768 byte[] data = { 0xED, 0xA0, 0x80 };
769 string s = utf8.GetString (data);
770 // exception is "really" expected here
771 Assert.AreEqual (55296, s [0], "MS FX 1.1 behaviour");
776 [ExpectedException (typeof (DecoderFallbackException))]
778 // MS Fx 1.1 accept this
779 [Category ("NotDotNet")]
780 [ExpectedException (typeof (DecoderException))]
782 public void T5_IllegalCodePosition_1_UTF16Surrogates_512 ()
784 byte[] data = { 0xED, 0xAD, 0xBF };
785 string s = utf8.GetString (data);
786 // exception is "really" expected here
787 Assert.AreEqual (56191, s [0], "MS FX 1.1 behaviour");
792 [ExpectedException (typeof (DecoderFallbackException))]
794 // MS Fx 1.1 accept this
795 [Category ("NotDotNet")]
796 [ExpectedException (typeof (DecoderException))]
798 public void T5_IllegalCodePosition_1_UTF16Surrogates_513 ()
800 byte[] data = { 0xED, 0xAE, 0x80 };
801 string s = utf8.GetString (data);
802 // exception is "really" expected here
803 Assert.AreEqual (56192, s [0], "MS FX 1.1 behaviour");
808 [ExpectedException (typeof (DecoderFallbackException))]
810 // MS Fx 1.1 accept this
811 [Category ("NotDotNet")]
812 [ExpectedException (typeof (DecoderException))]
814 public void T5_IllegalCodePosition_1_UTF16Surrogates_514 ()
816 byte[] data = { 0xED, 0xAF, 0xBF };
817 string s = utf8.GetString (data);
818 // exception is "really" expected here
819 Assert.AreEqual (56319, s [0], "MS FX 1.1 behaviour");
824 [ExpectedException (typeof (DecoderFallbackException))]
826 // MS Fx 1.1 accept this
827 [Category ("NotDotNet")]
828 [ExpectedException (typeof (DecoderException))]
830 public void T5_IllegalCodePosition_1_UTF16Surrogates_515 ()
832 byte[] data = { 0xED, 0xB0, 0x80 };
833 string s = utf8.GetString (data);
834 // exception is "really" expected here
835 Assert.AreEqual (56320, s [0], "MS FX 1.1 behaviour");
840 [ExpectedException (typeof (DecoderFallbackException))]
842 // MS Fx 1.1 accept this
843 [Category ("NotDotNet")]
844 [ExpectedException (typeof (DecoderException))]
846 public void T5_IllegalCodePosition_1_UTF16Surrogates_516 ()
848 byte[] data = { 0xED, 0xBE, 0x80 };
849 string s = utf8.GetString (data);
850 // exception is "really" expected here
851 Assert.AreEqual (57216, s [0], "MS FX 1.1 behaviour");
856 [ExpectedException (typeof (DecoderFallbackException))]
858 // MS Fx 1.1 accept this
859 [Category ("NotDotNet")]
860 [ExpectedException (typeof (DecoderException))]
862 public void T5_IllegalCodePosition_1_UTF16Surrogates_517 ()
864 byte[] data = { 0xED, 0xBF, 0xBF };
865 string s = utf8.GetString (data);
866 // exception is "really" expected here
867 Assert.AreEqual (57343, s [0], "MS FX 1.1 behaviour");
872 [ExpectedException (typeof (DecoderFallbackException))]
874 // MS Fx 1.1 accept this
875 [Category ("NotDotNet")]
876 [ExpectedException (typeof (DecoderException))]
878 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_521 ()
880 byte[] data = { 0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80 };
881 string s = utf8.GetString (data);
882 // exception is "really" expected here
883 Assert.AreEqual (55296, s [0], "MS FX 1.1 behaviour");
884 Assert.AreEqual (56320, s [1], "MS FX 1.1 behaviour");
889 [ExpectedException (typeof (DecoderFallbackException))]
891 // MS Fx 1.1 accept this
892 [Category ("NotDotNet")]
893 [ExpectedException (typeof (DecoderException))]
895 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_522 ()
897 byte[] data = { 0xED, 0xA0, 0x80, 0xED, 0xBF, 0xBF };
898 string s = utf8.GetString (data);
899 // exception is "really" expected here
900 Assert.AreEqual (55296, s [0], "MS FX 1.1 behaviour");
901 Assert.AreEqual (57343, s [1], "MS FX 1.1 behaviour");
906 [ExpectedException (typeof (DecoderFallbackException))]
908 // MS Fx 1.1 accept this
909 [Category ("NotDotNet")]
910 [ExpectedException (typeof (DecoderException))]
912 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_523 ()
914 byte[] data = { 0xED, 0xAD, 0xBF, 0xED, 0xB0, 0x80 };
915 string s = utf8.GetString (data);
916 // exception is "really" expected here
917 Assert.AreEqual (56191, s [0], "MS FX 1.1 behaviour");
918 Assert.AreEqual (56320, s [1], "MS FX 1.1 behaviour");
923 [ExpectedException (typeof (DecoderFallbackException))]
925 // MS Fx 1.1 accept this
926 [Category ("NotDotNet")]
927 [ExpectedException (typeof (DecoderException))]
929 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_524 ()
931 byte[] data = { 0xED, 0xAD, 0xBF, 0xED, 0xBF, 0xBF };
932 string s = utf8.GetString (data);
933 // exception is "really" expected here
934 Assert.AreEqual (56191, s [0], "MS FX 1.1 behaviour");
935 Assert.AreEqual (57343, s [1], "MS FX 1.1 behaviour");
940 [ExpectedException (typeof (DecoderFallbackException))]
942 // MS Fx 1.1 accept this
943 [Category ("NotDotNet")]
944 [ExpectedException (typeof (DecoderException))]
946 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_525 ()
948 byte[] data = { 0xED, 0xAE, 0x80, 0xED, 0xB0, 0x80 };
949 string s = utf8.GetString (data);
950 // exception is "really" expected here
951 Assert.AreEqual (56192, s [0], "MS FX 1.1 behaviour");
952 Assert.AreEqual (56320, s [1], "MS FX 1.1 behaviour");
957 [ExpectedException (typeof (DecoderFallbackException))]
959 // MS Fx 1.1 accept this
960 [Category ("NotDotNet")]
961 [ExpectedException (typeof (DecoderException))]
963 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_526 ()
965 byte[] data = { 0xED, 0xAE, 0x80, 0xED, 0xBF, 0x8F };
966 string s = utf8.GetString (data);
967 // exception is "really" expected here
968 Assert.AreEqual (56192, s [0], "MS FX 1.1 behaviour");
969 Assert.AreEqual (57295, s [1], "MS FX 1.1 behaviour");
974 [ExpectedException (typeof (DecoderFallbackException))]
976 // MS Fx 1.1 accept this
977 [Category ("NotDotNet")]
978 [ExpectedException (typeof (DecoderException))]
980 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_527 ()
982 byte[] data = { 0xED, 0xAF, 0xBF, 0xED, 0xB0, 0x80 };
983 string s = utf8.GetString (data);
984 // exception is "really" expected here
985 Assert.AreEqual (56319, s [0], "MS FX 1.1 behaviour");
986 Assert.AreEqual (56320, s [1], "MS FX 1.1 behaviour");
991 [ExpectedException (typeof (DecoderFallbackException))]
993 // MS Fx 1.1 accept this
994 [Category ("NotDotNet")]
995 [ExpectedException (typeof (DecoderException))]
997 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_528 ()
999 byte[] data = { 0xED, 0xAF, 0xBF, 0xED, 0xBF, 0xBF };
1000 string s = utf8.GetString (data);
1001 // exception is "really" expected here
1002 Assert.AreEqual (56319, s [0], "MS FX 1.1 behaviour");
1003 Assert.AreEqual (57343, s [1], "MS FX 1.1 behaviour");
1007 // MS Fx 1.1 accept this
1008 // [ExpectedException (typeof (DecoderException))]
1009 public void T5_IllegalCodePosition_3_Other_531 ()
1011 byte[] data = { 0xEF, 0xBF, 0xBE };
1012 string s = utf8.GetString (data);
1013 // exception is "really" expected here
1014 Assert.AreEqual (65534, s [0], "MS FX 1.1 behaviour");
1018 // MS Fx 1.1 accept this
1019 // [ExpectedException (typeof (DecoderException))]
1020 public void T5_IllegalCodePosition_3_Other_532 ()
1022 byte[] data = { 0xEF, 0xBF, 0xBF };
1023 string s = utf8.GetString (data);
1024 // exception is "really" expected here
1025 Assert.AreEqual (65535, s [0], "MS FX 1.1 behaviour");
// Regression test for bug #75065 and #73086: U+FEFF must survive decoding,
// both when it is the only character and when it leads other text.
[Test]
public void GetCharsFEFF ()
{
	byte [] data = new byte [] {0xEF, 0xBB, 0xBF};
	Encoding enc = new UTF8Encoding (false, true);
	string s = enc.GetString (data);
	// Expected value goes first so a failure reports the values the right
	// way round; the label distinguishes this check from #1/#2 below.
	Assert.AreEqual ("\uFEFF", s, "#0");

	Encoding utf = Encoding.UTF8;
	char[] testChars = {'\uFEFF','A'};

	// Round trip: encode then decode, and check that U+FEFF is still first.
	byte[] bytes = utf.GetBytes(testChars);
	char[] chars = utf.GetChars(bytes);
	Assert.AreEqual ('\uFEFF', chars [0], "#1");
	Assert.AreEqual ('A', chars [1], "#2");
}
1048 public void CloneNotReadOnly ()
1050 Encoding e = Encoding.GetEncoding (65001).Clone ()
1052 Assert.AreEqual (false, e.IsReadOnly);
1053 e.EncoderFallback = new EncoderExceptionFallback ();
1059 [ExpectedException (typeof (DecoderFallbackException))]
1061 [ExpectedException (typeof (ArgumentException))]
1062 [Category ("NotDotNet")] // MS Bug
1064 public void Bug77315 ()
1066 new UTF8Encoding (false, true).GetString (
1067 new byte [] {0xED, 0xA2, 0x8C});
1071 public void SufficientByteArray ()
1073 Encoder e = Encoding.UTF8.GetEncoder ();
1074 byte [] bytes = new byte [0];
1076 char [] chars = new char [] {'\uD800'};
1077 e.GetBytes (chars, 0, 1, bytes, 0, false);
1079 int ret = e.GetBytes (chars, 1, 0, bytes, 0, true);
1081 Assert.AreEqual (0, ret, "drop insufficient char in 2.0: char[]");
1083 Assert.Fail ("ArgumentException is expected: char[]");
1085 } catch (ArgumentException ae) {
1091 string s = "\uD800";
1093 int ret = Encoding.UTF8.GetBytes (s, 0, 1, bytes, 0);
1095 Assert.AreEqual (0, ret, "drop insufficient char in 2.0: string");
1097 Assert.Fail ("ArgumentException is expected: string");
1099 } catch (ArgumentException ae) {
1106 [Test] // bug #565129
1107 public void SufficientByteArray2 ()
1109 var u = Encoding.UTF8;
1110 Assert.AreEqual (3, u.GetByteCount ("\uFFFD"), "#1-1");
1111 Assert.AreEqual (3, u.GetByteCount ("\uD800"), "#1-2");
1112 Assert.AreEqual (3, u.GetByteCount ("\uDC00"), "#1-3");
1113 Assert.AreEqual (4, u.GetByteCount ("\uD800\uDC00"), "#1-4");
1114 byte [] bytes = new byte [10];
1115 Assert.AreEqual (3, u.GetBytes ("\uDC00", 0, 1, bytes, 0), "#1-5"); // was bogus
1117 Assert.AreEqual (3, u.GetBytes ("\uFFFD").Length, "#2-1");
1118 Assert.AreEqual (3, u.GetBytes ("\uD800").Length, "#2-2");
1119 Assert.AreEqual (3, u.GetBytes ("\uDC00").Length, "#2-3");
1120 Assert.AreEqual (4, u.GetBytes ("\uD800\uDC00").Length, "#2-4");
1122 for (char c = char.MinValue; c < char.MaxValue; c++) {
1124 bIn = u.GetBytes (c.ToString ());
1128 new UTF8Encoding (false, true).GetBytes (new char [] {'\uDF45', '\uD808'}, 0, 2);
1129 Assert.Fail ("EncoderFallbackException is expected");
1130 } catch (EncoderFallbackException) {
1135 [Test] // bug #77550
1136 public void DecoderFallbackSimple ()
1138 UTF8Encoding e = new UTF8Encoding (false, false);
1139 AssertType.AreEqual (1, e.GetDecoder ().GetCharCount (
1140 new byte [] {(byte) 183}, 0, 1),
1142 AssertType.AreEqual (1, e.GetDecoder().GetChars (
1143 new byte [] {(byte) 183}, 0, 1,
1146 AssertType.AreEqual (1, e.GetString (new byte [] {(byte) 183}).Length,
// Checks the decoder fallback buffer of Encoding.UTF8: it is expected to be a
// DecoderReplacementFallbackBuffer that offers exactly one U+FFFD character.
[Test]
public void FallbackDefaultEncodingUTF8 ()
{
	DecoderReplacementFallbackBuffer b =
		Encoding.UTF8.DecoderFallback.CreateFallbackBuffer ()
		as DecoderReplacementFallbackBuffer;
	// Fail with a clear assertion rather than a NullReferenceException if the
	// buffer is not of the expected type.
	AssertType.IsNotNull (b, "#0");
	AssertType.IsTrue (b.Fallback (new byte [] {}, 0), "#1");
	AssertType.IsFalse (b.MovePrevious (), "#2");
	AssertType.AreEqual (1, b.Remaining, "#3");
	AssertType.AreEqual ('\uFFFD', b.GetNextChar (), "#4");
}
1163 [Category ("MobileNotWorking")]
1164 public void Bug415628 ()
1166 using (var f = File.Open ("Test/resources/415628.bin", FileMode.Open)) {
1167 BinaryReader br = new BinaryReader (f);
1168 byte [] buf = br.ReadBytes (8000);
1169 Encoding.UTF8.GetString(buf);
1175 [ExpectedException (typeof (ArgumentException))]
1176 public void Bug10788()
1178 byte[] bytes = new byte[4096];
1179 char[] chars = new char[10];
1181 Encoding.UTF8.GetDecoder ().GetChars (bytes, 0, 4096, chars, 9, false);
1185 public void Bug10789()
1187 byte[] bytes = new byte[4096];
1188 char[] chars = new char[10];
1191 Encoding.UTF8.GetDecoder ().GetChars (bytes, 0, 1, chars, 10, false);
1192 Assert.Fail ("ArgumentException is expected #1");
1193 } catch (ArgumentException) {
1197 Encoding.UTF8.GetDecoder ().GetChars (bytes, 0, 1, chars, 11, false);
1198 Assert.Fail ("ArgumentOutOfRangeException is expected #2");
1199 } catch (ArgumentOutOfRangeException) {
1202 int charactersWritten = Encoding.UTF8.GetDecoder ().GetChars (bytes, 0, 0, chars, 10, false);
1203 Assert.AreEqual (0, charactersWritten, "#3");