2 // UTF8EncodingTest.cs - NUnit Test Cases for System.Text.UTF8Encoding
5 // Patrick Kalkman kalkman@cistron.nl
6 // Sebastien Pouliot (spouliot@motus.com)
8 // (C) 2003 Patrick Kalkman
9 // (C) 2004 Novell (http://www.novell.com)
12 using NUnit.Framework;
18 using DecoderException = System.Text.DecoderFallbackException;
20 using DecoderException = System.ArgumentException;
23 using AssertType = NUnit.Framework.Assert;
25 namespace MonoTests.System.Text
28 public class UTF8EncodingTest
30 private UTF8Encoding utf8;
35 utf8 = new UTF8Encoding (true, true);
public void IsBrowserDisplay ()
{
	// The shared fixture encoding must report itself as usable for browser display.
	bool usable = utf8.IsBrowserDisplay;
	Assert.IsTrue (usable);
}
public void IsBrowserSave ()
{
	// The shared fixture encoding must report itself as usable for browser save.
	bool usable = utf8.IsBrowserSave;
	Assert.IsTrue (usable);
}
public void IsMailNewsDisplay ()
{
	// The shared fixture encoding must report itself as usable for mail/news display.
	bool usable = utf8.IsMailNewsDisplay;
	Assert.IsTrue (usable);
}
public void IsMailNewsSave ()
{
	// The shared fixture encoding must report itself as usable for mail/news save.
	bool usable = utf8.IsMailNewsSave;
	Assert.IsTrue (usable);
}
public void TestCompat ()
{
	// Two encodings built with the parameterless constructor must compare equal.
	UTF8Encoding first = new UTF8Encoding ();
	UTF8Encoding second = new UTF8Encoding ();
	Assert.IsTrue (first.Equals (second));
}
public void TestEncodingGetBytes1()
{
	// "A<NOT IDENTICAL TO><ALPHA>." may be encoded as 41 E2 89 A2 CE 91 2E
	int[] expected = { 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E };
	string text = "\u0041\u2262\u0391\u002E";

	byte[] encoded = new UTF8Encoding ().GetBytes (text);

	// Compare byte by byte so a failure message names the offending position.
	for (int i = 0; i < expected.Length; i++) {
		Assert.AreEqual (expected [i], encoded [i], "UTF #" + (i + 1));
	}
}
public void TestEncodingGetBytes2()
{
	// "Hi Mom <WHITE SMILING FACE>!" may be encoded as 48 69 20 4D 6F 6D 20 E2 98 BA 21
	int[] expected = { 0x48, 0x69, 0x20, 0x4D, 0x6F, 0x6D, 0x20, 0xE2, 0x98, 0xBA, 0x21 };
	string text = "\u0048\u0069\u0020\u004D\u006F\u006D\u0020\u263A\u0021";
	char[] source = text.ToCharArray ();
	byte[] buffer = new byte [11];

	// Exercises the (char[], int, int, byte[], int) overload, which returns the byte count.
	int written = new UTF8Encoding ().GetBytes (source, 0, text.Length, buffer, 0);

	Assert.AreEqual (11, written, "UTF #1");
	// Message numbering continues at #2 because #1 is the count check above.
	for (int i = 0; i < expected.Length; i++) {
		Assert.AreEqual (expected [i], buffer [i], "UTF #" + (i + 2));
	}
}
public void TestDecodingGetChars1()
{
	// 41 E2 89 A2 CE 91 2E may be decoded as "A<NOT IDENTICAL TO><ALPHA>."
	byte[] encoded = new byte [] {0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E};
	int[] expected = { 0x0041, 0x2262, 0x0391, 0x002E };

	char[] decoded = new UTF8Encoding ().GetChars (encoded);

	// Compare code unit by code unit so a failure message names the position.
	for (int i = 0; i < expected.Length; i++) {
		Assert.AreEqual (expected [i], decoded [i], "UTF #" + (i + 1));
	}
}
public void TestMaxCharCount()
{
	// Checks GetMaxCharCount for a 50-byte input, and that asking the
	// encoding to emit a BOM does not change the reported maximum.
	UTF8Encoding UTF8enc = new UTF8Encoding ();
	Encoding UTF8encWithBOM = new UTF8Encoding(true);
	Assert.AreEqual (51, UTF8enc.GetMaxCharCount(50), "UTF #1");
	// Fixed: this used to compare GetMaxByteCount (copy-paste from TestMaxByteCount),
	// so the BOM variant's char count was never actually checked.
	Assert.AreEqual (UTF8enc.GetMaxCharCount(50), UTF8encWithBOM.GetMaxCharCount(50), "UTF #2");
}
public void TestMaxCharCountWithCustomFallback()
{
	// Both fallbacks use a two-character replacement string; the expected
	// maximum char count for 50 bytes is then 102.
	EncoderReplacementFallback encoderFallback = new EncoderReplacementFallback ("\u2047\u2047");
	DecoderReplacementFallback decoderFallback = new DecoderReplacementFallback ("\u2047\u2047");
	Encoding encoding = Encoding.GetEncoding ("utf-8", encoderFallback, decoderFallback);

	int maxChars = encoding.GetMaxCharCount (50);
	Assert.AreEqual (102, maxChars, "UTF #1");
}
public void TestMaxByteCount()
{
	// Checks GetMaxByteCount for 50 chars, and that the BOM-emitting variant
	// reports the same maximum.
	UTF8Encoding plain = new UTF8Encoding ();
	Encoding withBom = new UTF8Encoding (true);

	int plainMax = plain.GetMaxByteCount (50);
	int bomMax = withBom.GetMaxByteCount (50);

	Assert.AreEqual (153, plainMax, "UTF #1");
	Assert.AreEqual (plainMax, bomMax, "UTF #2");
}
public void TestMaxByteCountWithCustomFallback()
{
	// The encoder fallback uses a two-character replacement string; the
	// expected maximum byte count for 50 chars is then 306.
	EncoderReplacementFallback encoderFallback = new EncoderReplacementFallback ("\u2047\u2047");
	DecoderReplacementFallback decoderFallback = new DecoderReplacementFallback ("?");
	Encoding encoding = Encoding.GetEncoding ("utf-8", encoderFallback, decoderFallback);

	int maxBytes = encoding.GetMaxByteCount (50);
	Assert.AreEqual (306, maxBytes, "UTF #1");
}
160 // regression for bug #59648
162 public void TestThrowOnInvalid ()
164 UTF8Encoding u = new UTF8Encoding (true, false);
166 byte[] data = new byte [] { 0xC0, 0xAF };
168 Assert.AreEqual (2, u.GetCharCount (data), "#A0");
169 string s = u.GetString (data);
170 Assert.AreEqual ("\uFFFD\uFFFD", s, "#A1");
172 Assert.AreEqual (0, u.GetCharCount (data), "#A0");
173 string s = u.GetString (data);
174 Assert.AreEqual (String.Empty, s, "#A1");
177 data = new byte [] { 0x30, 0x31, 0xC0, 0xAF, 0x30, 0x32 };
178 s = u.GetString (data);
180 Assert.AreEqual (6, s.Length, "#B1");
181 Assert.AreEqual (0x30, (int) s [0], "#B2");
182 Assert.AreEqual (0x31, (int) s [1], "#B3");
183 Assert.AreEqual (0xFFFD, (int) s [2], "#B4");
184 Assert.AreEqual (0xFFFD, (int) s [3], "#B5");
185 Assert.AreEqual (0x30, (int) s [4], "#B6");
186 Assert.AreEqual (0x32, (int) s [5], "#B7");
188 Assert.AreEqual (4, s.Length, "#B1");
189 Assert.AreEqual (0x30, (int) s [0], "#B2");
190 Assert.AreEqual (0x31, (int) s [1], "#B3");
191 Assert.AreEqual (0x30, (int) s [2], "#B4");
192 Assert.AreEqual (0x32, (int) s [3], "#B5");
196 // UTF8 decoding tests from http://www.cl.cam.ac.uk/~mgk25/
public void T1_Correct_GreekWord_kosme ()
{
	// The decoded text is not compared against a literal (non-ASCII source
	// files can be problematic); instead the string is re-encoded and the
	// resulting bytes must match the input.
	byte[] greek = { 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5 };
	string decoded = utf8.GetString (greek);

	string expectedHex = BitConverter.ToString (greek);
	string actualHex = BitConverter.ToString (utf8.GetBytes (decoded));
	Assert.AreEqual (expectedHex, actualHex, "Reconverted");
}
public void T2_Boundary_1_FirstPossibleSequence_Pass ()
{
	// First possible sequence of each length (1 to 4 bytes): decode, check the
	// resulting UTF-16 code unit(s), then confirm re-encoding gives the input back.
	byte[] oneByte = { 0x00 };
	string decoded = utf8.GetString (oneByte);
	Assert.AreEqual ("\0", decoded, "1 byte (U-00000000)");
	Assert.AreEqual (BitConverter.ToString (oneByte), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-1");

	byte[] twoBytes = { 0xC2, 0x80 };
	decoded = utf8.GetString (twoBytes);
	Assert.AreEqual (0x80, decoded [0], "2 bytes (U-00000080)");
	Assert.AreEqual (BitConverter.ToString (twoBytes), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-2");

	byte[] threeBytes = { 0xE0, 0xA0, 0x80 };
	decoded = utf8.GetString (threeBytes);
	Assert.AreEqual (0x800, decoded [0], "3 bytes (U-00000800)");
	Assert.AreEqual (BitConverter.ToString (threeBytes), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-3");

	// The 4-byte case decodes to two UTF-16 code units, checked individually.
	byte[] fourBytes = { 0xF0, 0x90, 0x80, 0x80 };
	decoded = utf8.GetString (fourBytes);
	Assert.AreEqual (0xD800, decoded [0], "4 bytes (U-00010000)-0");
	Assert.AreEqual (0xDC00, decoded [1], "4 bytes (U-00010000)-1");
	Assert.AreEqual (BitConverter.ToString (fourBytes), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-4");
}
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_1_FirstPossibleSequence_Fail_5 ()
{
	// First 5-byte sequence. The attribute requires GetString to throw; the
	// assertions after it only run (and fail the test) if it does not.
	byte[] fiveBytes = { 0xF8, 0x88, 0x80, 0x80, 0x80 };
	string decoded = utf8.GetString (fiveBytes);
	Assert.IsNull (decoded, "5 bytes (U-00200000)");
	string expectedHex = BitConverter.ToString (fiveBytes);
	Assert.AreEqual (expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-5");
}
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_1_FirstPossibleSequence_Fail_6 ()
{
	// First 6-byte sequence. The attribute requires GetString to throw; the
	// assertions after it only run (and fail the test) if it does not.
	byte[] sixBytes = { 0xFC, 0x84, 0x80, 0x80, 0x80, 0x80 };
	string decoded = utf8.GetString (sixBytes);
	Assert.IsNull (decoded, "6 bytes (U-04000000)");
	string expectedHex = BitConverter.ToString (sixBytes);
	Assert.AreEqual (expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-6");
}
public void T2_Boundary_2_LastPossibleSequence_Pass ()
{
	// Last possible sequence of 1, 2 and 3 bytes: decode, check the code unit,
	// then confirm re-encoding reproduces the input bytes.
	byte[] oneByte = { 0x7F };
	string decoded = utf8.GetString (oneByte);
	Assert.AreEqual (0x7F, decoded [0], "1 byte (U-0000007F)");
	Assert.AreEqual (BitConverter.ToString (oneByte), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-1");

	byte[] twoBytes = { 0xDF, 0xBF };
	decoded = utf8.GetString (twoBytes);
	Assert.AreEqual (0x7FF, decoded [0], "2 bytes (U-000007FF)");
	Assert.AreEqual (BitConverter.ToString (twoBytes), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-2");

	byte[] threeBytes = { 0xEF, 0xBF, 0xBF };
	decoded = utf8.GetString (threeBytes);
	Assert.AreEqual (0xFFFF, decoded [0], "3 bytes (U-0000FFFF)");
	Assert.AreEqual (BitConverter.ToString (threeBytes), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-3");
}
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_2_LastPossibleSequence_Fail_4 ()
{
	// Last possible 4-byte sequence (U-001FFFFF). The attribute requires
	// GetString to throw; the assertions after it only run if it does not.
	// Fixed: the lead byte was written 0x7F (an ASCII byte followed by stray
	// continuation bytes), which threw for a different reason than the
	// 4-byte sequence this case is labelled as.
	byte[] data224 = { 0xF7, 0xBF, 0xBF, 0xBF };
	string s = utf8.GetString (data224);
	Assert.IsNull (s, "4 bytes (U-001FFFFF)");
	Assert.AreEqual (BitConverter.ToString (data224), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-4");
}
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_2_LastPossibleSequence_Fail_5 ()
{
	// Last 5-byte sequence. The attribute requires GetString to throw; the
	// assertions after it only run (and fail the test) if it does not.
	byte[] fiveBytes = { 0xFB, 0xBF, 0xBF, 0xBF, 0xBF };
	string decoded = utf8.GetString (fiveBytes);
	Assert.IsNull (decoded, "5 bytes (U-03FFFFFF)");
	string expectedHex = BitConverter.ToString (fiveBytes);
	Assert.AreEqual (expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-5");
}
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_2_LastPossibleSequence_Fail_6 ()
{
	// Last 6-byte sequence. The attribute requires GetString to throw; the
	// assertions after it only run (and fail the test) if it does not.
	byte[] sixBytes = { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF, 0xBF };
	string decoded = utf8.GetString (sixBytes);
	Assert.IsNull (decoded, "6 bytes (U-7FFFFFFF)");
	string expectedHex = BitConverter.ToString (sixBytes);
	Assert.AreEqual (expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-6");
}
public void T2_Boundary_3_Other_Pass ()
{
	// Other boundary code points that must decode and round-trip unchanged.
	byte[] belowSurrogates = { 0xED, 0x9F, 0xBF };
	string decoded = utf8.GetString (belowSurrogates);
	Assert.AreEqual (0xD7FF, decoded [0], "U-0000D7FF");
	Assert.AreEqual (BitConverter.ToString (belowSurrogates), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-1");

	byte[] aboveSurrogates = { 0xEE, 0x80, 0x80 };
	decoded = utf8.GetString (aboveSurrogates);
	Assert.AreEqual (0xE000, decoded [0], "U-0000E000");
	Assert.AreEqual (BitConverter.ToString (aboveSurrogates), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-2");

	byte[] replacementChar = { 0xEF, 0xBF, 0xBD };
	decoded = utf8.GetString (replacementChar);
	Assert.AreEqual (0xFFFD, decoded [0], "U-0000FFFD");
	Assert.AreEqual (BitConverter.ToString (replacementChar), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-3");

	// U-0010FFFF decodes to two UTF-16 code units, checked individually.
	byte[] highestCodePoint = { 0xF4, 0x8F, 0xBF, 0xBF };
	decoded = utf8.GetString (highestCodePoint);
	Assert.AreEqual (0xDBFF, decoded [0], "U-0010FFFF-0");
	Assert.AreEqual (0xDFFF, decoded [1], "U-0010FFFF-1");
	Assert.AreEqual (BitConverter.ToString (highestCodePoint), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-4");
}
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_3_Other_Fail_5 ()
{
	// U-00110000, one past the U-0010FFFF case accepted by the _Pass test.
	// The attribute requires GetString to throw; the assertions after it
	// only run (and fail the test) if it does not.
	byte[] pastRange = { 0xF4, 0x90, 0x80, 0x80 };
	string decoded = utf8.GetString (pastRange);
	Assert.IsNull (decoded, "U-00110000");
	string expectedHex = BitConverter.ToString (pastRange);
	Assert.AreEqual (expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-5");
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_311 ()
{
	// A single continuation byte (0x80) with no lead byte; decoding must throw.
	byte[] malformed = new byte [] { 0x80 };
	utf8.GetString (malformed);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_312 ()
{
	// A single continuation byte (0xBF) with no lead byte; decoding must throw.
	byte[] malformed = new byte [] { 0xBF };
	utf8.GetString (malformed);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_313 ()
{
	// Two continuation bytes with no lead byte; decoding must throw.
	byte[] malformed = new byte [] { 0x80, 0xBF };
	utf8.GetString (malformed);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_314 ()
{
	// Three continuation bytes with no lead byte; decoding must throw.
	byte[] malformed = new byte [] { 0x80, 0xBF, 0x80 };
	utf8.GetString (malformed);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_315 ()
{
	// Four continuation bytes with no lead byte; decoding must throw.
	byte[] malformed = new byte [] { 0x80, 0xBF, 0x80, 0xBF };
	utf8.GetString (malformed);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_316 ()
{
	// Five continuation bytes with no lead byte; decoding must throw.
	byte[] malformed = new byte [] { 0x80, 0xBF, 0x80, 0xBF, 0x80 };
	utf8.GetString (malformed);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_317 ()
{
	// Six continuation bytes with no lead byte; decoding must throw.
	byte[] malformed = new byte [] { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF };
	utf8.GetString (malformed);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_318 ()
{
	// Seven continuation bytes with no lead byte; decoding must throw.
	byte[] malformed = new byte [] { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 };
	utf8.GetString (malformed);
}
417 [ExpectedException (typeof (DecoderException))]
418 public void T3_Malformed_1_UnexpectedContinuation_319 ()
420 // 64 different continuation characters
422 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
423 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
424 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
425 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF };
426 string s = utf8.GetString (data);
427 // exception is "really" expected here
431 [ExpectedException (typeof (DecoderException))]
432 public void T3_Malformed_2_LonelyStart_321 ()
435 0xC0, 0x20, 0xC1, 0x20, 0xC2, 0x20, 0xC3, 0x20, 0xC4, 0x20, 0xC5, 0x20, 0xC6, 0x20, 0xC7, 0x20,
436 0xC8, 0x20, 0xC9, 0x20, 0xCA, 0x20, 0xCB, 0x20, 0xCC, 0x20, 0xCD, 0x20, 0xCE, 0x20, 0xCF, 0x20,
437 0xD0, 0x20, 0xD1, 0x20, 0xD2, 0x20, 0xD3, 0x20, 0xD4, 0x20, 0xD5, 0x20, 0xD6, 0x20, 0xD7, 0x20,
438 0xD8, 0x20, 0xD9, 0x20, 0xDA, 0x20, 0xDB, 0x20, 0xDC, 0x20, 0xDD, 0x20, 0xDE, 0x20, 0xDF, 0x20 };
439 string s = utf8.GetString (data);
440 // exception is "really" expected here
444 [ExpectedException (typeof (DecoderException))]
445 public void T3_Malformed_2_LonelyStart_322 ()
448 0xE0, 0x20, 0xE1, 0x20, 0xE2, 0x20, 0xE3, 0x20, 0xE4, 0x20, 0xE5, 0x20, 0xE6, 0x20, 0xE7, 0x20,
449 0xE8, 0x20, 0xE9, 0x20, 0xEA, 0x20, 0xEB, 0x20, 0xEC, 0x20, 0xED, 0x20, 0xEE, 0x20, 0xEF, 0x20 };
450 string s = utf8.GetString (data);
451 // exception is "really" expected here
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_323 ()
{
	// Each lead byte 0xF0..0xF7 is followed by 0x20 instead of a
	// continuation byte; decoding must throw.
	byte[] lonelyStarts = new byte [] {
		0xF0, 0x20, 0xF1, 0x20, 0xF2, 0x20, 0xF3, 0x20,
		0xF4, 0x20, 0xF5, 0x20, 0xF6, 0x20, 0xF7, 0x20 };
	utf8.GetString (lonelyStarts);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_324 ()
{
	// Each 5-byte-sequence lead byte 0xF8..0xFB is followed by 0x20 instead
	// of a continuation byte; decoding must throw.
	// Fixed: the data was a verbatim copy of LonelyStart_323 (0xF0..0xF7),
	// leaving the 0xF8..0xFB lead bytes uncovered between _323 and
	// _325 (0xFC, 0xFD).
	byte[] data = { 0xF8, 0x20, 0xF9, 0x20, 0xFA, 0x20, 0xFB, 0x20 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_325 ()
{
	// Lead bytes 0xFC and 0xFD, each followed by 0x20 instead of a
	// continuation byte; decoding must throw.
	byte[] lonelyStarts = new byte [] { 0xFC, 0x20, 0xFD, 0x20 };
	utf8.GetString (lonelyStarts);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_331 ()
{
	// Lead byte 0xC0 with its final continuation byte missing; decoding must throw.
	byte[] truncated = new byte [] { 0xC0 };
	utf8.GetString (truncated);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_332 ()
{
	// Sequence starting with 0xE0 whose final continuation byte is missing; decoding must throw.
	byte[] truncated = new byte [] { 0xE0, 0x80 };
	utf8.GetString (truncated);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_333 ()
{
	// Sequence starting with 0xF0 whose final continuation byte is missing; decoding must throw.
	byte[] truncated = new byte [] { 0xF0, 0x80, 0x80 };
	utf8.GetString (truncated);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_334 ()
{
	// Sequence starting with 0xF8 whose final continuation byte is missing; decoding must throw.
	byte[] truncated = new byte [] { 0xF8, 0x80, 0x80, 0x80 };
	utf8.GetString (truncated);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_335 ()
{
	// Sequence starting with 0xFC whose final continuation byte is missing; decoding must throw.
	byte[] truncated = new byte [] { 0xFC, 0x80, 0x80, 0x80, 0x80 };
	utf8.GetString (truncated);
}
527 // MS Fx 1.1 accept this
528 // [ExpectedException (typeof (DecoderException))]
529 public void T3_Malformed_3_LastContinuationMissing_336 ()
531 byte[] data = { 0xDF };
533 string s = utf8.GetString (data);
534 // exception is "really" expected here
535 Assert.AreEqual (String.Empty, s, "MS FX 1.1 behaviour");
537 catch (DecoderException) {
538 // but Mono doesn't - better stick to the standard
543 // MS Fx 1.1 accept this
544 // [ExpectedException (typeof (DecoderException))]
545 public void T3_Malformed_3_LastContinuationMissing_337 ()
547 byte[] data = { 0xEF, 0xBF };
549 string s = utf8.GetString (data);
550 // exception is "really" expected here
551 Assert.AreEqual (String.Empty, s, "MS FX 1.1 behaviour");
553 catch (DecoderException) {
554 // but Mono doesn't - better stick to the standard
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_338 ()
{
	// Sequence starting with 0xF7 whose final continuation byte is missing; decoding must throw.
	byte[] truncated = new byte [] { 0xF7, 0xBF, 0xBF };
	utf8.GetString (truncated);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_339 ()
{
	// 5-byte sequence (lead byte 0xFB) whose final continuation byte is
	// missing; decoding must throw.
	// Fixed: the lead byte was written 0xF (a truncated 0xFB), which made
	// this an ASCII control byte followed by stray continuation bytes
	// rather than the truncated 5-byte sequence the test is named for.
	byte[] data = { 0xFB, 0xBF, 0xBF, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_3310 ()
{
	// Sequence starting with 0xFD whose final continuation byte is missing; decoding must throw.
	byte[] truncated = new byte [] { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF };
	utf8.GetString (truncated);
}
586 [ExpectedException (typeof (DecoderException))]
587 public void T3_Malformed_4_ConcatenationImcomplete ()
590 0xC0, 0xE0, 0x80, 0xF0, 0x80, 0x80, 0xF8, 0x80, 0x80, 0x80, 0xFC, 0x80, 0x80, 0x80, 0x80, 0xDF,
591 0xEF, 0xBF, 0xF7, 0xBF, 0xBF, 0xFB, 0xBF, 0xBF, 0xBF, 0xFD, 0xBF, 0xBF, 0xBF, 0xBF };
592 string s = utf8.GetString (data);
593 // exception is "really" expected here
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_5_ImpossibleBytes_351 ()
{
	// The byte 0xFE on its own; decoding must throw.
	byte[] impossible = new byte [] { 0xFE };
	utf8.GetString (impossible);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_5_ImpossibleBytes_352 ()
{
	// The byte 0xFF on its own; decoding must throw.
	byte[] impossible = new byte [] { 0xFF };
	utf8.GetString (impossible);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_5_ImpossibleBytes_353 ()
{
	// A run of 0xFE and 0xFF bytes; decoding must throw.
	byte[] impossible = new byte [] { 0xFE, 0xFE, 0xFF, 0xFF };
	utf8.GetString (impossible);
}
623 // Overlong == dangerous -> "safe" decoder should reject them
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_411 ()
{
	// 2-byte overlong form from the "ASCII slash" group; decoding must throw.
	byte[] overlong = new byte [] { 0xC0, 0xAF };
	utf8.GetString (overlong);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_412 ()
{
	// 3-byte overlong form from the "ASCII slash" group; decoding must throw.
	byte[] overlong = new byte [] { 0xE0, 0x80, 0xAF };
	utf8.GetString (overlong);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_413 ()
{
	// 4-byte overlong form from the "ASCII slash" group; decoding must throw.
	byte[] overlong = new byte [] { 0xF0, 0x80, 0x80, 0xAF };
	utf8.GetString (overlong);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_414 ()
{
	// 5-byte overlong form from the "ASCII slash" group; decoding must throw.
	byte[] overlong = new byte [] { 0xF8, 0x80, 0x80, 0x80, 0xAF };
	utf8.GetString (overlong);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_415 ()
{
	// 6-byte overlong form from the "ASCII slash" group; decoding must throw.
	byte[] overlong = new byte [] { 0xFC, 0x80, 0x80, 0x80, 0x80, 0xAF };
	utf8.GetString (overlong);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_421 ()
{
	// 2-byte case from the overlong "maximum boundary" group; decoding must throw.
	byte[] overlong = new byte [] { 0xC1, 0xBF };
	utf8.GetString (overlong);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_422 ()
{
	// 3-byte case from the overlong "maximum boundary" group; decoding must throw.
	byte[] overlong = new byte [] { 0xE0, 0x9F, 0xBF };
	utf8.GetString (overlong);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_423 ()
{
	// 4-byte case from the overlong "maximum boundary" group; decoding must throw.
	byte[] overlong = new byte [] { 0xF0, 0x8F, 0xBF, 0xBF };
	utf8.GetString (overlong);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_424 ()
{
	// 5-byte case from the overlong "maximum boundary" group; decoding must throw.
	byte[] overlong = new byte [] { 0xF8, 0x87, 0xBF, 0xBF, 0xBF };
	utf8.GetString (overlong);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_425 ()
{
	// 6-byte case from the overlong "maximum boundary" group; decoding must throw.
	byte[] overlong = new byte [] { 0xFC, 0x83, 0xBF, 0xBF, 0xBF, 0xBF };
	utf8.GetString (overlong);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_431 ()
{
	// 2-byte overlong form from the "NUL" group; decoding must throw.
	byte[] overlong = new byte [] { 0xC0, 0x80 };
	utf8.GetString (overlong);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_432 ()
{
	// 3-byte overlong form from the "NUL" group; decoding must throw.
	byte[] overlong = new byte [] { 0xE0, 0x80, 0x80 };
	utf8.GetString (overlong);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_433 ()
{
	// 4-byte overlong form from the "NUL" group; decoding must throw.
	byte[] overlong = new byte [] { 0xF0, 0x80, 0x80, 0x80 };
	utf8.GetString (overlong);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_434 ()
{
	// 5-byte overlong form from the "NUL" group; decoding must throw.
	byte[] overlong = new byte [] { 0xF8, 0x80, 0x80, 0x80, 0x80 };
	utf8.GetString (overlong);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_435 ()
{
	// 6-byte overlong form from the "NUL" group; decoding must throw.
	byte[] overlong = new byte [] { 0xFC, 0x80, 0x80, 0x80, 0x80, 0x80 };
	utf8.GetString (overlong);
}
762 [ExpectedException (typeof (DecoderFallbackException))]
764 // MS Fx 1.1 accept this
765 [Category ("NotDotNet")]
766 [ExpectedException (typeof (DecoderException))]
768 public void T5_IllegalCodePosition_1_UTF16Surrogates_511 ()
770 byte[] data = { 0xED, 0xA0, 0x80 };
771 string s = utf8.GetString (data);
772 // exception is "really" expected here
773 Assert.AreEqual (55296, s [0], "MS FX 1.1 behaviour");
778 [ExpectedException (typeof (DecoderFallbackException))]
780 // MS Fx 1.1 accept this
781 [Category ("NotDotNet")]
782 [ExpectedException (typeof (DecoderException))]
784 public void T5_IllegalCodePosition_1_UTF16Surrogates_512 ()
786 byte[] data = { 0xED, 0xAD, 0xBF };
787 string s = utf8.GetString (data);
788 // exception is "really" expected here
789 Assert.AreEqual (56191, s [0], "MS FX 1.1 behaviour");
794 [ExpectedException (typeof (DecoderFallbackException))]
796 // MS Fx 1.1 accept this
797 [Category ("NotDotNet")]
798 [ExpectedException (typeof (DecoderException))]
800 public void T5_IllegalCodePosition_1_UTF16Surrogates_513 ()
802 byte[] data = { 0xED, 0xAE, 0x80 };
803 string s = utf8.GetString (data);
804 // exception is "really" expected here
805 Assert.AreEqual (56192, s [0], "MS FX 1.1 behaviour");
810 [ExpectedException (typeof (DecoderFallbackException))]
812 // MS Fx 1.1 accept this
813 [Category ("NotDotNet")]
814 [ExpectedException (typeof (DecoderException))]
816 public void T5_IllegalCodePosition_1_UTF16Surrogates_514 ()
818 byte[] data = { 0xED, 0xAF, 0xBF };
819 string s = utf8.GetString (data);
820 // exception is "really" expected here
821 Assert.AreEqual (56319, s [0], "MS FX 1.1 behaviour");
826 [ExpectedException (typeof (DecoderFallbackException))]
828 // MS Fx 1.1 accept this
829 [Category ("NotDotNet")]
830 [ExpectedException (typeof (DecoderException))]
832 public void T5_IllegalCodePosition_1_UTF16Surrogates_515 ()
834 byte[] data = { 0xED, 0xB0, 0x80 };
835 string s = utf8.GetString (data);
836 // exception is "really" expected here
837 Assert.AreEqual (56320, s [0], "MS FX 1.1 behaviour");
842 [ExpectedException (typeof (DecoderFallbackException))]
844 // MS Fx 1.1 accept this
845 [Category ("NotDotNet")]
846 [ExpectedException (typeof (DecoderException))]
848 public void T5_IllegalCodePosition_1_UTF16Surrogates_516 ()
850 byte[] data = { 0xED, 0xBE, 0x80 };
851 string s = utf8.GetString (data);
852 // exception is "really" expected here
853 Assert.AreEqual (57216, s [0], "MS FX 1.1 behaviour");
858 [ExpectedException (typeof (DecoderFallbackException))]
860 // MS Fx 1.1 accept this
861 [Category ("NotDotNet")]
862 [ExpectedException (typeof (DecoderException))]
864 public void T5_IllegalCodePosition_1_UTF16Surrogates_517 ()
866 byte[] data = { 0xED, 0xBF, 0xBF };
867 string s = utf8.GetString (data);
868 // exception is "really" expected here
869 Assert.AreEqual (57343, s [0], "MS FX 1.1 behaviour");
874 [ExpectedException (typeof (DecoderFallbackException))]
876 // MS Fx 1.1 accept this
877 [Category ("NotDotNet")]
878 [ExpectedException (typeof (DecoderException))]
880 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_521 ()
882 byte[] data = { 0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80 };
883 string s = utf8.GetString (data);
884 // exception is "really" expected here
885 Assert.AreEqual (55296, s [0], "MS FX 1.1 behaviour");
886 Assert.AreEqual (56320, s [1], "MS FX 1.1 behaviour");
891 [ExpectedException (typeof (DecoderFallbackException))]
893 // MS Fx 1.1 accept this
894 [Category ("NotDotNet")]
895 [ExpectedException (typeof (DecoderException))]
897 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_522 ()
899 byte[] data = { 0xED, 0xA0, 0x80, 0xED, 0xBF, 0xBF };
900 string s = utf8.GetString (data);
901 // exception is "really" expected here
902 Assert.AreEqual (55296, s [0], "MS FX 1.1 behaviour");
903 Assert.AreEqual (57343, s [1], "MS FX 1.1 behaviour");
908 [ExpectedException (typeof (DecoderFallbackException))]
910 // MS Fx 1.1 accept this
911 [Category ("NotDotNet")]
912 [ExpectedException (typeof (DecoderException))]
914 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_523 ()
916 byte[] data = { 0xED, 0xAD, 0xBF, 0xED, 0xB0, 0x80 };
917 string s = utf8.GetString (data);
918 // exception is "really" expected here
919 Assert.AreEqual (56191, s [0], "MS FX 1.1 behaviour");
920 Assert.AreEqual (56320, s [1], "MS FX 1.1 behaviour");
925 [ExpectedException (typeof (DecoderFallbackException))]
927 // MS Fx 1.1 accept this
928 [Category ("NotDotNet")]
929 [ExpectedException (typeof (DecoderException))]
931 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_524 ()
933 byte[] data = { 0xED, 0xAD, 0xBF, 0xED, 0xBF, 0xBF };
934 string s = utf8.GetString (data);
935 // exception is "really" expected here
936 Assert.AreEqual (56191, s [0], "MS FX 1.1 behaviour");
937 Assert.AreEqual (57343, s [1], "MS FX 1.1 behaviour");
942 [ExpectedException (typeof (DecoderFallbackException))]
944 // MS Fx 1.1 accept this
945 [Category ("NotDotNet")]
946 [ExpectedException (typeof (DecoderException))]
948 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_525 ()
950 byte[] data = { 0xED, 0xAE, 0x80, 0xED, 0xB0, 0x80 };
951 string s = utf8.GetString (data);
952 // exception is "really" expected here
953 Assert.AreEqual (56192, s [0], "MS FX 1.1 behaviour");
954 Assert.AreEqual (56320, s [1], "MS FX 1.1 behaviour");
959 [ExpectedException (typeof (DecoderFallbackException))]
961 // MS Fx 1.1 accept this
962 [Category ("NotDotNet")]
963 [ExpectedException (typeof (DecoderException))]
965 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_526 ()
967 byte[] data = { 0xED, 0xAE, 0x80, 0xED, 0xBF, 0x8F };
968 string s = utf8.GetString (data);
969 // exception is "really" expected here
970 Assert.AreEqual (56192, s [0], "MS FX 1.1 behaviour");
971 Assert.AreEqual (57295, s [1], "MS FX 1.1 behaviour");
976 [ExpectedException (typeof (DecoderFallbackException))]
978 // MS Fx 1.1 accept this
979 [Category ("NotDotNet")]
980 [ExpectedException (typeof (DecoderException))]
982 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_527 ()
984 byte[] data = { 0xED, 0xAF, 0xBF, 0xED, 0xB0, 0x80 };
985 string s = utf8.GetString (data);
986 // exception is "really" expected here
987 Assert.AreEqual (56319, s [0], "MS FX 1.1 behaviour");
988 Assert.AreEqual (56320, s [1], "MS FX 1.1 behaviour");
993 [ExpectedException (typeof (DecoderFallbackException))]
995 // MS Fx 1.1 accept this
996 [Category ("NotDotNet")]
997 [ExpectedException (typeof (DecoderException))]
999 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_528 ()
1001 byte[] data = { 0xED, 0xAF, 0xBF, 0xED, 0xBF, 0xBF };
1002 string s = utf8.GetString (data);
1003 // exception is "really" expected here
1004 Assert.AreEqual (56319, s [0], "MS FX 1.1 behaviour");
1005 Assert.AreEqual (57343, s [1], "MS FX 1.1 behaviour");
// MS Fx 1.1 accepts this input, so the exception expectation stays disabled:
// [ExpectedException (typeof (DecoderException))]
public void T5_IllegalCodePosition_3_Other_531 ()
{
	byte[] encoded = { 0xEF, 0xBF, 0xBE };
	string decoded = utf8.GetString (encoded);
	// An exception is "really" expected here; the test instead pins the
	// accepted result (code unit 0xFFFE).
	Assert.AreEqual (0xFFFE, decoded [0], "MS FX 1.1 behaviour");
}
// MS Fx 1.1 accepts this input, so the exception expectation stays disabled:
// [ExpectedException (typeof (DecoderException))]
public void T5_IllegalCodePosition_3_Other_532 ()
{
	byte[] encoded = { 0xEF, 0xBF, 0xBF };
	string decoded = utf8.GetString (encoded);
	// An exception is "really" expected here; the test instead pins the
	// accepted result (code unit 0xFFFF).
	Assert.AreEqual (0xFFFF, decoded [0], "MS FX 1.1 behaviour");
}
// bug #75065 and #73086.
// U+FEFF must survive decoding as an ordinary character: once when it is the
// only content (EF BB BF), and once when round-tripped in front of 'A'.
public void GetCharsFEFF ()
{
	byte [] data = new byte [] {0xEF, 0xBB, 0xBF};
	Encoding enc = new UTF8Encoding (false, true);
	string s = enc.GetString (data);
	// Fixed: arguments were swapped (actual, expected), which makes NUnit
	// report the failure message the wrong way round.
	Assert.AreEqual ("\uFEFF", s);

	Encoding utf = Encoding.UTF8;
	char[] testChars = {'\uFEFF','A'};

	byte[] bytes = utf.GetBytes(testChars);
	char[] chars = utf.GetChars(bytes);
	Assert.AreEqual ('\uFEFF', chars [0], "#1");
	Assert.AreEqual ('A', chars [1], "#2");
}
1050 public void CloneNotReadOnly ()
1052 Encoding e = Encoding.GetEncoding (65001).Clone ()
1054 Assert.AreEqual (false, e.IsReadOnly);
1055 e.EncoderFallback = new EncoderExceptionFallback ();
1061 [ExpectedException (typeof (DecoderFallbackException))]
1063 [ExpectedException (typeof (ArgumentException))]
1064 [Category ("NotDotNet")] // MS Bug
1066 public void Bug77315 ()
1068 new UTF8Encoding (false, true).GetString (
1069 new byte [] {0xED, 0xA2, 0x8C});
1073 public void SufficientByteArray ()
1075 Encoder e = Encoding.UTF8.GetEncoder ();
1076 byte [] bytes = new byte [0];
1078 char [] chars = new char [] {'\uD800'};
1079 e.GetBytes (chars, 0, 1, bytes, 0, false);
1081 int ret = e.GetBytes (chars, 1, 0, bytes, 0, true);
1083 Assert.AreEqual (0, ret, "drop insufficient char in 2.0: char[]");
1085 Assert.Fail ("ArgumentException is expected: char[]");
1087 } catch (ArgumentException) {
1090 string s = "\uD800";
1092 int ret = Encoding.UTF8.GetBytes (s, 0, 1, bytes, 0);
1094 Assert.AreEqual (0, ret, "drop insufficient char in 2.0: string");
1096 Assert.Fail ("ArgumentException is expected: string");
1098 } catch (ArgumentException) {
1102 [Test] // bug #565129
1103 public void SufficientByteArray2 ()
1105 var u = Encoding.UTF8;
1106 Assert.AreEqual (3, u.GetByteCount ("\uFFFD"), "#1-1");
1107 Assert.AreEqual (3, u.GetByteCount ("\uD800"), "#1-2");
1108 Assert.AreEqual (3, u.GetByteCount ("\uDC00"), "#1-3");
1109 Assert.AreEqual (4, u.GetByteCount ("\uD800\uDC00"), "#1-4");
1110 byte [] bytes = new byte [10];
1111 Assert.AreEqual (3, u.GetBytes ("\uDC00", 0, 1, bytes, 0), "#1-5"); // was bogus
1113 Assert.AreEqual (3, u.GetBytes ("\uFFFD").Length, "#2-1");
1114 Assert.AreEqual (3, u.GetBytes ("\uD800").Length, "#2-2");
1115 Assert.AreEqual (3, u.GetBytes ("\uDC00").Length, "#2-3");
1116 Assert.AreEqual (4, u.GetBytes ("\uD800\uDC00").Length, "#2-4");
1118 for (char c = char.MinValue; c < char.MaxValue; c++) {
1120 bIn = u.GetBytes (c.ToString ());
1124 new UTF8Encoding (false, true).GetBytes (new char [] {'\uDF45', '\uD808'}, 0, 2);
1125 Assert.Fail ("EncoderFallbackException is expected");
1126 } catch (EncoderFallbackException) {
1131 [Test] // bug #77550
1132 public void DecoderFallbackSimple ()
1134 UTF8Encoding e = new UTF8Encoding (false, false);
1135 AssertType.AreEqual (1, e.GetDecoder ().GetCharCount (
1136 new byte [] {(byte) 183}, 0, 1),
1138 AssertType.AreEqual (1, e.GetDecoder().GetChars (
1139 new byte [] {(byte) 183}, 0, 1,
1142 AssertType.AreEqual (1, e.GetString (new byte [] {(byte) 183}).Length,
public void FallbackDefaultEncodingUTF8 ()
{
	// Inspects the fallback buffer created by Encoding.UTF8's decoder
	// fallback: after one Fallback call it must hold exactly one pending
	// char, U+FFFD, and must not be able to move backwards.
	DecoderFallbackBuffer created = Encoding.UTF8.DecoderFallback.CreateFallbackBuffer ();
	DecoderReplacementFallbackBuffer buffer = created as DecoderReplacementFallbackBuffer;

	AssertType.IsTrue (buffer.Fallback (new byte [0], 0), "#1");
	AssertType.IsFalse (buffer.MovePrevious (), "#2");
	AssertType.AreEqual (1, buffer.Remaining, "#3");
	AssertType.AreEqual ('\uFFFD', buffer.GetNextChar (), "#4");
}
[Category ("MobileNotWorking")]
public void Bug415628 ()
{
	// Reads up to 8000 bytes from the resource file and decodes them; the
	// test has no assertion, so it passes as long as nothing throws.
	using (FileStream input = File.Open ("Test/resources/415628.bin", FileMode.Open)) {
		BinaryReader reader = new BinaryReader (input);
		byte [] content = reader.ReadBytes (8000);
		Encoding.UTF8.GetString (content);
	}
}
[ExpectedException (typeof (ArgumentException))]
public void Bug10788()
{
	// Decoding 4096 bytes into a 10-char buffer starting at index 9 leaves
	// room for a single char; the decoder must reject this with ArgumentException.
	byte[] input = new byte[4096];
	char[] output = new char[10];

	Decoder decoder = Encoding.UTF8.GetDecoder ();
	decoder.GetChars (input, 0, 4096, output, 9, false);
}
1181 public void Bug10789()
1183 byte[] bytes = new byte[4096];
1184 char[] chars = new char[10];
1187 Encoding.UTF8.GetDecoder ().GetChars (bytes, 0, 1, chars, 10, false);
1188 Assert.Fail ("ArgumentException is expected #1");
1189 } catch (ArgumentException) {
1193 Encoding.UTF8.GetDecoder ().GetChars (bytes, 0, 1, chars, 11, false);
1194 Assert.Fail ("ArgumentOutOfRangeException is expected #2");
1195 } catch (ArgumentOutOfRangeException) {
1198 int charactersWritten = Encoding.UTF8.GetDecoder ().GetChars (bytes, 0, 0, chars, 10, false);
1199 Assert.AreEqual (0, charactersWritten, "#3");