2 // UTF8EncodingTest.cs - NUnit Test Cases for System.Text.UTF8Encoding
5 // Patrick Kalkman kalkman@cistron.nl
6 // Sebastien Pouliot (spouliot@motus.com)
8 // (C) 2003 Patrick Kalkman
9 // (C) 2004 Novell (http://www.novell.com)
12 using NUnit.Framework;
14 using System.Reflection;
18 using DecoderException = System.Text.DecoderFallbackException;
20 using AssertType = NUnit.Framework.Assert;
22 namespace MonoTests.System.Text
25 public class UTF8EncodingTest
27 private UTF8Encoding utf8;
32 utf8 = new UTF8Encoding (true, true);
[Test]
public void IsBrowserDisplay ()
{
	// The fixture's encoding must report itself usable for browser display.
	bool supported = utf8.IsBrowserDisplay;
	Assert.IsTrue (supported);
}
[Test]
public void IsBrowserSave ()
{
	// The fixture's encoding must report itself usable for saving from a browser.
	bool supported = utf8.IsBrowserSave;
	Assert.IsTrue (supported);
}
[Test]
public void IsMailNewsDisplay ()
{
	// The fixture's encoding must report itself usable for mail/news display.
	bool supported = utf8.IsMailNewsDisplay;
	Assert.IsTrue (supported);
}
[Test]
public void IsMailNewsSave ()
{
	// The fixture's encoding must report itself usable for saving from mail/news clients.
	bool supported = utf8.IsMailNewsSave;
	Assert.IsTrue (supported);
}
[Test]
public void TestCompat ()
{
	// Two default-constructed encodings must compare equal.
	UTF8Encoding first = new UTF8Encoding ();
	UTF8Encoding second = new UTF8Encoding ();
	Assert.IsTrue (first.Equals (second));
}
[Test]
public void TestEncodingGetBytes1()
{
	// "A<NOT IDENTICAL TO><ALPHA>." may be encoded as 41 E2 89 A2 CE 91 2E
	int[] expected = { 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E };
	UTF8Encoding encoder = new UTF8Encoding ();
	byte[] encoded = encoder.GetBytes ("\u0041\u2262\u0391\u002E");

	// Compare position by position; the message numbers the byte (UTF #1..#7).
	for (int i = 0; i < expected.Length; i++) {
		Assert.AreEqual (expected [i], encoded [i], "UTF #" + (i + 1));
	}
}
[Test]
public void TestEncodingGetBytes2()
{
	// "Hi Mom <WHITE SMILING FACE>!" may be encoded as 48 69 20 4D 6F 6D 20 E2 98 BA 21
	int[] expected = { 0x48, 0x69, 0x20, 0x4D, 0x6F, 0x6D, 0x20, 0xE2, 0x98, 0xBA, 0x21 };
	string text = "\u0048\u0069\u0020\u004D\u006F\u006D\u0020\u263A\u0021";
	UTF8Encoding encoder = new UTF8Encoding ();
	byte[] buffer = new byte [11];

	int written = encoder.GetBytes (text.ToCharArray (), 0, text.Length, buffer, 0);

	Assert.AreEqual (11, written, "UTF #1");
	// Numbering continues after the count check: bytes are UTF #2..#12.
	for (int i = 0; i < expected.Length; i++) {
		Assert.AreEqual (expected [i], buffer [i], "UTF #" + (i + 2));
	}
}
[Test]
public void TestDecodingGetChars1()
{
	// 41 E2 89 A2 CE 91 2E may be decoded as "A<NOT IDENTICAL TO><ALPHA>."
	UTF8Encoding decoder = new UTF8Encoding ();
	byte[] encoded = new byte [] {0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E};
	int[] expected = { 0x0041, 0x2262, 0x0391, 0x002E };

	char[] decoded = decoder.GetChars (encoded);

	for (int i = 0; i < expected.Length; i++) {
		Assert.AreEqual (expected [i], decoded [i], "UTF #" + (i + 1));
	}
}
[Test]
public void TestMaxCharCount()
{
	UTF8Encoding UTF8enc = new UTF8Encoding ();
	Encoding UTF8encWithBOM = new UTF8Encoding(true);
	Assert.AreEqual (51, UTF8enc.GetMaxCharCount(50), "UTF #1");
	// Compare char counts (not byte counts) so the BOM-emitting variant's
	// GetMaxCharCount is actually exercised by this test.
	Assert.AreEqual (UTF8enc.GetMaxCharCount(50), UTF8encWithBOM.GetMaxCharCount(50), "UTF #2");
}
[Test]
public void TestMaxCharCountWithCustomFallback()
{
	// Both fallbacks substitute a two-character replacement string.
	EncoderFallback encoderFallback = new EncoderReplacementFallback ("\u2047\u2047");
	DecoderFallback decoderFallback = new DecoderReplacementFallback ("\u2047\u2047");
	Encoding encoding = Encoding.GetEncoding ("utf-8", encoderFallback, decoderFallback);

	Assert.AreEqual (102, encoding.GetMaxCharCount (50), "UTF #1");
}
[Test]
public void TestMaxByteCount()
{
	// The worst-case byte count must be the same with and without a BOM.
	UTF8Encoding plain = new UTF8Encoding ();
	Encoding withBom = new UTF8Encoding (true);

	Assert.AreEqual (153, plain.GetMaxByteCount (50), "UTF #1");
	int plainMax = plain.GetMaxByteCount (50);
	int bomMax = withBom.GetMaxByteCount (50);
	Assert.AreEqual (plainMax, bomMax, "UTF #2");
}
[Test]
public void TestMaxByteCountWithCustomFallback()
{
	// The encoder fallback substitutes two characters per unencodable char.
	EncoderFallback encoderFallback = new EncoderReplacementFallback ("\u2047\u2047");
	DecoderFallback decoderFallback = new DecoderReplacementFallback ("?");
	Encoding encoding = Encoding.GetEncoding ("utf-8", encoderFallback, decoderFallback);

	Assert.AreEqual (306, encoding.GetMaxByteCount (50), "UTF #1");
}
// regression for bug #59648
[Test]
public void TestThrowOnInvalid ()
{
	// Constructed with throwOnInvalidBytes = false: invalid bytes are
	// expected to decode to U+FFFD instead of raising an exception.
	UTF8Encoding lenient = new UTF8Encoding (true, false);

	byte[] invalidPair = new byte [] { 0xC0, 0xAF };
	Assert.AreEqual (2, lenient.GetCharCount (invalidPair), "#A0");
	string replaced = lenient.GetString (invalidPair);
	Assert.AreEqual ("\uFFFD\uFFFD", replaced, "#A1");

	// Same invalid pair embedded between ASCII digits.
	byte[] embedded = new byte [] { 0x30, 0x31, 0xC0, 0xAF, 0x30, 0x32 };
	int[] expected = { 0x30, 0x31, 0xFFFD, 0xFFFD, 0x30, 0x32 };
	string decoded = lenient.GetString (embedded);
	Assert.AreEqual (6, decoded.Length, "#B1");
	for (int i = 0; i < expected.Length; i++) {
		Assert.AreEqual (expected [i], (int) decoded [i], "#B" + (i + 2));
	}
}
180 // UTF8 decoding tests are based on the test file from http://www.cl.cam.ac.uk/~mgk25/
181 // The test file is: https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
182 // which is licensed under CC-by-4.0: https://creativecommons.org/licenses/by/4.0/
184 // The file is not copied verbatim, instead individual
185 // tests are based on individual portions of that file
[Test]
public void T1_Correct_GreekWord_kosme ()
{
	// The Greek word is kept as raw bytes so this source file stays free of
	// non-ASCII text; decoding and re-encoding must reproduce the bytes.
	byte[] greek = { 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5 };
	string decoded = utf8.GetString (greek);
	string before = BitConverter.ToString (greek);
	string after = BitConverter.ToString (utf8.GetBytes (decoded));
	Assert.AreEqual (before, after, "Reconverted");
}
[Test]
public void T2_Boundary_1_FirstPossibleSequence_Pass ()
{
	// Smallest code point of each valid sequence length (1 to 4 bytes);
	// each must decode and then re-encode to the same bytes.
	byte[] oneByte = { 0x00 };
	string decoded = utf8.GetString (oneByte);
	Assert.AreEqual ("\0", decoded, "1 byte (U-00000000)");
	Assert.AreEqual (BitConverter.ToString (oneByte), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-1");

	byte[] twoBytes = { 0xC2, 0x80 };
	decoded = utf8.GetString (twoBytes);
	Assert.AreEqual (0x80, decoded [0], "2 bytes (U-00000080)");
	Assert.AreEqual (BitConverter.ToString (twoBytes), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-2");

	byte[] threeBytes = { 0xE0, 0xA0, 0x80 };
	decoded = utf8.GetString (threeBytes);
	Assert.AreEqual (0x800, decoded [0], "3 bytes (U-00000800)");
	Assert.AreEqual (BitConverter.ToString (threeBytes), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-3");

	// U+10000 is outside the BMP and decodes to the surrogate pair D800 DC00.
	byte[] fourBytes = { 0xF0, 0x90, 0x80, 0x80 };
	decoded = utf8.GetString (fourBytes);
	Assert.AreEqual (0xD800, decoded [0], "4 bytes (U-00010000)-0");
	Assert.AreEqual (0xDC00, decoded [1], "4 bytes (U-00010000)-1");
	Assert.AreEqual (BitConverter.ToString (fourBytes), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-4");
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_1_FirstPossibleSequence_Fail_5 ()
{
	// A 5-byte form; GetString is expected to throw.
	byte[] fiveByteForm = { 0xF8, 0x88, 0x80, 0x80, 0x80 };
	string decoded = utf8.GetString (fiveByteForm);
	// Only reached when no exception was thrown.
	Assert.IsNull (decoded, "5 bytes (U-00200000)");
	Assert.AreEqual (BitConverter.ToString (fiveByteForm), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-5");
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_1_FirstPossibleSequence_Fail_6 ()
{
	// A 6-byte form; GetString is expected to throw.
	byte[] sixByteForm = { 0xFC, 0x84, 0x80, 0x80, 0x80, 0x80 };
	string decoded = utf8.GetString (sixByteForm);
	// Only reached when no exception was thrown.
	Assert.IsNull (decoded, "6 bytes (U-04000000)");
	Assert.AreEqual (BitConverter.ToString (sixByteForm), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-6");
}
[Test]
public void T2_Boundary_2_LastPossibleSequence_Pass ()
{
	// Largest code point of the 1-, 2- and 3-byte forms; each must round-trip.
	byte[] oneByte = { 0x7F };
	string decoded = utf8.GetString (oneByte);
	Assert.AreEqual (0x7F, decoded [0], "1 byte (U-0000007F)");
	Assert.AreEqual (BitConverter.ToString (oneByte), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-1");

	byte[] twoBytes = { 0xDF, 0xBF };
	decoded = utf8.GetString (twoBytes);
	Assert.AreEqual (0x7FF, decoded [0], "2 bytes (U-000007FF)");
	Assert.AreEqual (BitConverter.ToString (twoBytes), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-2");

	byte[] threeBytes = { 0xEF, 0xBF, 0xBF };
	decoded = utf8.GetString (threeBytes);
	Assert.AreEqual (0xFFFF, decoded [0], "3 bytes (U-0000FFFF)");
	Assert.AreEqual (BitConverter.ToString (threeBytes), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-3");
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_2_LastPossibleSequence_Fail_4 ()
{
	// F7 BF BF BF is the largest 4-byte form (U-001FFFFF), which lies above
	// U+10FFFF, so GetString is expected to throw. The lead byte must be
	// 0xF7: with 0x7F (ASCII DEL) the input is a stray-continuation case
	// and the 4-byte boundary is never reached.
	byte[] data224 = { 0xF7, 0xBF, 0xBF, 0xBF };
	string s = utf8.GetString (data224);
	// Only reached when no exception was thrown.
	Assert.IsNull (s, "4 bytes (U-001FFFFF)");
	Assert.AreEqual (BitConverter.ToString (data224), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-4");
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_2_LastPossibleSequence_Fail_5 ()
{
	// Largest 5-byte form; GetString is expected to throw.
	byte[] fiveByteForm = { 0xFB, 0xBF, 0xBF, 0xBF, 0xBF };
	string decoded = utf8.GetString (fiveByteForm);
	// Only reached when no exception was thrown.
	Assert.IsNull (decoded, "5 bytes (U-03FFFFFF)");
	Assert.AreEqual (BitConverter.ToString (fiveByteForm), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-5");
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_2_LastPossibleSequence_Fail_6 ()
{
	// Largest 6-byte form; GetString is expected to throw.
	byte[] sixByteForm = { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF, 0xBF };
	string decoded = utf8.GetString (sixByteForm);
	// Only reached when no exception was thrown.
	Assert.IsNull (decoded, "6 bytes (U-7FFFFFFF)");
	Assert.AreEqual (BitConverter.ToString (sixByteForm), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-6");
}
[Test]
public void T2_Boundary_3_Other_Pass ()
{
	// Code points on either side of the surrogate range, U+FFFD, and the
	// last code point U+10FFFF; each must decode and round-trip.
	byte[] belowSurrogates = { 0xED, 0x9F, 0xBF };
	string decoded = utf8.GetString (belowSurrogates);
	Assert.AreEqual (0xD7FF, decoded [0], "U-0000D7FF");
	Assert.AreEqual (BitConverter.ToString (belowSurrogates), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-1");

	byte[] aboveSurrogates = { 0xEE, 0x80, 0x80 };
	decoded = utf8.GetString (aboveSurrogates);
	Assert.AreEqual (0xE000, decoded [0], "U-0000E000");
	Assert.AreEqual (BitConverter.ToString (aboveSurrogates), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-2");

	byte[] replacementChar = { 0xEF, 0xBF, 0xBD };
	decoded = utf8.GetString (replacementChar);
	Assert.AreEqual (0xFFFD, decoded [0], "U-0000FFFD");
	Assert.AreEqual (BitConverter.ToString (replacementChar), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-3");

	// U+10FFFF decodes to the surrogate pair DBFF DFFF.
	byte[] lastCodePoint = { 0xF4, 0x8F, 0xBF, 0xBF };
	decoded = utf8.GetString (lastCodePoint);
	Assert.AreEqual (0xDBFF, decoded [0], "U-0010FFFF-0");
	Assert.AreEqual (0xDFFF, decoded [1], "U-0010FFFF-1");
	Assert.AreEqual (BitConverter.ToString (lastCodePoint), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-4");
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_3_Other_Fail_5 ()
{
	// F4 90 80 80 is U-00110000, one past U+10FFFF; GetString is expected to throw.
	byte[] pastLastCodePoint = { 0xF4, 0x90, 0x80, 0x80 };
	string decoded = utf8.GetString (pastLastCodePoint);
	// Only reached when no exception was thrown.
	Assert.IsNull (decoded, "U-00110000");
	Assert.AreEqual (BitConverter.ToString (pastLastCodePoint), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-5");
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_311 ()
{
	// 0x80 is a continuation byte with no lead byte before it; decoding must throw.
	utf8.GetString (new byte [] { 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_312 ()
{
	// 0xBF is a continuation byte with no lead byte before it; decoding must throw.
	utf8.GetString (new byte [] { 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_313 ()
{
	// Two continuation bytes and no lead byte; decoding must throw.
	utf8.GetString (new byte [] { 0x80, 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_314 ()
{
	// Three continuation bytes and no lead byte; decoding must throw.
	utf8.GetString (new byte [] { 0x80, 0xBF, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_315 ()
{
	// Four continuation bytes and no lead byte; decoding must throw.
	utf8.GetString (new byte [] { 0x80, 0xBF, 0x80, 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_316 ()
{
	// Five continuation bytes and no lead byte; decoding must throw.
	utf8.GetString (new byte [] { 0x80, 0xBF, 0x80, 0xBF, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_317 ()
{
	// Six continuation bytes and no lead byte; decoding must throw.
	utf8.GetString (new byte [] { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_318 ()
{
	// Seven continuation bytes and no lead byte; decoding must throw.
	utf8.GetString (new byte [] { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_319 ()
{
	// 64 different continuation characters: every byte from 0x80 to 0xBF in order.
	byte[] continuations = new byte [64];
	for (int i = 0; i < continuations.Length; i++) {
		continuations [i] = (byte) (0x80 + i);
	}
	utf8.GetString (continuations);
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_321 ()
{
	// Each byte from 0xC0 to 0xDF followed by a space instead of a continuation byte.
	byte[] lonelyLeads = new byte [64];
	for (int i = 0; i < 32; i++) {
		lonelyLeads [2 * i] = (byte) (0xC0 + i);
		lonelyLeads [2 * i + 1] = 0x20;
	}
	utf8.GetString (lonelyLeads);
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_322 ()
{
	// Each byte from 0xE0 to 0xEF followed by a space instead of continuation bytes.
	byte[] lonelyLeads = new byte [32];
	for (int i = 0; i < 16; i++) {
		lonelyLeads [2 * i] = (byte) (0xE0 + i);
		lonelyLeads [2 * i + 1] = 0x20;
	}
	utf8.GetString (lonelyLeads);
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_323 ()
{
	// Each byte from 0xF0 to 0xF7 followed by a space instead of continuation bytes.
	byte[] lonelyLeads = new byte [16];
	for (int i = 0; i < 8; i++) {
		lonelyLeads [2 * i] = (byte) (0xF0 + i);
		lonelyLeads [2 * i + 1] = 0x20;
	}
	utf8.GetString (lonelyLeads);
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_324 ()
{
	// Lead bytes of 5-byte forms (0xF8-0xFB), each followed by a space
	// instead of continuation bytes. The data must differ from test 323
	// (0xF0-0xF7); otherwise this range is never exercised.
	byte[] data = { 0xF8, 0x20, 0xF9, 0x20, 0xFA, 0x20, 0xFB, 0x20 };
	// exception is "really" expected here
	utf8.GetString (data);
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_325 ()
{
	// 0xFC and 0xFD, each followed by a space instead of continuation bytes.
	utf8.GetString (new byte [] { 0xFC, 0x20, 0xFD, 0x20 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_331 ()
{
	// A 2-byte lead (0xC0) with nothing after it; decoding must throw.
	utf8.GetString (new byte [] { 0xC0 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_332 ()
{
	// A 3-byte lead (0xE0) followed by only one continuation byte.
	utf8.GetString (new byte [] { 0xE0, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_333 ()
{
	// A 4-byte lead (0xF0) followed by only two continuation bytes.
	utf8.GetString (new byte [] { 0xF0, 0x80, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_334 ()
{
	// A 5-byte lead (0xF8) followed by only three continuation bytes.
	utf8.GetString (new byte [] { 0xF8, 0x80, 0x80, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_335 ()
{
	// A 6-byte lead (0xFC) followed by only four continuation bytes.
	utf8.GetString (new byte [] { 0xFC, 0x80, 0x80, 0x80, 0x80 });
}
// MS Fx 1.1 accepts this input, so no exception is demanded here.
[Test]
public void T3_Malformed_3_LastContinuationMissing_336 ()
{
	// Either outcome passes: an empty string (MS Fx 1.1 behaviour) or a
	// DecoderException (what Mono does, sticking to the standard).
	byte[] truncated = { 0xDF };
	string decoded;
	try {
		decoded = utf8.GetString (truncated);
	} catch (DecoderException) {
		return;
	}
	Assert.AreEqual (String.Empty, decoded, "MS FX 1.1 behaviour");
}
// MS Fx 1.1 accepts this input, so no exception is demanded here.
[Test]
public void T3_Malformed_3_LastContinuationMissing_337 ()
{
	// Either outcome passes: an empty string (MS Fx 1.1 behaviour) or a
	// DecoderException (what Mono does, sticking to the standard).
	byte[] truncated = { 0xEF, 0xBF };
	string decoded;
	try {
		decoded = utf8.GetString (truncated);
	} catch (DecoderException) {
		return;
	}
	Assert.AreEqual (String.Empty, decoded, "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_338 ()
{
	// A 4-byte lead (0xF7) followed by only two continuation bytes.
	utf8.GetString (new byte [] { 0xF7, 0xBF, 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_339 ()
{
	// A 5-byte lead (0xFB) followed by only three of its four continuation
	// bytes. The lead must be 0xFB: the truncated literal 0xF is an ASCII
	// control character, which turns this into a stray-continuation test.
	byte[] data = { 0xFB, 0xBF, 0xBF, 0xBF };
	// exception is "really" expected here
	utf8.GetString (data);
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_3310 ()
{
	// A 6-byte lead (0xFD) followed by only four continuation bytes.
	utf8.GetString (new byte [] { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_4_ConcatenationImcomplete ()
{
	// The ten truncated sequences from tests 331-3310, back to back.
	byte[] concatenated = {
		0xC0,
		0xE0, 0x80,
		0xF0, 0x80, 0x80,
		0xF8, 0x80, 0x80, 0x80,
		0xFC, 0x80, 0x80, 0x80, 0x80,
		0xDF,
		0xEF, 0xBF,
		0xF7, 0xBF, 0xBF,
		0xFB, 0xBF, 0xBF, 0xBF,
		0xFD, 0xBF, 0xBF, 0xBF, 0xBF };
	utf8.GetString (concatenated);
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_5_ImpossibleBytes_351 ()
{
	// 0xFE never appears in UTF-8; decoding must throw.
	utf8.GetString (new byte [] { 0xFE });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_5_ImpossibleBytes_352 ()
{
	// 0xFF never appears in UTF-8; decoding must throw.
	utf8.GetString (new byte [] { 0xFF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_5_ImpossibleBytes_353 ()
{
	// A run of 0xFE and 0xFF bytes, none of which can appear in UTF-8.
	utf8.GetString (new byte [] { 0xFE, 0xFE, 0xFF, 0xFF });
}
613 // Overlong == dangerous -> "safe" decoder should reject them
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_411 ()
{
	// '/' (U+002F) written in an overlong 2-byte form; decoding must throw.
	utf8.GetString (new byte [] { 0xC0, 0xAF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_412 ()
{
	// '/' (U+002F) written in an overlong 3-byte form; decoding must throw.
	utf8.GetString (new byte [] { 0xE0, 0x80, 0xAF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_413 ()
{
	// '/' (U+002F) written in an overlong 4-byte form; decoding must throw.
	utf8.GetString (new byte [] { 0xF0, 0x80, 0x80, 0xAF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_414 ()
{
	// '/' (U+002F) written in a 5-byte form; decoding must throw.
	utf8.GetString (new byte [] { 0xF8, 0x80, 0x80, 0x80, 0xAF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_415 ()
{
	// '/' (U+002F) written in a 6-byte form; decoding must throw.
	utf8.GetString (new byte [] { 0xFC, 0x80, 0x80, 0x80, 0x80, 0xAF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_421 ()
{
	// U+007F in a 2-byte form: the highest value still overlong at this length.
	utf8.GetString (new byte [] { 0xC1, 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_422 ()
{
	// U+07FF in a 3-byte form: the highest value still overlong at this length.
	utf8.GetString (new byte [] { 0xE0, 0x9F, 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_423 ()
{
	// U+FFFF in a 4-byte form: the highest value still overlong at this length.
	utf8.GetString (new byte [] { 0xF0, 0x8F, 0xBF, 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_424 ()
{
	// Maximum-boundary overlong case written as a 5-byte form; decoding must throw.
	utf8.GetString (new byte [] { 0xF8, 0x87, 0xBF, 0xBF, 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_425 ()
{
	// Maximum-boundary overlong case written as a 6-byte form; decoding must throw.
	utf8.GetString (new byte [] { 0xFC, 0x83, 0xBF, 0xBF, 0xBF, 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_431 ()
{
	// NUL (U+0000) written in an overlong 2-byte form; decoding must throw.
	utf8.GetString (new byte [] { 0xC0, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_432 ()
{
	// NUL (U+0000) written in an overlong 3-byte form; decoding must throw.
	utf8.GetString (new byte [] { 0xE0, 0x80, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_433 ()
{
	// NUL (U+0000) written in an overlong 4-byte form; decoding must throw.
	utf8.GetString (new byte [] { 0xF0, 0x80, 0x80, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_434 ()
{
	// NUL (U+0000) written in a 5-byte form; decoding must throw.
	utf8.GetString (new byte [] { 0xF8, 0x80, 0x80, 0x80, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_435 ()
{
	// NUL (U+0000) written in a 6-byte form; decoding must throw.
	utf8.GetString (new byte [] { 0xFC, 0x80, 0x80, 0x80, 0x80, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_511 ()
{
	// ED A0 80 is the 3-byte form of the surrogate U+D800; decoding must throw.
	byte[] encodedSurrogate = { 0xED, 0xA0, 0x80 };
	string decoded = utf8.GetString (encodedSurrogate);
	// Only reached when no exception was thrown.
	Assert.AreEqual (0xD800, decoded [0], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_512 ()
{
	// ED AD BF is the 3-byte form of the surrogate U+DB7F; decoding must throw.
	byte[] encodedSurrogate = { 0xED, 0xAD, 0xBF };
	string decoded = utf8.GetString (encodedSurrogate);
	// Only reached when no exception was thrown.
	Assert.AreEqual (0xDB7F, decoded [0], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_513 ()
{
	// ED AE 80 is the 3-byte form of the surrogate U+DB80; decoding must throw.
	byte[] encodedSurrogate = { 0xED, 0xAE, 0x80 };
	string decoded = utf8.GetString (encodedSurrogate);
	// Only reached when no exception was thrown.
	Assert.AreEqual (0xDB80, decoded [0], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_514 ()
{
	// ED AF BF is the 3-byte form of the surrogate U+DBFF; decoding must throw.
	byte[] encodedSurrogate = { 0xED, 0xAF, 0xBF };
	string decoded = utf8.GetString (encodedSurrogate);
	// Only reached when no exception was thrown.
	Assert.AreEqual (0xDBFF, decoded [0], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_515 ()
{
	// ED B0 80 is the 3-byte form of the surrogate U+DC00; decoding must throw.
	byte[] encodedSurrogate = { 0xED, 0xB0, 0x80 };
	string decoded = utf8.GetString (encodedSurrogate);
	// Only reached when no exception was thrown.
	Assert.AreEqual (0xDC00, decoded [0], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_516 ()
{
	// ED BE 80 is the 3-byte form of the surrogate U+DF80; decoding must throw.
	byte[] encodedSurrogate = { 0xED, 0xBE, 0x80 };
	string decoded = utf8.GetString (encodedSurrogate);
	// Only reached when no exception was thrown.
	Assert.AreEqual (0xDF80, decoded [0], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_517 ()
{
	// ED BF BF is the 3-byte form of the surrogate U+DFFF; decoding must throw.
	byte[] encodedSurrogate = { 0xED, 0xBF, 0xBF };
	string decoded = utf8.GetString (encodedSurrogate);
	// Only reached when no exception was thrown.
	Assert.AreEqual (0xDFFF, decoded [0], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_521 ()
{
	// Surrogate pair U+D800 U+DC00, each half encoded as its own 3-byte form.
	byte[] encodedPair = { 0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80 };
	string decoded = utf8.GetString (encodedPair);
	// Only reached when no exception was thrown.
	Assert.AreEqual (0xD800, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDC00, decoded [1], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_522 ()
{
	// Surrogate pair U+D800 U+DFFF, each half encoded as its own 3-byte form.
	byte[] encodedPair = { 0xED, 0xA0, 0x80, 0xED, 0xBF, 0xBF };
	string decoded = utf8.GetString (encodedPair);
	// Only reached when no exception was thrown.
	Assert.AreEqual (0xD800, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDFFF, decoded [1], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_523 ()
{
	// Surrogate pair U+DB7F U+DC00, each half encoded as its own 3-byte form.
	byte[] encodedPair = { 0xED, 0xAD, 0xBF, 0xED, 0xB0, 0x80 };
	string decoded = utf8.GetString (encodedPair);
	// Only reached when no exception was thrown.
	Assert.AreEqual (0xDB7F, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDC00, decoded [1], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_524 ()
{
	// Surrogate pair U+DB7F U+DFFF, each half encoded as its own 3-byte form.
	byte[] encodedPair = { 0xED, 0xAD, 0xBF, 0xED, 0xBF, 0xBF };
	string decoded = utf8.GetString (encodedPair);
	// Only reached when no exception was thrown.
	Assert.AreEqual (0xDB7F, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDFFF, decoded [1], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_525 ()
{
	// Surrogate pair U+DB80 U+DC00, each half encoded as its own 3-byte form.
	byte[] encodedPair = { 0xED, 0xAE, 0x80, 0xED, 0xB0, 0x80 };
	string decoded = utf8.GetString (encodedPair);
	// Only reached when no exception was thrown.
	Assert.AreEqual (0xDB80, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDC00, decoded [1], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_526 ()
{
	// Surrogate pair U+DB80 U+DFFF, each half encoded as its own 3-byte
	// form. The low half is ED BF BF (U+DFFF), matching tests 522, 524 and
	// 528; a last byte of 0x8F would encode U+DFCF and break the
	// {D800, DB7F, DB80, DBFF} x {DC00, DFFF} matrix this group covers.
	byte[] data = { 0xED, 0xAE, 0x80, 0xED, 0xBF, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	Assert.AreEqual (56192, s [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (57343, s [1], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_527 ()
{
	// Surrogate pair U+DBFF U+DC00, each half encoded as its own 3-byte form.
	byte[] encodedPair = { 0xED, 0xAF, 0xBF, 0xED, 0xB0, 0x80 };
	string decoded = utf8.GetString (encodedPair);
	// Only reached when no exception was thrown.
	Assert.AreEqual (0xDBFF, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDC00, decoded [1], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_528 ()
{
	// Surrogate pair U+DBFF U+DFFF, each half encoded as its own 3-byte form.
	byte[] encodedPair = { 0xED, 0xAF, 0xBF, 0xED, 0xBF, 0xBF };
	string decoded = utf8.GetString (encodedPair);
	// Only reached when no exception was thrown.
	Assert.AreEqual (0xDBFF, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDFFF, decoded [1], "MS FX 1.1 behaviour");
}
// MS Fx 1.1 accepts this input, so no exception is demanded here.
[Test]
public void T5_IllegalCodePosition_3_Other_531 ()
{
	// EF BF BE is U+FFFE; it is expected to decode to that single char.
	byte[] encoded = { 0xEF, 0xBF, 0xBE };
	string decoded = utf8.GetString (encoded);
	Assert.AreEqual (0xFFFE, decoded [0], "MS FX 1.1 behaviour");
}
// MS Fx 1.1 accepts this input, so no exception is demanded here.
[Test]
public void T5_IllegalCodePosition_3_Other_532 ()
{
	// EF BF BF is U+FFFF; it is expected to decode to that single char.
	byte[] encoded = { 0xEF, 0xBF, 0xBF };
	string decoded = utf8.GetString (encoded);
	Assert.AreEqual (0xFFFF, decoded [0], "MS FX 1.1 behaviour");
}
// bug #75065 and #73086.
[Test]
public void GetCharsFEFF ()
{
	// EF BB BF is the UTF-8 form of U+FEFF; GetString must return it as a
	// character rather than drop it.
	byte [] data = new byte [] {0xEF, 0xBB, 0xBF};
	Encoding enc = new UTF8Encoding (false, true);
	string s = enc.GetString (data);
	// Expected value goes first so a failure message labels the values correctly.
	Assert.AreEqual ("\uFEFF", s);

	// A leading U+FEFF must also survive an encode/decode round trip.
	Encoding utf = Encoding.UTF8;
	char[] testChars = {'\uFEFF','A'};

	byte[] bytes = utf.GetBytes(testChars);
	char[] chars = utf.GetChars(bytes);
	Assert.AreEqual ('\uFEFF', chars [0], "#1");
	Assert.AreEqual ('A', chars [1], "#2");
}
// Clones the encoding obtained for code page 65001 and checks that the clone
// is writable: IsReadOnly must be false and EncoderFallback is then reassigned.
949 public void CloneNotReadOnly ()
// NOTE(review): this statement has no terminator here; a continuation line
// (presumably a cast of Clone's result to Encoding) seems to be missing - confirm.
951 Encoding e = Encoding.GetEncoding (65001).Clone ()
953 Assert.AreEqual (false, e.IsReadOnly);
954 e.EncoderFallback = new EncoderExceptionFallback ();
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void Bug77315 ()
{
	// ED A2 8C is the 3-byte form of the lone surrogate U+D88C; an encoding
	// built with throwOnInvalidBytes = true must throw on it.
	UTF8Encoding strict = new UTF8Encoding (false, true);
	byte [] loneSurrogate = new byte [] {0xED, 0xA2, 0x8C};
	strict.GetString (loneSurrogate);
}
// Feeds a lone high surrogate (U+D800) to UTF-8 encoding with a zero-length
// output buffer, once through an Encoder and once through Encoding.GetBytes.
// Each attempt passes if it either reports 0 bytes written or throws
// ArgumentException.
// NOTE(review): the "try {" lines, the catch bodies and the declaration of
// "s" are not present in this extract - confirm against the full file.
966 public void SufficientByteArray ()
968 Encoder e = Encoding.UTF8.GetEncoder ();
969 byte [] bytes = new byte [0];
971 char [] chars = new char [] {'\uD800'};
// flush = false: the encoder is handed the unpaired surrogate without flushing.
972 e.GetBytes (chars, 0, 1, bytes, 0, false);
// Zero chars with flush = true: expected to write nothing.
974 int ret = e.GetBytes (chars, 1, 0, bytes, 0, true);
975 Assert.AreEqual (0, ret, "drop insufficient char in 2.0: char[]");
976 } catch (ArgumentException) {
981 int ret = Encoding.UTF8.GetBytes (s, 0, 1, bytes, 0);
982 Assert.AreEqual (0, ret, "drop insufficient char in 2.0: string");
983 } catch (ArgumentException) {
// Checks byte counts for U+FFFD, lone surrogates and a surrogate pair with the
// shared Encoding.UTF8 instance, encodes single chars in a loop, and finally
// expects a strict encoder to reject a reversed surrogate pair.
// NOTE(review): the declaration of "bIn", the loop's closing brace and the
// "try {" line are not present in this extract - confirm against the full file.
987 [Test] // bug #565129
988 public void SufficientByteArray2 ()
990 var u = Encoding.UTF8;
// GetByteCount: 3 bytes for U+FFFD and for each lone surrogate, 4 for a valid pair.
991 Assert.AreEqual (3, u.GetByteCount ("\uFFFD"), "#1-1");
992 Assert.AreEqual (3, u.GetByteCount ("\uD800"), "#1-2");
993 Assert.AreEqual (3, u.GetByteCount ("\uDC00"), "#1-3");
994 Assert.AreEqual (4, u.GetByteCount ("\uD800\uDC00"), "#1-4");
995 byte [] bytes = new byte [10];
996 Assert.AreEqual (3, u.GetBytes ("\uDC00", 0, 1, bytes, 0), "#1-5"); // was bogus
// GetBytes must produce arrays of the same lengths that GetByteCount reported.
998 Assert.AreEqual (3, u.GetBytes ("\uFFFD").Length, "#2-1");
999 Assert.AreEqual (3, u.GetBytes ("\uD800").Length, "#2-2");
1000 Assert.AreEqual (3, u.GetBytes ("\uDC00").Length, "#2-3");
1001 Assert.AreEqual (4, u.GetBytes ("\uD800\uDC00").Length, "#2-4");
// The loop condition is "<", so char.MaxValue itself is not visited.
1003 for (char c = char.MinValue; c < char.MaxValue; c++) {
1005 bIn = u.GetBytes (c.ToString ());
// Low surrogate followed by high surrogate, encoded with throwOnInvalidBytes = true.
1009 new UTF8Encoding (false, true).GetBytes (new char [] {'\uDF45', '\uD808'}, 0, 2);
1010 Assert.Fail ("EncoderFallbackException is expected");
1011 } catch (EncoderFallbackException) {
// Decodes the single byte 183 (0xB7) with an encoding constructed with
// throwOnInvalidBytes = false; GetCharCount, GetChars and GetString are each
// expected to report exactly one character.
// NOTE(review): the trailing arguments of all three assertions (output buffer
// and message strings) are not present in this extract - confirm against the full file.
1015 [Test] // bug #77550
1016 public void DecoderFallbackSimple ()
1018 UTF8Encoding e = new UTF8Encoding (false, false);
1019 AssertType.AreEqual (1, e.GetDecoder ().GetCharCount (
1020 new byte [] {(byte) 183}, 0, 1),
1022 AssertType.AreEqual (1, e.GetDecoder().GetChars (
1023 new byte [] {(byte) 183}, 0, 1,
1026 AssertType.AreEqual (1, e.GetString (new byte [] {(byte) 183}).Length,
[Test]
public void FallbackDefaultEncodingUTF8 ()
{
	// Encoding.UTF8's decoder fallback is expected to be a replacement
	// fallback that yields a single U+FFFD.
	DecoderReplacementFallbackBuffer b =
		Encoding.UTF8.DecoderFallback.CreateFallbackBuffer ()
		as DecoderReplacementFallbackBuffer;
	// Report a wrong buffer type as an assertion failure instead of letting
	// the calls below die with a NullReferenceException.
	AssertType.IsNotNull (b, "#0");
	AssertType.IsTrue (b.Fallback (new byte [] {}, 0), "#1");
	AssertType.IsFalse (b.MovePrevious (), "#2");
	AssertType.AreEqual (1, b.Remaining, "#3");
	AssertType.AreEqual ('\uFFFD', b.GetNextChar (), "#4");
}
[Test]
[Category ("MobileNotWorking")]
public void Bug415628 ()
{
	// The resource lives under "corlib", two directory levels above the
	// folder containing the executing assembly.
	string assemblyDir = Path.GetDirectoryName (Assembly.GetExecutingAssembly ().Location);
	DirectoryInfo bcl_output_dir = Directory.GetParent (assemblyDir);
	string corlibDir = Path.Combine (bcl_output_dir.Parent.FullName, "corlib");
	string resourcePath = Path.Combine (corlibDir, "Test/resources/415628.bin");

	using (var stream = File.Open (resourcePath, FileMode.Open)) {
		BinaryReader reader = new BinaryReader (stream);
		byte [] head = reader.ReadBytes (8000);
		// The decoded text is discarded; the call only has to complete.
		Encoding.UTF8.GetString (head);
	}
}
// Regression test named after bug 10788: decoding into a char buffer that is
// too small must raise ArgumentException (declared via ExpectedException).
1056 [ExpectedException (typeof (ArgumentException))]
1057 public void Bug10788()
// 4096 zero bytes are offered for decoding, but the 10-element output array
// is written starting at index 9, leaving room for a single character.
1059 byte[] bytes = new byte[4096];
1060 char[] chars = new char[10];
1062 Encoding.UTF8.GetDecoder ().GetChars (bytes, 0, 4096, chars, 9, false);
// Regression test named after bug 10789: checks how Decoder.GetChars treats a
// charIndex at or beyond the end of the output buffer. Three cases are covered:
//   #1 one byte to decode, charIndex == chars.Length -> ArgumentException
//   #2 one byte to decode, charIndex >  chars.Length -> ArgumentOutOfRangeException
//   #3 zero bytes to decode, charIndex == chars.Length -> returns 0, no exception
1066 public void Bug10789()
1068 byte[] bytes = new byte[4096];
1069 char[] chars = new char[10];
// Case #1: index 10 is the end of the 10-element buffer, so there is no room for output.
1072 Encoding.UTF8.GetDecoder ().GetChars (bytes, 0, 1, chars, 10, false);
1073 Assert.Fail ("ArgumentException is expected #1");
1074 } catch (ArgumentException) {
// Case #2: index 11 lies outside the buffer altogether.
1078 Encoding.UTF8.GetDecoder ().GetChars (bytes, 0, 1, chars, 11, false);
1079 Assert.Fail ("ArgumentOutOfRangeException is expected #2");
1080 } catch (ArgumentOutOfRangeException) {
// Case #3: with nothing to decode, an index equal to the buffer length is accepted.
1083 int charactersWritten = Encoding.UTF8.GetDecoder ().GetChars (bytes, 0, 0, chars, 10, false);
1084 Assert.AreEqual (0, charactersWritten, "#3");
// Feeds a series of malformed UTF-8 byte sequences to an EncodingTester for
// "utf-8" and states, for each, the expected decoded text and the byte groups
// expected to be reported as invalid.
// NOTE(review): EncodingTester.TestDecoderFallback is defined elsewhere; the
// arguments appear to be (input bytes, expected string with one '?' per
// fallback, then one byte array per fallback invocation) - confirm against
// the helper.
1088 public void EncodingFallback ()
1090 /* Legal UTF-8 Byte Sequences
1095 * E1..EF 80..BF 80..BF
1096 * F0 90..BF 80..BF 80..BF
1097 * F1..F3 80..BF 80..BF 80..BF
1098 * F4 80..8F 80..BF 80..BF
1101 var t = new EncodingTester ("utf-8");
// A lone byte in 0x80..0xC1 is passed on its own: one '?' and the byte itself as the invalid group.
1105 for (byte b = 0x80; b <= 0xC1; b++) {
1106 data = new byte [] { b };
1107 t.TestDecoderFallback (data, "?", new byte [] { b });
// Bytes 0xC2..0xDF followed by ASCII 'a' (0x61): only the first byte is reported, 'a' is kept.
1112 for (byte b = 0xC2; b <= 0xDF; b++) {
1113 data = new byte [] { b, 0x61 };
1114 t.TestDecoderFallback (data, "?a", new byte [] { b });
// 0xE0 followed by 0x99: both bytes are expected in a single invalid group.
1118 data = new byte [] { 0xE0, 0x99};
1119 t.TestDecoderFallback (data, "?", new byte [] { 0xE0, 0x99});
// Bytes 0xE1..0xEF followed by 'a': the first byte alone is reported.
1122 for (byte b = 0xE1; b <= 0xEF; b++) {
1123 data = new byte [] { b, 0x61 };
1124 t.TestDecoderFallback (data, "?a", new byte [] { b });
// 0xF0 followed by 0x8F (below the 0x90 minimum listed for F0 above): both bytes form one invalid group.
1128 data = new byte [] { 0xF0, 0x8F};
1129 t.TestDecoderFallback (data, "?", new byte [] { 0xF0, 0x8F });
// Bytes 0xF1..0xF4 followed by 'a': the first byte alone is reported.
1132 for (byte b = 0xF1; b <= 0xF4; b++) {
1133 data = new byte [] { b, 0x61 };
1134 t.TestDecoderFallback (data, "?a", new byte [] { b });
// Bytes 0xC2..0xF3 followed by 0xC0: two separate invalid groups, hence "??".
1138 for (byte b = 0xC2; b <= 0xF3; b++) {
1139 data = new byte [] { b, 0xC0 };
1140 t.TestDecoderFallback (data, "??", new byte [] { b }, new byte [] { 0xC0 });
// Sequences cut short after the second byte: { b, 0xB0 } is reported as one
// group, followed either by a kept 'a' or by 0xC0 as a second invalid group.
1144 // E0..F3 90..BF 80..BF
1145 for (byte b = 0xE0; b <= 0xF3; b++) {
1146 data = new byte [] { b, 0xB0, 0x61 };
1147 t.TestDecoderFallback (data, "?a", new byte [] { b, 0xB0 });
1148 data = new byte [] { b, 0xB0, 0xC0 };
1149 t.TestDecoderFallback (data, "??", new byte [] { b, 0xB0 }, new byte [] { 0xC0 });
// Same shape for the F4 lead byte with second byte 0x8F.
1153 data = new byte [] { 0xF4, 0x8F, 0xC0 };
1154 t.TestDecoderFallback (data, "??", new byte [] { 0xF4, 0x8F }, new byte [] { 0xC0 });
// Sequences cut short after the third byte: the three-byte prefix is reported as one group.
1157 // F0..F3 90..BF 80..BF 80..BF
1158 for (byte b = 0xF0; b <= 0xF3; b++) {
1159 data = new byte [] { b, 0xB0, 0xB0, 0x61 };
1160 t.TestDecoderFallback (data, "?a", new byte [] { b, 0xB0, 0xB0 });
1161 data = new byte [] { b, 0xB0, 0xB0, 0xC0 };
1162 t.TestDecoderFallback (data, "??", new byte [] { b, 0xB0, 0xB0 }, new byte [] { 0xC0 });
1165 // F4 80..8F 80..BF 80..BF
1166 data = new byte [] { 0xF4, 0x8F, 0xB0, 0xC0 };
1167 t.TestDecoderFallback (data, "??", new byte [] { 0xF4, 0x8F, 0xB0 }, new byte [] { 0xC0 });
// Regression test named after bug 23771: a multi-byte UTF-8 character must
// decode correctly when Decoder.Convert is called repeatedly with flush=false,
// so the decoder has to keep partial-sequence state between calls. The
// accumulated output is compared with the original string.
1171 public void DecoderBug23771 ()
1173 var input = "\u733F"; // 'mono' in Japanese, 3 bytes in UTF-8.
1174 var encoded = Encoding.UTF8.GetBytes (input);
1175 var decoder = Encoding.UTF8.GetDecoder ();
1176 var chars = new char [10]; // Just enough space to decode.
1177 var result = new StringBuilder ();
1178 var bytes = new byte [1]; // Simulates chunked input bytes.
1179 // Feed the encoded bytes to the decoder separately, one per Convert call.
1180 foreach (var b in encoded) {
1182 int bytesUsed, charsUsed;
1184 decoder.Convert (bytes, 0, bytes.Length, chars, 0, chars.Length, false, out bytesUsed, out charsUsed, out completed);
1185 result.Append (chars, 0, charsUsed);
1186 // Expected outputs are listed at the bottom.
1187 //Debug.Print ("bytesUsed:{0}, charsUsed:{1}, completed:{2}, result:'{3}'", bytesUsed, charsUsed, completed, result);
1190 // Expected: NO assertion error.
1191 Assert.AreEqual (input, result.ToString (), "#1");
1194 * Expected Debug outputs are:
1195 * bytesUsed:1, charsUsed:0, completed:True, result:''
1196 * bytesUsed:1, charsUsed:0, completed:True, result:''
1197 * bytesUsed:1, charsUsed:1, completed:True, result:'猿'
1199 * -- Note: '猿' is U+733F (1char in UTF-16)
1201 * Actual Debug outputs are:
1202 * bytesUsed:3, charsUsed:1, completed:False, result:'�'
1203 * bytesUsed:3, charsUsed:1, completed:False, result:'��'
1204 * bytesUsed:3, charsUsed:1, completed:False, result:'���'
1206 * None of the output parameters match.
1207 * -- Note: '�' is decoder fallback char (U+FFFD)