2 // UTF8EncodingTest.cs - NUnit Test Cases for System.Text.UTF8Encoding
5 // Patrick Kalkman kalkman@cistron.nl
6 // Sebastien Pouliot (spouliot@motus.com)
8 // (C) 2003 Patrick Kalkman
9 // (C) 2004 Novell (http://www.novell.com)
12 using NUnit.Framework;
17 using DecoderException = System.Text.DecoderFallbackException;
19 using AssertType = NUnit.Framework.Assert;
21 namespace MonoTests.System.Text
24 public class UTF8EncodingTest
// Encoder/decoder shared by the tests that reference `utf8`.
26 private UTF8Encoding utf8;
// Both constructor flags are true: emit the UTF-8 identifier and throw on invalid bytes.
// NOTE(review): the header of the enclosing setup method is not visible in this view.
31 utf8 = new UTF8Encoding (true, true);
// The shared encoding must report that browser clients can display it.
public void IsBrowserDisplay ()
{
	bool displayable = utf8.IsBrowserDisplay;
	Assert.IsTrue (displayable);
}
// The shared encoding must report that browser clients can save with it.
public void IsBrowserSave ()
{
	bool savable = utf8.IsBrowserSave;
	Assert.IsTrue (savable);
}
// The shared encoding must report that mail/news clients can display it.
public void IsMailNewsDisplay ()
{
	bool displayable = utf8.IsMailNewsDisplay;
	Assert.IsTrue (displayable);
}
// The shared encoding must report that mail/news clients can save with it.
public void IsMailNewsSave ()
{
	bool savable = utf8.IsMailNewsSave;
	Assert.IsTrue (savable);
}
// Two default-constructed UTF8Encoding instances must compare equal.
public void TestCompat ()
{
	UTF8Encoding left = new UTF8Encoding ();
	UTF8Encoding right = new UTF8Encoding ();
	Assert.IsTrue (left.Equals (right));
}
// Encodes "A<NOT IDENTICAL TO><ALPHA>." with GetBytes (string) and checks
// each produced byte against 41 E2 89 A2 CE 91 2E.
public void TestEncodingGetBytes1()
{
	UTF8Encoding encoder = new UTF8Encoding ();
	string text = "\u0041\u2262\u0391\u002E";
	int[] expected = { 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E };

	byte[] encoded = encoder.GetBytes (text);

	// Messages are numbered from 1: "UTF #1" .. "UTF #7".
	for (int i = 0; i < expected.Length; i++)
		Assert.AreEqual (expected [i], encoded [i], "UTF #" + (i + 1));
}
// Encodes "Hi Mom <WHITE SMILING FACE>!" into a caller-supplied buffer and
// checks the byte count and each byte of 48 69 20 4D 6F 6D 20 E2 98 BA 21.
public void TestEncodingGetBytes2()
{
	UTF8Encoding encoder = new UTF8Encoding ();
	string text = "\u0048\u0069\u0020\u004D\u006F\u006D\u0020\u263A\u0021";
	int[] expected = { 0x48, 0x69, 0x20, 0x4D, 0x6F, 0x6D, 0x20, 0xE2, 0x98, 0xBA, 0x21 };

	byte[] buffer = new byte [11];
	char[] source = text.ToCharArray();
	int written = encoder.GetBytes (source, 0, text.Length, buffer, 0);

	Assert.AreEqual (11, written, "UTF #1");
	// Byte checks continue the numbering: "UTF #2" .. "UTF #12".
	for (int i = 0; i < expected.Length; i++)
		Assert.AreEqual (expected [i], buffer [i], "UTF #" + (i + 2));
}
// Decodes 41 E2 89 A2 CE 91 2E and checks that it yields
// "A<NOT IDENTICAL TO><ALPHA>." one code unit at a time.
public void TestDecodingGetChars1()
{
	UTF8Encoding decoder = new UTF8Encoding ();
	byte[] encoded = new byte [] {0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E};
	int[] expected = { 0x0041, 0x2262, 0x0391, 0x002E };

	char[] decoded = decoder.GetChars(encoded);

	// Messages are numbered from 1: "UTF #1" .. "UTF #4".
	for (int i = 0; i < expected.Length; i++)
		Assert.AreEqual (expected [i], decoded [i], "UTF #" + (i + 1));
}
// Checks GetMaxCharCount for 50 input bytes, and that constructing the
// encoding with the UTF-8 identifier (BOM) enabled does not change it.
public void TestMaxCharCount()
{
	UTF8Encoding UTF8enc = new UTF8Encoding ();
	Encoding UTF8encWithBOM = new UTF8Encoding(true);
	Assert.AreEqual (51, UTF8enc.GetMaxCharCount(50), "UTF #1");
	// Char counts are compared on both sides; byte counts belong to TestMaxByteCount.
	Assert.AreEqual (UTF8enc.GetMaxCharCount(50), UTF8encWithBOM.GetMaxCharCount(50), "UTF #2");
}
// With a two-character decoder replacement string, the maximum char count
// for 50 bytes is expected to be 102.
public void TestMaxCharCountWithCustomFallback()
{
	EncoderReplacementFallback encoderFallback = new EncoderReplacementFallback("\u2047\u2047");
	DecoderReplacementFallback decoderFallback = new DecoderReplacementFallback("\u2047\u2047");
	Encoding custom = Encoding.GetEncoding("utf-8", encoderFallback, decoderFallback);

	int maxChars = custom.GetMaxCharCount(50);
	Assert.AreEqual (102, maxChars, "UTF #1");
}
// Checks GetMaxByteCount for 50 chars, and that enabling the UTF-8
// identifier (BOM) does not change the result.
public void TestMaxByteCount()
{
	UTF8Encoding plain = new UTF8Encoding ();
	Encoding withBom = new UTF8Encoding(true);

	Assert.AreEqual (153, plain.GetMaxByteCount(50), "UTF #1");

	int plainMax = plain.GetMaxByteCount(50);
	int bomMax = withBom.GetMaxByteCount(50);
	Assert.AreEqual (plainMax, bomMax, "UTF #2");
}
// With a two-character encoder replacement string, the maximum byte count
// for 50 chars is expected to be 306.
public void TestMaxByteCountWithCustomFallback()
{
	EncoderReplacementFallback encoderFallback = new EncoderReplacementFallback("\u2047\u2047");
	DecoderReplacementFallback decoderFallback = new DecoderReplacementFallback("?");
	Encoding custom = Encoding.GetEncoding("utf-8", encoderFallback, decoderFallback);

	int maxBytes = custom.GetMaxByteCount(50);
	Assert.AreEqual (306, maxBytes, "UTF #1");
}
// Regression test for bug #59648: with throwOnInvalidBytes == false the
// invalid pair C0 AF decodes to two U+FFFD characters instead of throwing.
public void TestThrowOnInvalid ()
{
	UTF8Encoding lenient = new UTF8Encoding (true, false);

	// The invalid pair on its own.
	byte[] input = new byte [] { 0xC0, 0xAF };
	Assert.AreEqual (2, lenient.GetCharCount (input), "#A0");
	string decoded = lenient.GetString (input);
	Assert.AreEqual ("\uFFFD\uFFFD", decoded, "#A1");

	// The same pair embedded between ASCII digits.
	input = new byte [] { 0x30, 0x31, 0xC0, 0xAF, 0x30, 0x32 };
	decoded = lenient.GetString (input);
	Assert.AreEqual (6, decoded.Length, "#B1");

	// Messages continue the numbering: "#B2" .. "#B7".
	int[] expected = { 0x30, 0x31, 0xFFFD, 0xFFFD, 0x30, 0x32 };
	for (int i = 0; i < expected.Length; i++)
		Assert.AreEqual (expected [i], (int) decoded [i], "#B" + (i + 2));
}
179 // UTF8 decoding tests are based on the test file from http://www.cl.cam.ac.uk/~mgk25/
180 // The test file is: https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
181 // which is licensed under CC-by-4.0: https://creativecommons.org/licenses/by/4.0/
183 // The file is not copied verbatim, instead individual
184 // tests are based on individual portions of that file
// Decodes a well-formed Greek word and checks that re-encoding reproduces
// the input bytes. The decoded text is not compared against a literal, to
// keep non-ASCII characters out of the source file.
public void T1_Correct_GreekWord_kosme ()
{
	byte[] encoded = new byte [] { 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5 };
	string decoded = utf8.GetString (encoded);

	string originalHex = BitConverter.ToString (encoded);
	string roundTripHex = BitConverter.ToString (utf8.GetBytes (decoded));
	Assert.AreEqual (originalHex, roundTripHex, "Reconverted");
}
// First code point of each valid sequence length (1 to 4 bytes): each must
// decode to the expected UTF-16 code unit(s) and re-encode to the same bytes.
public void T2_Boundary_1_FirstPossibleSequence_Pass ()
{
	// 1 byte: U+0000
	byte[] oneByte = new byte [] { 0x00 };
	string text = utf8.GetString (oneByte);
	Assert.AreEqual ("\0", text, "1 byte (U-00000000)");
	string originalHex = BitConverter.ToString (oneByte);
	string roundTripHex = BitConverter.ToString (utf8.GetBytes (text));
	Assert.AreEqual (originalHex, roundTripHex, "Reconverted-1");

	// 2 bytes: U+0080
	byte[] twoBytes = new byte [] { 0xC2, 0x80 };
	text = utf8.GetString (twoBytes);
	Assert.AreEqual (0x80, text [0], "2 bytes (U-00000080)");
	originalHex = BitConverter.ToString (twoBytes);
	roundTripHex = BitConverter.ToString (utf8.GetBytes (text));
	Assert.AreEqual (originalHex, roundTripHex, "Reconverted-2");

	// 3 bytes: U+0800
	byte[] threeBytes = new byte [] { 0xE0, 0xA0, 0x80 };
	text = utf8.GetString (threeBytes);
	Assert.AreEqual (0x800, text [0], "3 bytes (U-00000800)");
	originalHex = BitConverter.ToString (threeBytes);
	roundTripHex = BitConverter.ToString (utf8.GetBytes (text));
	Assert.AreEqual (originalHex, roundTripHex, "Reconverted-3");

	// 4 bytes: U+10000, represented as the surrogate pair D800 DC00
	byte[] fourBytes = new byte [] { 0xF0, 0x90, 0x80, 0x80 };
	text = utf8.GetString (fourBytes);
	Assert.AreEqual (0xD800, text [0], "4 bytes (U-00010000)-0");
	Assert.AreEqual (0xDC00, text [1], "4 bytes (U-00010000)-1");
	originalHex = BitConverter.ToString (fourBytes);
	roundTripHex = BitConverter.ToString (utf8.GetBytes (text));
	Assert.AreEqual (originalHex, roundTripHex, "Reconverted-4");
}
// The first 5-byte form (lead byte 0xF8) must be rejected by the strict decoder.
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_1_FirstPossibleSequence_Fail_5 ()
{
	byte[] fiveByteForm = new byte [] { 0xF8, 0x88, 0x80, 0x80, 0x80 };
	string decoded = utf8.GetString (fiveByteForm);

	// Only reached if decoding did not throw.
	Assert.IsNull (decoded, "5 bytes (U-00200000)");
	string originalHex = BitConverter.ToString (fiveByteForm);
	string roundTripHex = BitConverter.ToString (utf8.GetBytes (decoded));
	Assert.AreEqual (originalHex, roundTripHex, "Reconverted-5");
}
// The first 6-byte form (lead byte 0xFC) must be rejected by the strict decoder.
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_1_FirstPossibleSequence_Fail_6 ()
{
	byte[] sixByteForm = new byte [] { 0xFC, 0x84, 0x80, 0x80, 0x80, 0x80 };
	string decoded = utf8.GetString (sixByteForm);

	// Only reached if decoding did not throw.
	Assert.IsNull (decoded, "6 bytes (U-04000000)");
	string originalHex = BitConverter.ToString (sixByteForm);
	string roundTripHex = BitConverter.ToString (utf8.GetBytes (decoded));
	Assert.AreEqual (originalHex, roundTripHex, "Reconverted-6");
}
// Last code point of the 1-, 2- and 3-byte sequence lengths: each must
// decode to the expected code unit and re-encode to the same bytes.
public void T2_Boundary_2_LastPossibleSequence_Pass ()
{
	// 1 byte: U+007F
	byte[] oneByte = new byte [] { 0x7F };
	string text = utf8.GetString (oneByte);
	Assert.AreEqual (0x7F, text [0], "1 byte (U-0000007F)");
	string originalHex = BitConverter.ToString (oneByte);
	string roundTripHex = BitConverter.ToString (utf8.GetBytes (text));
	Assert.AreEqual (originalHex, roundTripHex, "Reconverted-1");

	// 2 bytes: U+07FF
	byte[] twoBytes = new byte [] { 0xDF, 0xBF };
	text = utf8.GetString (twoBytes);
	Assert.AreEqual (0x7FF, text [0], "2 bytes (U-000007FF)");
	originalHex = BitConverter.ToString (twoBytes);
	roundTripHex = BitConverter.ToString (utf8.GetBytes (text));
	Assert.AreEqual (originalHex, roundTripHex, "Reconverted-2");

	// 3 bytes: U+FFFF
	byte[] threeBytes = new byte [] { 0xEF, 0xBF, 0xBF };
	text = utf8.GetString (threeBytes);
	Assert.AreEqual (0xFFFF, text [0], "3 bytes (U-0000FFFF)");
	originalHex = BitConverter.ToString (threeBytes);
	roundTripHex = BitConverter.ToString (utf8.GetBytes (text));
	Assert.AreEqual (originalHex, roundTripHex, "Reconverted-3");
}
// The last 4-byte form, F7 BF BF BF (U-001FFFFF), lies above U+10FFFF and
// must be rejected by the strict decoder.
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_2_LastPossibleSequence_Fail_4 ()
{
	// The lead byte is 0xF7. A lead of 0x7F is plain ASCII DEL, which would make the
	// test fail on the stray continuation bytes rather than on the out-of-range form.
	byte[] data224 = { 0xF7, 0xBF, 0xBF, 0xBF };
	string s = utf8.GetString (data224);

	// Only reached if decoding did not throw.
	Assert.IsNull (s, "4 bytes (U-001FFFFF)");
	Assert.AreEqual (BitConverter.ToString (data224), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-4");
}
// The last 5-byte form (FB BF BF BF BF) must be rejected by the strict decoder.
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_2_LastPossibleSequence_Fail_5 ()
{
	byte[] fiveByteForm = new byte [] { 0xFB, 0xBF, 0xBF, 0xBF, 0xBF };
	string decoded = utf8.GetString (fiveByteForm);

	// Only reached if decoding did not throw.
	Assert.IsNull (decoded, "5 bytes (U-03FFFFFF)");
	string originalHex = BitConverter.ToString (fiveByteForm);
	string roundTripHex = BitConverter.ToString (utf8.GetBytes (decoded));
	Assert.AreEqual (originalHex, roundTripHex, "Reconverted-5");
}
// The last 6-byte form (FD BF BF BF BF BF) must be rejected by the strict decoder.
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_2_LastPossibleSequence_Fail_6 ()
{
	byte[] sixByteForm = new byte [] { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF, 0xBF };
	string decoded = utf8.GetString (sixByteForm);

	// Only reached if decoding did not throw.
	Assert.IsNull (decoded, "6 bytes (U-7FFFFFFF)");
	string originalHex = BitConverter.ToString (sixByteForm);
	string roundTripHex = BitConverter.ToString (utf8.GetBytes (decoded));
	Assert.AreEqual (originalHex, roundTripHex, "Reconverted-6");
}
// Other boundary code points that are valid: U+D7FF, U+E000, U+FFFD and
// U+10FFFF. Each must decode as expected and re-encode to the same bytes.
public void T2_Boundary_3_Other_Pass ()
{
	// U+D7FF: last code point before the surrogate range
	byte[] beforeSurrogates = new byte [] { 0xED, 0x9F, 0xBF };
	string text = utf8.GetString (beforeSurrogates);
	Assert.AreEqual (0xD7FF, text [0], "U-0000D7FF");
	string originalHex = BitConverter.ToString (beforeSurrogates);
	string roundTripHex = BitConverter.ToString (utf8.GetBytes (text));
	Assert.AreEqual (originalHex, roundTripHex, "Reconverted-1");

	// U+E000: first code point after the surrogate range
	byte[] afterSurrogates = new byte [] { 0xEE, 0x80, 0x80 };
	text = utf8.GetString (afterSurrogates);
	Assert.AreEqual (0xE000, text [0], "U-0000E000");
	originalHex = BitConverter.ToString (afterSurrogates);
	roundTripHex = BitConverter.ToString (utf8.GetBytes (text));
	Assert.AreEqual (originalHex, roundTripHex, "Reconverted-2");

	// U+FFFD
	byte[] replacementChar = new byte [] { 0xEF, 0xBF, 0xBD };
	text = utf8.GetString (replacementChar);
	Assert.AreEqual (0xFFFD, text [0], "U-0000FFFD");
	originalHex = BitConverter.ToString (replacementChar);
	roundTripHex = BitConverter.ToString (utf8.GetBytes (text));
	Assert.AreEqual (originalHex, roundTripHex, "Reconverted-3");

	// U+10FFFF, represented as the surrogate pair DBFF DFFF
	byte[] lastCodePoint = new byte [] { 0xF4, 0x8F, 0xBF, 0xBF };
	text = utf8.GetString (lastCodePoint);
	Assert.AreEqual (0xDBFF, text [0], "U-0010FFFF-0");
	Assert.AreEqual (0xDFFF, text [1], "U-0010FFFF-1");
	originalHex = BitConverter.ToString (lastCodePoint);
	roundTripHex = BitConverter.ToString (utf8.GetBytes (text));
	Assert.AreEqual (originalHex, roundTripHex, "Reconverted-4");
}
// F4 90 80 80 would be U+110000, one past the last code point; it must be rejected.
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_3_Other_Fail_5 ()
{
	byte[] pastLastCodePoint = new byte [] { 0xF4, 0x90, 0x80, 0x80 };
	string decoded = utf8.GetString (pastLastCodePoint);

	// Only reached if decoding did not throw.
	Assert.IsNull (decoded, "U-00110000");
	string originalHex = BitConverter.ToString (pastLastCodePoint);
	string roundTripHex = BitConverter.ToString (utf8.GetBytes (decoded));
	Assert.AreEqual (originalHex, roundTripHex, "Reconverted-5");
}
// A lone 0x80 (the first continuation byte) cannot start a sequence.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_311 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0x80 });
}
// A lone 0xBF (the last continuation byte) cannot start a sequence.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_312 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xBF });
}
// Two continuation bytes with no lead byte.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_313 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0x80, 0xBF });
}
// Three continuation bytes with no lead byte.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_314 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0x80, 0xBF, 0x80 });
}
// Four continuation bytes with no lead byte.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_315 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0x80, 0xBF, 0x80, 0xBF });
}
// Five continuation bytes with no lead byte.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_316 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0x80, 0xBF, 0x80, 0xBF, 0x80 });
}
// Six continuation bytes with no lead byte.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_317 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF });
}
// Seven continuation bytes with no lead byte.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_318 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 });
}
// All 64 continuation bytes in a row, with no lead byte anywhere.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_319 ()
{
	// 0x80 through 0xBF in ascending order.
	byte[] continuationBytes = new byte [64];
	for (int i = 0; i < continuationBytes.Length; i++)
		continuationBytes [i] = (byte) (0x80 + i);

	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (continuationBytes);
}
// Every byte from 0xC0 through 0xDF, each followed by a space instead of a continuation byte.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_321 ()
{
	const int leadCount = 32;
	byte[] input = new byte [leadCount * 2];
	for (int i = 0; i < leadCount; i++) {
		input [2 * i] = (byte) (0xC0 + i);
		input [2 * i + 1] = 0x20;
	}

	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (input);
}
// Every byte from 0xE0 through 0xEF, each followed by a space instead of continuation bytes.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_322 ()
{
	const int leadCount = 16;
	byte[] input = new byte [leadCount * 2];
	for (int i = 0; i < leadCount; i++) {
		input [2 * i] = (byte) (0xE0 + i);
		input [2 * i + 1] = 0x20;
	}

	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (input);
}
// Every byte from 0xF0 through 0xF7, each followed by a space instead of continuation bytes.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_323 ()
{
	const int leadCount = 8;
	byte[] input = new byte [leadCount * 2];
	for (int i = 0; i < leadCount; i++) {
		input [2 * i] = (byte) (0xF0 + i);
		input [2 * i + 1] = 0x20;
	}

	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (input);
}
// The four lead bytes of 5-byte sequences (0xF8 through 0xFB), each followed
// by a space instead of continuation bytes (section 3.2.4 of the UTF-8 stress test).
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_324 ()
{
	// 0xF8..0xFB here; the 0xF0..0xF7 range is covered by T3_Malformed_2_LonelyStart_323.
	byte[] data = { 0xF8, 0x20, 0xF9, 0x20, 0xFA, 0x20, 0xFB, 0x20 };

	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (data);
}
// The two lead bytes of 6-byte sequences (0xFC, 0xFD), each followed by a space.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_325 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xFC, 0x20, 0xFD, 0x20 });
}
// A 2-byte lead (0xC0) with its continuation byte missing.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_331 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xC0 });
}
// A 3-byte lead (0xE0) with its last continuation byte missing.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_332 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xE0, 0x80 });
}
// A 4-byte lead (0xF0) with its last continuation byte missing.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_333 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xF0, 0x80, 0x80 });
}
// A 5-byte lead (0xF8) with its last continuation byte missing.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_334 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xF8, 0x80, 0x80, 0x80 });
}
// A 6-byte lead (0xFC) with its last continuation byte missing.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_335 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xFC, 0x80, 0x80, 0x80, 0x80 });
}
// A 2-byte lead (0xDF) with its continuation byte missing.
// MS Fx 1.1 accepts this input, so no ExpectedException attribute is applied:
// either outcome below is tolerated.
public void T3_Malformed_3_LastContinuationMissing_336 ()
{
	byte[] truncated = new byte [] { 0xDF };
	try {
		string decoded = utf8.GetString (truncated);
		// An exception is "really" expected above; reaching here mirrors MS Fx 1.1.
		Assert.AreEqual (String.Empty, decoded, "MS FX 1.1 behaviour");
	}
	catch (DecoderException) {
		// Mono rejects the sequence - better to stick to the standard.
	}
}
// A 3-byte lead (0xEF) with its last continuation byte missing.
// MS Fx 1.1 accepts this input, so no ExpectedException attribute is applied:
// either outcome below is tolerated.
public void T3_Malformed_3_LastContinuationMissing_337 ()
{
	byte[] truncated = new byte [] { 0xEF, 0xBF };
	try {
		string decoded = utf8.GetString (truncated);
		// An exception is "really" expected above; reaching here mirrors MS Fx 1.1.
		Assert.AreEqual (String.Empty, decoded, "MS FX 1.1 behaviour");
	}
	catch (DecoderException) {
		// Mono rejects the sequence - better to stick to the standard.
	}
}
// A 4-byte lead (0xF7) with its last continuation byte missing.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_338 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xF7, 0xBF, 0xBF });
}
// A 5-byte lead (0xFB) with its last continuation byte missing
// (section 3.3.9 of the UTF-8 stress test: FB BF BF BF).
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_339 ()
{
	// The lead byte is 0xFB. A lead of 0x0F is an ASCII control character, which would
	// turn this into a stray-continuation test instead of a truncated-sequence test.
	byte[] data = { 0xFB, 0xBF, 0xBF, 0xBF };

	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (data);
}
// A 6-byte lead (0xFD) with its last continuation byte missing.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_3310 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF });
}
// The ten truncated sequences of tests 331-3310, concatenated into one input.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_4_ConcatenationImcomplete ()
{
	byte[] concatenated = new byte [] {
		0xC0,
		0xE0, 0x80,
		0xF0, 0x80, 0x80,
		0xF8, 0x80, 0x80, 0x80,
		0xFC, 0x80, 0x80, 0x80, 0x80,
		0xDF,
		0xEF, 0xBF,
		0xF7, 0xBF, 0xBF,
		0xFB, 0xBF, 0xBF, 0xBF,
		0xFD, 0xBF, 0xBF, 0xBF, 0xBF };

	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (concatenated);
}
// 0xFE can never appear in UTF-8.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_5_ImpossibleBytes_351 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xFE });
}
// 0xFF can never appear in UTF-8.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_5_ImpossibleBytes_352 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xFF });
}
// A run of the impossible bytes 0xFE and 0xFF.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_5_ImpossibleBytes_353 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xFE, 0xFE, 0xFF, 0xFF });
}
612 // Overlong == dangerous -> "safe" decoder should reject them
// Overlong 2-byte encoding of '/' (U+002F).
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_411 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xC0, 0xAF });
}
// Overlong 3-byte encoding of '/' (U+002F).
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_412 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xE0, 0x80, 0xAF });
}
// Overlong 4-byte encoding of '/' (U+002F).
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_413 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xF0, 0x80, 0x80, 0xAF });
}
// Overlong 5-byte encoding of '/' (U+002F).
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_414 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xF8, 0x80, 0x80, 0x80, 0xAF });
}
// Overlong 6-byte encoding of '/' (U+002F).
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_415 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xFC, 0x80, 0x80, 0x80, 0x80, 0xAF });
}
// Highest value that is still overlong in two bytes: C1 BF (U+007F).
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_421 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xC1, 0xBF });
}
// Highest value that is still overlong in three bytes: E0 9F BF (U+07FF).
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_422 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xE0, 0x9F, 0xBF });
}
// Highest value that is still overlong in four bytes: F0 8F BF BF (U+FFFF).
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_423 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xF0, 0x8F, 0xBF, 0xBF });
}
// Maximum-boundary overlong form using five bytes: F8 87 BF BF BF.
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_424 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xF8, 0x87, 0xBF, 0xBF, 0xBF });
}
// Maximum-boundary overlong form using six bytes: FC 83 BF BF BF BF.
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_425 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xFC, 0x83, 0xBF, 0xBF, 0xBF, 0xBF });
}
// Overlong 2-byte encoding of NUL (U+0000).
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_431 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xC0, 0x80 });
}
// Overlong 3-byte encoding of NUL (U+0000).
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_432 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xE0, 0x80, 0x80 });
}
// Overlong 4-byte encoding of NUL (U+0000).
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_433 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xF0, 0x80, 0x80, 0x80 });
}
// Overlong 5-byte encoding of NUL (U+0000).
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_434 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xF8, 0x80, 0x80, 0x80, 0x80 });
}
// Overlong 6-byte encoding of NUL (U+0000).
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_435 ()
{
	// The decoder is expected to throw, so the result is not kept.
	utf8.GetString (new byte [] { 0xFC, 0x80, 0x80, 0x80, 0x80, 0x80 });
}
// ED A0 80 is the 3-byte form of the lone surrogate U+D800; it must be rejected.
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_511 ()
{
	byte[] encodedSurrogate = new byte [] { 0xED, 0xA0, 0x80 };
	string decoded = utf8.GetString (encodedSurrogate);

	// Only reached if the decoder accepted the sequence.
	Assert.AreEqual (0xD800, decoded [0], "MS FX 1.1 behaviour");
}
// ED AD BF is the 3-byte form of the lone surrogate U+DB7F; it must be rejected.
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_512 ()
{
	byte[] encodedSurrogate = new byte [] { 0xED, 0xAD, 0xBF };
	string decoded = utf8.GetString (encodedSurrogate);

	// Only reached if the decoder accepted the sequence.
	Assert.AreEqual (0xDB7F, decoded [0], "MS FX 1.1 behaviour");
}
// ED AE 80 is the 3-byte form of the lone surrogate U+DB80; it must be rejected.
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_513 ()
{
	byte[] encodedSurrogate = new byte [] { 0xED, 0xAE, 0x80 };
	string decoded = utf8.GetString (encodedSurrogate);

	// Only reached if the decoder accepted the sequence.
	Assert.AreEqual (0xDB80, decoded [0], "MS FX 1.1 behaviour");
}
// ED AF BF is the 3-byte form of the lone surrogate U+DBFF; it must be rejected.
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_514 ()
{
	byte[] encodedSurrogate = new byte [] { 0xED, 0xAF, 0xBF };
	string decoded = utf8.GetString (encodedSurrogate);

	// Only reached if the decoder accepted the sequence.
	Assert.AreEqual (0xDBFF, decoded [0], "MS FX 1.1 behaviour");
}
// ED B0 80 is the 3-byte form of the lone surrogate U+DC00; it must be rejected.
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_515 ()
{
	byte[] encodedSurrogate = new byte [] { 0xED, 0xB0, 0x80 };
	string decoded = utf8.GetString (encodedSurrogate);

	// Only reached if the decoder accepted the sequence.
	Assert.AreEqual (0xDC00, decoded [0], "MS FX 1.1 behaviour");
}
// ED BE 80 is the 3-byte form of the lone surrogate U+DF80; it must be rejected.
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_516 ()
{
	byte[] encodedSurrogate = new byte [] { 0xED, 0xBE, 0x80 };
	string decoded = utf8.GetString (encodedSurrogate);

	// Only reached if the decoder accepted the sequence.
	Assert.AreEqual (0xDF80, decoded [0], "MS FX 1.1 behaviour");
}
// ED BF BF is the 3-byte form of the lone surrogate U+DFFF; it must be rejected.
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_517 ()
{
	byte[] encodedSurrogate = new byte [] { 0xED, 0xBF, 0xBF };
	string decoded = utf8.GetString (encodedSurrogate);

	// Only reached if the decoder accepted the sequence.
	Assert.AreEqual (0xDFFF, decoded [0], "MS FX 1.1 behaviour");
}
// The surrogate pair U+D800 U+DC00, each half encoded as its own 3-byte sequence; it must be rejected.
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_521 ()
{
	byte[] encodedPair = new byte [] { 0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80 };
	string decoded = utf8.GetString (encodedPair);

	// Only reached if the decoder accepted the sequence.
	Assert.AreEqual (0xD800, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDC00, decoded [1], "MS FX 1.1 behaviour");
}
// The surrogate pair U+D800 U+DFFF, each half encoded as its own 3-byte sequence; it must be rejected.
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_522 ()
{
	byte[] encodedPair = new byte [] { 0xED, 0xA0, 0x80, 0xED, 0xBF, 0xBF };
	string decoded = utf8.GetString (encodedPair);

	// Only reached if the decoder accepted the sequence.
	Assert.AreEqual (0xD800, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDFFF, decoded [1], "MS FX 1.1 behaviour");
}
// The surrogate pair U+DB7F U+DC00, each half encoded as its own 3-byte sequence; it must be rejected.
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_523 ()
{
	byte[] encodedPair = new byte [] { 0xED, 0xAD, 0xBF, 0xED, 0xB0, 0x80 };
	string decoded = utf8.GetString (encodedPair);

	// Only reached if the decoder accepted the sequence.
	Assert.AreEqual (0xDB7F, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDC00, decoded [1], "MS FX 1.1 behaviour");
}
// The surrogate pair U+DB7F U+DFFF, each half encoded as its own 3-byte sequence; it must be rejected.
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_524 ()
{
	byte[] encodedPair = new byte [] { 0xED, 0xAD, 0xBF, 0xED, 0xBF, 0xBF };
	string decoded = utf8.GetString (encodedPair);

	// Only reached if the decoder accepted the sequence.
	Assert.AreEqual (0xDB7F, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDFFF, decoded [1], "MS FX 1.1 behaviour");
}
// The surrogate pair U+DB80 U+DC00, each half encoded as its own 3-byte sequence; it must be rejected.
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_525 ()
{
	byte[] encodedPair = new byte [] { 0xED, 0xAE, 0x80, 0xED, 0xB0, 0x80 };
	string decoded = utf8.GetString (encodedPair);

	// Only reached if the decoder accepted the sequence.
	Assert.AreEqual (0xDB80, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDC00, decoded [1], "MS FX 1.1 behaviour");
}
// The surrogate pair U+DB80 U+DFCF, each half encoded as its own 3-byte sequence; it must be rejected.
// NOTE(review): the upstream stress-test case 5.2.6 appears to use ED BF BF (U+DFFF) for the
// second half; the bytes and expected value here are consistent with each other - confirm intent.
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_526 ()
{
	byte[] encodedPair = new byte [] { 0xED, 0xAE, 0x80, 0xED, 0xBF, 0x8F };
	string decoded = utf8.GetString (encodedPair);

	// Only reached if the decoder accepted the sequence.
	Assert.AreEqual (0xDB80, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDFCF, decoded [1], "MS FX 1.1 behaviour");
}
// The surrogate pair U+DBFF U+DC00, each half encoded as its own 3-byte sequence; it must be rejected.
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_527 ()
{
	byte[] encodedPair = new byte [] { 0xED, 0xAF, 0xBF, 0xED, 0xB0, 0x80 };
	string decoded = utf8.GetString (encodedPair);

	// Only reached if the decoder accepted the sequence.
	Assert.AreEqual (0xDBFF, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDC00, decoded [1], "MS FX 1.1 behaviour");
}
// The surrogate pair U+DBFF U+DFFF, each half encoded as its own 3-byte sequence; it must be rejected.
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_528 ()
{
	byte[] encodedPair = new byte [] { 0xED, 0xAF, 0xBF, 0xED, 0xBF, 0xBF };
	string decoded = utf8.GetString (encodedPair);

	// Only reached if the decoder accepted the sequence.
	Assert.AreEqual (0xDBFF, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDFFF, decoded [1], "MS FX 1.1 behaviour");
}
// EF BF BE encodes U+FFFE. MS Fx 1.1 accepts it, so no ExpectedException
// attribute is applied and the decoded value is checked instead.
public void T5_IllegalCodePosition_3_Other_531 ()
{
	byte[] encoded = new byte [] { 0xEF, 0xBF, 0xBE };
	string decoded = utf8.GetString (encoded);

	Assert.AreEqual (0xFFFE, decoded [0], "MS FX 1.1 behaviour");
}
// EF BF BF encodes U+FFFF. MS Fx 1.1 accepts it, so no ExpectedException
// attribute is applied and the decoded value is checked instead.
public void T5_IllegalCodePosition_3_Other_532 ()
{
	byte[] encoded = new byte [] { 0xEF, 0xBF, 0xBF };
	string decoded = utf8.GetString (encoded);

	Assert.AreEqual (0xFFFF, decoded [0], "MS FX 1.1 behaviour");
}
// bug #75065 and #73086.
// U+FEFF must come through GetString as a character (not be dropped as a BOM),
// and must survive a GetBytes/GetChars round trip via Encoding.UTF8.
public void GetCharsFEFF ()
{
	byte [] data = new byte [] {0xEF, 0xBB, 0xBF};
	Encoding enc = new UTF8Encoding (false, true);
	string s = enc.GetString (data);
	// Expected value goes first so a failure report labels the two values correctly.
	Assert.AreEqual ("\uFEFF", s);

	Encoding utf = Encoding.UTF8;
	char[] testChars = {'\uFEFF','A'};

	byte[] bytes = utf.GetBytes(testChars);
	char[] chars = utf.GetChars(bytes);
	Assert.AreEqual ('\uFEFF', chars [0], "#1");
	Assert.AreEqual ('A', chars [1], "#2");
}
// Clones the encoding obtained for code page 65001 and checks that the clone is
// writable: IsReadOnly is false and its EncoderFallback can be assigned.
// NOTE(review): the tail of the Clone () statement is not visible in this view.
948 public void CloneNotReadOnly ()
950 Encoding e = Encoding.GetEncoding (65001).Clone ()
952 Assert.AreEqual (false, e.IsReadOnly);
// The assignment itself is part of the test; nothing is asserted after it.
953 e.EncoderFallback = new EncoderExceptionFallback ();
// Bug 77315: ED A2 8C must make a throwing (throwOnInvalidBytes == true) decoder raise.
[ExpectedException (typeof (DecoderFallbackException))]
public void Bug77315 ()
{
	UTF8Encoding strict = new UTF8Encoding (false, true);
	byte [] invalid = new byte [] {0xED, 0xA2, 0x8C};
	strict.GetString (invalid);
}
// Feeds a lone high surrogate (U+D800) to UTF-8 encoding with a zero-length output
// buffer, first through an Encoder and then through Encoding.UTF8.GetBytes.
// NOTE(review): several lines of this method (the `try` openers and the declaration
// of `s`) are not visible in this view.
965 public void SufficientByteArray ()
967 Encoder e = Encoding.UTF8.GetEncoder ();
968 byte [] bytes = new byte [0];
970 char [] chars = new char [] {'\uD800'};
// First call passes the surrogate with flush == false.
971 e.GetBytes (chars, 0, 1, bytes, 0, false);
// Second call passes zero chars with flush == true; zero bytes written is the asserted outcome.
973 int ret = e.GetBytes (chars, 1, 0, bytes, 0, true);
974 Assert.AreEqual (0, ret, "drop insufficient char in 2.0: char[]");
// An ArgumentException is caught rather than failing the test.
975 } catch (ArgumentException) {
// Same scenario through the string overload of Encoding.GetBytes.
980 int ret = Encoding.UTF8.GetBytes (s, 0, 1, bytes, 0);
981 Assert.AreEqual (0, ret, "drop insufficient char in 2.0: string");
982 } catch (ArgumentException) {
// Checks the UTF-8 byte counts of U+FFFD, lone surrogates and a surrogate pair
// through GetByteCount and GetBytes, then exercises every char below char.MaxValue,
// and finally expects a throwing encoder to reject a reversed surrogate pair.
// NOTE(review): parts of the loop body and the `try` opener are not visible in this view.
986 [Test] // bug #565129
987 public void SufficientByteArray2 ()
989 var u = Encoding.UTF8;
// Lone surrogates are expected to count as 3 bytes, the same as U+FFFD; a valid pair counts as 4.
990 Assert.AreEqual (3, u.GetByteCount ("\uFFFD"), "#1-1");
991 Assert.AreEqual (3, u.GetByteCount ("\uD800"), "#1-2");
992 Assert.AreEqual (3, u.GetByteCount ("\uDC00"), "#1-3");
993 Assert.AreEqual (4, u.GetByteCount ("\uD800\uDC00"), "#1-4");
994 byte [] bytes = new byte [10];
995 Assert.AreEqual (3, u.GetBytes ("\uDC00", 0, 1, bytes, 0), "#1-5"); // was bogus
// The same expectations through the array-returning GetBytes overload.
997 Assert.AreEqual (3, u.GetBytes ("\uFFFD").Length, "#2-1");
998 Assert.AreEqual (3, u.GetBytes ("\uD800").Length, "#2-2");
999 Assert.AreEqual (3, u.GetBytes ("\uDC00").Length, "#2-3");
1000 Assert.AreEqual (4, u.GetBytes ("\uD800\uDC00").Length, "#2-4");
// The loop stops before char.MaxValue; a `<=` bound would never terminate because c++ wraps around.
1002 for (char c = char.MinValue; c < char.MaxValue; c++) {
1004 bIn = u.GetBytes (c.ToString ());
// Low surrogate followed by high surrogate, given to an encoder constructed with throwOnInvalidBytes == true.
1008 new UTF8Encoding (false, true).GetBytes (new char [] {'\uDF45', '\uD808'}, 0, 2);
1009 Assert.Fail ("EncoderFallbackException is expected");
1010 } catch (EncoderFallbackException) {
// With throwOnInvalidBytes == false, the single byte 183 (0xB7) is expected to yield
// exactly one char through GetCharCount, GetChars and GetString.
// NOTE(review): the trailing arguments of each assertion are not visible in this view.
1014 [Test] // bug #77550
1015 public void DecoderFallbackSimple ()
1017 UTF8Encoding e = new UTF8Encoding (false, false);
// A fresh decoder is requested for each call.
1018 AssertType.AreEqual (1, e.GetDecoder ().GetCharCount (
1019 new byte [] {(byte) 183}, 0, 1),
1021 AssertType.AreEqual (1, e.GetDecoder().GetChars (
1022 new byte [] {(byte) 183}, 0, 1,
1025 AssertType.AreEqual (1, e.GetString (new byte [] {(byte) 183}).Length,
// The decoder fallback buffer of Encoding.UTF8 is expected to be a replacement
// buffer that supplies a single U+FFFD after Fallback is invoked.
public void FallbackDefaultEncodingUTF8 ()
{
	DecoderFallback fallback = Encoding.UTF8.DecoderFallback;
	DecoderReplacementFallbackBuffer buffer =
		fallback.CreateFallbackBuffer () as DecoderReplacementFallbackBuffer;

	bool accepted = buffer.Fallback (new byte [] {}, 0);
	AssertType.IsTrue (accepted, "#1");

	bool movedBack = buffer.MovePrevious ();
	AssertType.IsFalse (movedBack, "#2");

	AssertType.AreEqual (1, buffer.Remaining, "#3");
	AssertType.AreEqual ('\uFFFD', buffer.GetNextChar (), "#4");
}
// Bug 415628: decoding up to the first 8000 bytes of the resource file must complete;
// the decoded text itself is not inspected.
[Category ("MobileNotWorking")]
public void Bug415628 ()
{
	using (var stream = File.Open ("Test/resources/415628.bin", FileMode.Open)) {
		BinaryReader reader = new BinaryReader (stream);
		byte [] head = reader.ReadBytes (8000);
		Encoding.UTF8.GetString(head);
	}
}
// Bug 10788: decoding 4096 bytes into a 10-char buffer starting at index 9
// must fail with ArgumentException.
[ExpectedException (typeof (ArgumentException))]
public void Bug10788()
{
	byte[] input = new byte[4096];
	char[] output = new char[10];

	Decoder decoder = Encoding.UTF8.GetDecoder ();
	decoder.GetChars (input, 0, 4096, output, 9, false);
}
[Test]
public void Bug10789 ()
{
	byte [] bytes = new byte [4096];
	char [] chars = new char [10];

	// One byte to decode, but charIndex == chars.Length leaves no room.
	try {
		Encoding.UTF8.GetDecoder ().GetChars (bytes, 0, 1, chars, 10, false);
		Assert.Fail ("ArgumentException is expected #1");
	} catch (ArgumentException) {
		// expected
	}

	// charIndex beyond the end of the buffer is itself out of range.
	try {
		Encoding.UTF8.GetDecoder ().GetChars (bytes, 0, 1, chars, 11, false);
		Assert.Fail ("ArgumentOutOfRangeException is expected #2");
	} catch (ArgumentOutOfRangeException) {
		// expected
	}

	// With zero bytes to decode, charIndex == chars.Length is accepted and
	// nothing is written.
	int charactersWritten = Encoding.UTF8.GetDecoder ().GetChars (bytes, 0, 0, chars, 10, false);
	Assert.AreEqual (0, charactersWritten, "#3");
}
[Test]
public void EncodingFallback ()
{
	/* Legal UTF-8 Byte Sequences
	 *	1st	2nd	3rd	4th
	 *	00..7F
	 *	C2..DF	80..BF
	 *	E0	A0..BF	80..BF
	 *	E1..EF	80..BF	80..BF
	 *	F0	90..BF	80..BF	80..BF
	 *	F1..F3	80..BF	80..BF	80..BF
	 *	F4	80..8F	80..BF	80..BF
	 */

	// NOTE(review): TestDecoderFallback is assumed to take the input bytes,
	// the expected decoded text, and then each run of bytes expected to be
	// handed to the decoder fallback - confirm against EncodingTester.
	var t = new EncodingTester ("utf-8");
	byte [] data;

	// Invalid 1st byte: no row of the table starts with 80..C1.
	for (byte b = 0x80; b <= 0xC1; b++) {
		data = new byte [] { b };
		t.TestDecoderFallback (data, "?", new byte [] { b });
	}

	// Invalid 2nd byte.
	//	C2..DF	80..BF
	// 0x61 ('a') is not a continuation byte, so only the lead byte falls back.
	for (byte b = 0xC2; b <= 0xDF; b++) {
		data = new byte [] { b, 0x61 };
		t.TestDecoderFallback (data, "?a", new byte [] { b });
	}

	//	E0	A0..BF
	// 0x99 is below the A0..BF range required after E0.
	data = new byte [] { 0xE0, 0x99 };
	t.TestDecoderFallback (data, "?", new byte [] { 0xE0, 0x99 });

	//	E1..EF	80..BF
	for (byte b = 0xE1; b <= 0xEF; b++) {
		data = new byte [] { b, 0x61 };
		t.TestDecoderFallback (data, "?a", new byte [] { b });
	}

	//	F0	90..BF
	// 0x8F is below the 90..BF range required after F0.
	data = new byte [] { 0xF0, 0x8F };
	t.TestDecoderFallback (data, "?", new byte [] { 0xF0, 0x8F });

	//	F1..F4	80..BF
	for (byte b = 0xF1; b <= 0xF4; b++) {
		data = new byte [] { b, 0x61 };
		t.TestDecoderFallback (data, "?a", new byte [] { b });
	}

	// A lead byte followed by 0xC0, which is never a legal byte: two
	// separate fallbacks are expected.
	for (byte b = 0xC2; b <= 0xF3; b++) {
		data = new byte [] { b, 0xC0 };
		t.TestDecoderFallback (data, "??", new byte [] { b }, new byte [] { 0xC0 });
	}

	// Invalid 3rd byte.
	//	E0..F3	90..BF	80..BF
	for (byte b = 0xE0; b <= 0xF3; b++) {
		data = new byte [] { b, 0xB0, 0x61 };
		t.TestDecoderFallback (data, "?a", new byte [] { b, 0xB0 });
		data = new byte [] { b, 0xB0, 0xC0 };
		t.TestDecoderFallback (data, "??", new byte [] { b, 0xB0 }, new byte [] { 0xC0 });
	}

	//	F4	80..8F	80..BF
	data = new byte [] { 0xF4, 0x8F, 0xC0 };
	t.TestDecoderFallback (data, "??", new byte [] { 0xF4, 0x8F }, new byte [] { 0xC0 });

	// Invalid 4th byte.
	//	F0..F3	90..BF	80..BF	80..BF
	for (byte b = 0xF0; b <= 0xF3; b++) {
		data = new byte [] { b, 0xB0, 0xB0, 0x61 };
		t.TestDecoderFallback (data, "?a", new byte [] { b, 0xB0, 0xB0 });
		data = new byte [] { b, 0xB0, 0xB0, 0xC0 };
		t.TestDecoderFallback (data, "??", new byte [] { b, 0xB0, 0xB0 }, new byte [] { 0xC0 });
	}

	//	F4	80..8F	80..BF	80..BF
	data = new byte [] { 0xF4, 0x8F, 0xB0, 0xC0 };
	t.TestDecoderFallback (data, "??", new byte [] { 0xF4, 0x8F, 0xB0 }, new byte [] { 0xC0 });
}
1168 public void DecoderBug23771 ()
1170 var input = "\u733F"; // 'mono' on Japanese, 3bytes in UTF-8.
1171 var encoded = Encoding.UTF8.GetBytes (input);
1172 var decoder = Encoding.UTF8.GetDecoder ();
1173 var chars = new char [10]; // Just enough space to decode.
1174 var result = new StringBuilder ();
1175 var bytes = new byte [1]; // Simulates chunked input bytes.
1176 // Feed the encoded bytes to the decoder separately, one at a time.
1177 foreach (var b in encoded) {
1179 int bytesUsed, charsUsed;
1181 decoder.Convert (bytes, 0, bytes.Length, chars, 0, chars.Length, false, out bytesUsed, out charsUsed, out completed);
1182 result.Append (chars, 0, charsUsed);
1183 // Expected outputs are written in bottom.
1184 //Debug.Print ("bytesUsed:{0}, charsUsed:{1}, completed:{2}, result:'{3}'", bytesUsed, charsUsed, completed, result);
1187 // Expected: NO assertion error.
1188 Assert.AreEqual (input, result.ToString (), "#1");
1191 * Expected Debug outputs are:
1192 * bytesUsed:1, charsUsed:0, completed:True, result:''
1193 * bytesUsed:1, charsUsed:0, completed:True, result:''
1194 * bytesUsed:1, charsUsed:1, completed:True, result:'猿'
1196 * -- Note: '猿' is U+733F (1char in UTF-16)
1198 * Actual Debug outputs are:
1199 * bytesUsed:3, charsUsed:1, completed:False, result:'�'
1200 * bytesUsed:3, charsUsed:1, completed:False, result:'��'
1201 * bytesUsed:3, charsUsed:1, completed:False, result:'���'
1203 * None of the output parameters match the expected values.
1204 * -- Note: '�' is decoder fallback char (U+FFFD)