2 // UTF8EncodingTest.cs - NUnit Test Cases for System.Text.UTF8Encoding
5 // Patrick Kalkman kalkman@cistron.nl
6 // Sebastien Pouliot (spouliot@motus.com)
8 // (C) 2003 Patrick Kalkman
9 // (C) 2004 Novell (http://www.novell.com)
12 using NUnit.Framework;
17 using DecoderException = System.Text.DecoderFallbackException;
19 using AssertType = NUnit.Framework.Assert;
21 namespace MonoTests.System.Text
24 public class UTF8EncodingTest
26 private UTF8Encoding utf8;
31 utf8 = new UTF8Encoding (true, true);
[Test]
public void IsBrowserDisplay ()
{
	// The fixture's UTF-8 encoding is expected to report browser-display support.
	bool supported = utf8.IsBrowserDisplay;
	Assert.IsTrue (supported);
}
[Test]
public void IsBrowserSave ()
{
	// The fixture's UTF-8 encoding is expected to report browser-save support.
	bool supported = utf8.IsBrowserSave;
	Assert.IsTrue (supported);
}
[Test]
public void IsMailNewsDisplay ()
{
	// The fixture's UTF-8 encoding is expected to report mail/news display support.
	bool supported = utf8.IsMailNewsDisplay;
	Assert.IsTrue (supported);
}
[Test]
public void IsMailNewsSave ()
{
	// The fixture's UTF-8 encoding is expected to report mail/news save support.
	bool supported = utf8.IsMailNewsSave;
	Assert.IsTrue (supported);
}
[Test]
public void TestCompat ()
{
	// Two default-constructed UTF8Encoding instances must compare equal.
	UTF8Encoding first = new UTF8Encoding ();
	UTF8Encoding second = new UTF8Encoding ();
	Assert.IsTrue (first.Equals (second));
}
[Test]
public void TestEncodingGetBytes1()
{
	// U+0041 U+2262 U+0391 U+002E must encode to 41 E2 89 A2 CE 91 2E.
	int[] expected = { 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E };
	byte[] actual = new UTF8Encoding ().GetBytes ("\u0041\u2262\u0391\u002E");

	for (int i = 0; i < expected.Length; i++) {
		Assert.AreEqual (expected [i], actual [i], "UTF #" + (i + 1));
	}
}
[Test]
public void TestEncodingGetBytes2()
{
	// "Hi Mom " + U+263A + "!" must encode to 48 69 20 4D 6F 6D 20 E2 98 BA 21.
	int[] expected = { 0x48, 0x69, 0x20, 0x4D, 0x6F, 0x6D, 0x20, 0xE2, 0x98, 0xBA, 0x21 };
	string text = "\u0048\u0069\u0020\u004D\u006F\u006D\u0020\u263A\u0021";
	byte[] buffer = new byte [11];

	int written = new UTF8Encoding ().GetBytes (text.ToCharArray (), 0, text.Length, buffer, 0);

	Assert.AreEqual (11, written, "UTF #1");
	for (int i = 0; i < expected.Length; i++) {
		// Message numbering continues after the byte-count assertion.
		Assert.AreEqual (expected [i], buffer [i], "UTF #" + (i + 2));
	}
}
[Test]
public void TestDecodingGetChars1()
{
	// 41 E2 89 A2 CE 91 2E must decode to U+0041 U+2262 U+0391 U+002E.
	int[] expected = { 0x0041, 0x2262, 0x0391, 0x002E };
	byte[] encoded = { 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E };

	char[] decoded = new UTF8Encoding ().GetChars (encoded);

	for (int i = 0; i < expected.Length; i++) {
		Assert.AreEqual (expected [i], decoded [i], "UTF #" + (i + 1));
	}
}
[Test]
public void TestMaxCharCount()
{
	UTF8Encoding UTF8enc = new UTF8Encoding ();
	Encoding UTF8encWithBOM = new UTF8Encoding(true);
	Assert.AreEqual (51, UTF8enc.GetMaxCharCount(50), "UTF #1");
	// Emitting a BOM must not change the maximum char count. The previous
	// version compared GetMaxByteCount here, leaving the char count unchecked.
	Assert.AreEqual (UTF8enc.GetMaxCharCount(50), UTF8encWithBOM.GetMaxCharCount(50), "UTF #2");
}
[Test]
public void TestMaxCharCountWithCustomFallback()
{
	// Both fallbacks use a two-character replacement string.
	EncoderFallback encoderFallback = new EncoderReplacementFallback("\u2047\u2047");
	DecoderFallback decoderFallback = new DecoderReplacementFallback("\u2047\u2047");
	Encoding encoding = Encoding.GetEncoding("utf-8", encoderFallback, decoderFallback);

	int maxChars = encoding.GetMaxCharCount(50);
	Assert.AreEqual (102, maxChars, "UTF #1");
}
[Test]
public void TestMaxByteCount()
{
	UTF8Encoding plain = new UTF8Encoding ();
	Encoding withBom = new UTF8Encoding(true);

	Assert.AreEqual (153, plain.GetMaxByteCount(50), "UTF #1");
	// The BOM setting must not influence the maximum byte count.
	int plainMax = plain.GetMaxByteCount(50);
	Assert.AreEqual (plainMax, withBom.GetMaxByteCount(50), "UTF #2");
}
[Test]
public void TestMaxByteCountWithCustomFallback()
{
	// The encoder fallback uses a two-character replacement string.
	EncoderFallback encoderFallback = new EncoderReplacementFallback("\u2047\u2047");
	DecoderFallback decoderFallback = new DecoderReplacementFallback("?");
	Encoding encoding = Encoding.GetEncoding("utf-8", encoderFallback, decoderFallback);

	int maxBytes = encoding.GetMaxByteCount(50);
	Assert.AreEqual (306, maxBytes, "UTF #1");
}
// regression for bug #59648
[Test]
public void TestThrowOnInvalid ()
{
	// throwOnInvalidBytes is false here, so invalid bytes are expected to be
	// replaced with U+FFFD rather than raising an exception.
	UTF8Encoding lenient = new UTF8Encoding (true, false);

	byte[] invalidOnly = { 0xC0, 0xAF };
	Assert.AreEqual (2, lenient.GetCharCount (invalidOnly), "#A0");
	string replaced = lenient.GetString (invalidOnly);
	Assert.AreEqual ("\uFFFD\uFFFD", replaced, "#A1");

	byte[] embedded = { 0x30, 0x31, 0xC0, 0xAF, 0x30, 0x32 };
	string decoded = lenient.GetString (embedded);
	Assert.AreEqual (6, decoded.Length, "#B1");

	int[] expected = { 0x30, 0x31, 0xFFFD, 0xFFFD, 0x30, 0x32 };
	for (int i = 0; i < expected.Length; i++) {
		Assert.AreEqual (expected [i], (int) decoded [i], "#B" + (i + 2));
	}
}
178 // UTF8 decoding tests from http://www.cl.cam.ac.uk/~mgk25/
[Test]
public void T1_Correct_GreekWord_kosme ()
{
	byte[] greekWord = { 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5 };
	string decoded = utf8.GetString (greekWord);

	// Non-ASCII literals in source can be problematic, so only the
	// decode/encode round trip is verified.
	string originalHex = BitConverter.ToString (greekWord);
	string roundTripHex = BitConverter.ToString (utf8.GetBytes (decoded));
	Assert.AreEqual (originalHex, roundTripHex, "Reconverted");
}
[Test]
public void T2_Boundary_1_FirstPossibleSequence_Pass ()
{
	// First code point of each encoded length; each must also re-encode to the same bytes.
	byte[] oneByte = { 0x00 };
	string decoded1 = utf8.GetString (oneByte);
	Assert.AreEqual ("\0", decoded1, "1 byte (U-00000000)");
	Assert.AreEqual (BitConverter.ToString (oneByte), BitConverter.ToString (utf8.GetBytes (decoded1)), "Reconverted-1");

	byte[] twoBytes = { 0xC2, 0x80 };
	string decoded2 = utf8.GetString (twoBytes);
	Assert.AreEqual (0x80, decoded2 [0], "2 bytes (U-00000080)");
	Assert.AreEqual (BitConverter.ToString (twoBytes), BitConverter.ToString (utf8.GetBytes (decoded2)), "Reconverted-2");

	byte[] threeBytes = { 0xE0, 0xA0, 0x80 };
	string decoded3 = utf8.GetString (threeBytes);
	Assert.AreEqual (0x800, decoded3 [0], "3 bytes (U-00000800)");
	Assert.AreEqual (BitConverter.ToString (threeBytes), BitConverter.ToString (utf8.GetBytes (decoded3)), "Reconverted-3");

	// U+10000 is expected as the surrogate pair D800 DC00.
	byte[] fourBytes = { 0xF0, 0x90, 0x80, 0x80 };
	string decoded4 = utf8.GetString (fourBytes);
	Assert.AreEqual (0xD800, decoded4 [0], "4 bytes (U-00010000)-0");
	Assert.AreEqual (0xDC00, decoded4 [1], "4 bytes (U-00010000)-1");
	Assert.AreEqual (BitConverter.ToString (fourBytes), BitConverter.ToString (utf8.GetBytes (decoded4)), "Reconverted-4");
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_1_FirstPossibleSequence_Fail_5 ()
{
	// A 5-byte form starting with F8; decoding is expected to throw.
	byte[] fiveByteForm = { 0xF8, 0x88, 0x80, 0x80, 0x80 };
	string decoded = utf8.GetString (fiveByteForm);

	// Only reached if decoding unexpectedly succeeds.
	Assert.IsNull (decoded, "5 bytes (U-00200000)");
	string originalHex = BitConverter.ToString (fiveByteForm);
	Assert.AreEqual (originalHex, BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-5");
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_1_FirstPossibleSequence_Fail_6 ()
{
	// A 6-byte form starting with FC; decoding is expected to throw.
	byte[] sixByteForm = { 0xFC, 0x84, 0x80, 0x80, 0x80, 0x80 };
	string decoded = utf8.GetString (sixByteForm);

	// Only reached if decoding unexpectedly succeeds.
	Assert.IsNull (decoded, "6 bytes (U-04000000)");
	string originalHex = BitConverter.ToString (sixByteForm);
	Assert.AreEqual (originalHex, BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-6");
}
[Test]
public void T2_Boundary_2_LastPossibleSequence_Pass ()
{
	// Last code point of the 1-, 2- and 3-byte lengths; each must round-trip.
	byte[] oneByte = { 0x7F };
	string decoded1 = utf8.GetString (oneByte);
	Assert.AreEqual (0x7F, decoded1 [0], "1 byte (U-0000007F)");
	Assert.AreEqual (BitConverter.ToString (oneByte), BitConverter.ToString (utf8.GetBytes (decoded1)), "Reconverted-1");

	byte[] twoBytes = { 0xDF, 0xBF };
	string decoded2 = utf8.GetString (twoBytes);
	Assert.AreEqual (0x7FF, decoded2 [0], "2 bytes (U-000007FF)");
	Assert.AreEqual (BitConverter.ToString (twoBytes), BitConverter.ToString (utf8.GetBytes (decoded2)), "Reconverted-2");

	byte[] threeBytes = { 0xEF, 0xBF, 0xBF };
	string decoded3 = utf8.GetString (threeBytes);
	Assert.AreEqual (0xFFFF, decoded3 [0], "3 bytes (U-0000FFFF)");
	Assert.AreEqual (BitConverter.ToString (threeBytes), BitConverter.ToString (utf8.GetBytes (decoded3)), "Reconverted-3");
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_2_LastPossibleSequence_Fail_4 ()
{
	// U-001FFFFF is encoded as F7 BF BF BF. The lead byte was previously
	// written as 0x7F (digits transposed), so the test exercised an ASCII
	// byte followed by stray continuations instead of the intended sequence.
	byte[] data224 = { 0xF7, 0xBF, 0xBF, 0xBF };
	string s = utf8.GetString (data224);
	// Only reached if decoding unexpectedly succeeds.
	Assert.IsNull (s, "4 bytes (U-001FFFFF)");
	Assert.AreEqual (BitConverter.ToString (data224), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-4");
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_2_LastPossibleSequence_Fail_5 ()
{
	// Last 5-byte form (lead byte FB); decoding is expected to throw.
	byte[] fiveByteForm = { 0xFB, 0xBF, 0xBF, 0xBF, 0xBF };
	string decoded = utf8.GetString (fiveByteForm);

	// Only reached if decoding unexpectedly succeeds.
	Assert.IsNull (decoded, "5 bytes (U-03FFFFFF)");
	string originalHex = BitConverter.ToString (fiveByteForm);
	Assert.AreEqual (originalHex, BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-5");
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_2_LastPossibleSequence_Fail_6 ()
{
	// Last 6-byte form (lead byte FD); decoding is expected to throw.
	byte[] sixByteForm = { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF, 0xBF };
	string decoded = utf8.GetString (sixByteForm);

	// Only reached if decoding unexpectedly succeeds.
	Assert.IsNull (decoded, "6 bytes (U-7FFFFFFF)");
	string originalHex = BitConverter.ToString (sixByteForm);
	Assert.AreEqual (originalHex, BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-6");
}
[Test]
public void T2_Boundary_3_Other_Pass ()
{
	// Boundary code points around the surrogate range and at the top of Unicode.
	byte[] belowSurrogates = { 0xED, 0x9F, 0xBF };
	string decoded1 = utf8.GetString (belowSurrogates);
	Assert.AreEqual (0xD7FF, decoded1 [0], "U-0000D7FF");
	Assert.AreEqual (BitConverter.ToString (belowSurrogates), BitConverter.ToString (utf8.GetBytes (decoded1)), "Reconverted-1");

	byte[] aboveSurrogates = { 0xEE, 0x80, 0x80 };
	string decoded2 = utf8.GetString (aboveSurrogates);
	Assert.AreEqual (0xE000, decoded2 [0], "U-0000E000");
	Assert.AreEqual (BitConverter.ToString (aboveSurrogates), BitConverter.ToString (utf8.GetBytes (decoded2)), "Reconverted-2");

	byte[] replacementChar = { 0xEF, 0xBF, 0xBD };
	string decoded3 = utf8.GetString (replacementChar);
	Assert.AreEqual (0xFFFD, decoded3 [0], "U-0000FFFD");
	Assert.AreEqual (BitConverter.ToString (replacementChar), BitConverter.ToString (utf8.GetBytes (decoded3)), "Reconverted-3");

	// U+10FFFF is expected as the surrogate pair DBFF DFFF.
	byte[] lastCodePoint = { 0xF4, 0x8F, 0xBF, 0xBF };
	string decoded4 = utf8.GetString (lastCodePoint);
	Assert.AreEqual (0xDBFF, decoded4 [0], "U-0010FFFF-0");
	Assert.AreEqual (0xDFFF, decoded4 [1], "U-0010FFFF-1");
	Assert.AreEqual (BitConverter.ToString (lastCodePoint), BitConverter.ToString (utf8.GetBytes (decoded4)), "Reconverted-4");
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_3_Other_Fail_5 ()
{
	// F4 90 80 80 would be U+110000, one past the last code point; expected to throw.
	byte[] beyondUnicode = { 0xF4, 0x90, 0x80, 0x80 };
	string decoded = utf8.GetString (beyondUnicode);

	// Only reached if decoding unexpectedly succeeds.
	Assert.IsNull (decoded, "U-00110000");
	string originalHex = BitConverter.ToString (beyondUnicode);
	Assert.AreEqual (originalHex, BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-5");
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_311 ()
{
	// A lone continuation byte (0x80) must make the decoder throw.
	utf8.GetString (new byte[] { 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_312 ()
{
	// A lone continuation byte (0xBF) must make the decoder throw.
	utf8.GetString (new byte[] { 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_313 ()
{
	// Two continuation bytes without a lead byte must make the decoder throw.
	utf8.GetString (new byte[] { 0x80, 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_314 ()
{
	// Three continuation bytes without a lead byte must make the decoder throw.
	utf8.GetString (new byte[] { 0x80, 0xBF, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_315 ()
{
	// Four continuation bytes without a lead byte must make the decoder throw.
	utf8.GetString (new byte[] { 0x80, 0xBF, 0x80, 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_316 ()
{
	// Five continuation bytes without a lead byte must make the decoder throw.
	utf8.GetString (new byte[] { 0x80, 0xBF, 0x80, 0xBF, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_317 ()
{
	// Six continuation bytes without a lead byte must make the decoder throw.
	utf8.GetString (new byte[] { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_318 ()
{
	// Seven continuation bytes without a lead byte must make the decoder throw.
	utf8.GetString (new byte[] { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_319 ()
{
	// All 64 continuation bytes, 0x80 through 0xBF, in ascending order.
	byte[] continuations = new byte [64];
	for (int i = 0; i < continuations.Length; i++) {
		continuations [i] = (byte) (0x80 + i);
	}

	utf8.GetString (continuations);
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_321 ()
{
	// Each of the 32 lead bytes C0..DF, followed by a space instead of a continuation.
	byte[] lonelyStarts = new byte [64];
	for (int i = 0; i < 32; i++) {
		lonelyStarts [2 * i] = (byte) (0xC0 + i);
		lonelyStarts [2 * i + 1] = 0x20;
	}

	utf8.GetString (lonelyStarts);
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_322 ()
{
	// Each of the 16 lead bytes E0..EF, followed by a space instead of a continuation.
	byte[] lonelyStarts = new byte [32];
	for (int i = 0; i < 16; i++) {
		lonelyStarts [2 * i] = (byte) (0xE0 + i);
		lonelyStarts [2 * i + 1] = 0x20;
	}

	utf8.GetString (lonelyStarts);
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_323 ()
{
	// Each of the 8 lead bytes F0..F7, followed by a space instead of a continuation.
	byte[] lonelyStarts = new byte [16];
	for (int i = 0; i < 8; i++) {
		lonelyStarts [2 * i] = (byte) (0xF0 + i);
		lonelyStarts [2 * i + 1] = 0x20;
	}

	utf8.GetString (lonelyStarts);
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_324 ()
{
	// Case 3.2.4 of Markus Kuhn's stress test: the four lead bytes of 5-byte
	// forms (F8..FB), each followed by a space. The data was previously a
	// verbatim copy of case 3.2.3 (F0..F7), so this case was never exercised.
	byte[] data = { 0xF8, 0x20, 0xF9, 0x20, 0xFA, 0x20, 0xFB, 0x20 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_325 ()
{
	// Lead bytes FC and FD, each followed by a space instead of a continuation.
	byte[] lonelyStarts = { 0xFC, 0x20, 0xFD, 0x20 };
	utf8.GetString (lonelyStarts);
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_331 ()
{
	// Lead byte C0 with no continuation byte; expected to throw.
	utf8.GetString (new byte[] { 0xC0 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_332 ()
{
	// Lead byte E0 with only one continuation byte; expected to throw.
	utf8.GetString (new byte[] { 0xE0, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_333 ()
{
	// Lead byte F0 with only two continuation bytes; expected to throw.
	utf8.GetString (new byte[] { 0xF0, 0x80, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_334 ()
{
	// Lead byte F8 with only three continuation bytes; expected to throw.
	utf8.GetString (new byte[] { 0xF8, 0x80, 0x80, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_335 ()
{
	// Lead byte FC with only four continuation bytes; expected to throw.
	utf8.GetString (new byte[] { 0xFC, 0x80, 0x80, 0x80, 0x80 });
}
[Test]
public void T3_Malformed_3_LastContinuationMissing_336 ()
{
	// Two outcomes are tolerated for a lone DF lead byte: the MS Fx 1.1
	// behaviour (empty string) or a DecoderException, which follows the
	// standard and is what Mono raises.
	byte[] truncated = { 0xDF };
	string decoded;
	try {
		decoded = utf8.GetString (truncated);
	} catch (DecoderException) {
		return;
	}
	Assert.AreEqual (String.Empty, decoded, "MS FX 1.1 behaviour");
}
[Test]
public void T3_Malformed_3_LastContinuationMissing_337 ()
{
	// Two outcomes are tolerated for the truncated sequence EF BF: the MS Fx 1.1
	// behaviour (empty string) or a DecoderException, which follows the
	// standard and is what Mono raises.
	byte[] truncated = { 0xEF, 0xBF };
	string decoded;
	try {
		decoded = utf8.GetString (truncated);
	} catch (DecoderException) {
		return;
	}
	Assert.AreEqual (String.Empty, decoded, "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_338 ()
{
	// Lead byte F7 with only two continuation bytes; expected to throw.
	utf8.GetString (new byte[] { 0xF7, 0xBF, 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_339 ()
{
	// Case 3.3.9 of Markus Kuhn's stress test: the 5-byte form for U-03FFFFFF
	// with its last byte missing, i.e. FB BF BF BF. The lead byte was
	// previously written as 0xF (a dropped digit), which is an ASCII control
	// character rather than the intended lead byte.
	byte[] data = { 0xFB, 0xBF, 0xBF, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_3310 ()
{
	// Lead byte FD with only four continuation bytes; expected to throw.
	utf8.GetString (new byte[] { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_4_ConcatenationImcomplete ()
{
	// Ten incomplete sequences back to back, one per row.
	byte[] truncatedSequences = {
		0xC0,
		0xE0, 0x80,
		0xF0, 0x80, 0x80,
		0xF8, 0x80, 0x80, 0x80,
		0xFC, 0x80, 0x80, 0x80, 0x80,
		0xDF,
		0xEF, 0xBF,
		0xF7, 0xBF, 0xBF,
		0xFB, 0xBF, 0xBF, 0xBF,
		0xFD, 0xBF, 0xBF, 0xBF, 0xBF };

	utf8.GetString (truncatedSequences);
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_5_ImpossibleBytes_351 ()
{
	// The byte FE is expected to be rejected.
	utf8.GetString (new byte[] { 0xFE });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_5_ImpossibleBytes_352 ()
{
	// The byte FF is expected to be rejected.
	utf8.GetString (new byte[] { 0xFF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_5_ImpossibleBytes_353 ()
{
	// A run of FE and FF bytes is expected to be rejected.
	utf8.GetString (new byte[] { 0xFE, 0xFE, 0xFF, 0xFF });
}
// Overlong == dangerous -> "safe" decoder should reject them
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_411 ()
{
	// 2-byte overlong form of '/' (U+002F); expected to throw.
	utf8.GetString (new byte[] { 0xC0, 0xAF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_412 ()
{
	// 3-byte overlong form of '/' (U+002F); expected to throw.
	utf8.GetString (new byte[] { 0xE0, 0x80, 0xAF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_413 ()
{
	// 4-byte overlong form of '/' (U+002F); expected to throw.
	utf8.GetString (new byte[] { 0xF0, 0x80, 0x80, 0xAF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_414 ()
{
	// 5-byte overlong form of '/' (U+002F); expected to throw.
	utf8.GetString (new byte[] { 0xF8, 0x80, 0x80, 0x80, 0xAF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_415 ()
{
	// 6-byte overlong form of '/' (U+002F); expected to throw.
	utf8.GetString (new byte[] { 0xFC, 0x80, 0x80, 0x80, 0x80, 0xAF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_421 ()
{
	// Highest value still overlong in 2 bytes (C1 BF); expected to throw.
	utf8.GetString (new byte[] { 0xC1, 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_422 ()
{
	// Highest value still overlong in 3 bytes (E0 9F BF); expected to throw.
	utf8.GetString (new byte[] { 0xE0, 0x9F, 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_423 ()
{
	// Highest value still overlong in 4 bytes (F0 8F BF BF); expected to throw.
	utf8.GetString (new byte[] { 0xF0, 0x8F, 0xBF, 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_424 ()
{
	// Highest value still overlong in 5 bytes (F8 87 BF BF BF); expected to throw.
	utf8.GetString (new byte[] { 0xF8, 0x87, 0xBF, 0xBF, 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_425 ()
{
	// Highest value still overlong in 6 bytes (FC 83 BF BF BF BF); expected to throw.
	utf8.GetString (new byte[] { 0xFC, 0x83, 0xBF, 0xBF, 0xBF, 0xBF });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_431 ()
{
	// 2-byte overlong form of NUL; expected to throw.
	utf8.GetString (new byte[] { 0xC0, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_432 ()
{
	// 3-byte overlong form of NUL; expected to throw.
	utf8.GetString (new byte[] { 0xE0, 0x80, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_433 ()
{
	// 4-byte overlong form of NUL; expected to throw.
	utf8.GetString (new byte[] { 0xF0, 0x80, 0x80, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_434 ()
{
	// 5-byte overlong form of NUL; expected to throw.
	utf8.GetString (new byte[] { 0xF8, 0x80, 0x80, 0x80, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_435 ()
{
	// 6-byte overlong form of NUL; expected to throw.
	utf8.GetString (new byte[] { 0xFC, 0x80, 0x80, 0x80, 0x80, 0x80 });
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_511 ()
{
	// ED A0 80 is the 3-byte form of the lone surrogate U+D800; expected to throw.
	byte[] surrogateBytes = { 0xED, 0xA0, 0x80 };
	string decoded = utf8.GetString (surrogateBytes);
	// Only reached if the sequence is accepted (MS Fx 1.1 behaviour).
	Assert.AreEqual (0xD800, decoded [0], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_512 ()
{
	// ED AD BF is the 3-byte form of the lone surrogate U+DB7F; expected to throw.
	byte[] surrogateBytes = { 0xED, 0xAD, 0xBF };
	string decoded = utf8.GetString (surrogateBytes);
	// Only reached if the sequence is accepted (MS Fx 1.1 behaviour).
	Assert.AreEqual (0xDB7F, decoded [0], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_513 ()
{
	// ED AE 80 is the 3-byte form of the lone surrogate U+DB80; expected to throw.
	byte[] surrogateBytes = { 0xED, 0xAE, 0x80 };
	string decoded = utf8.GetString (surrogateBytes);
	// Only reached if the sequence is accepted (MS Fx 1.1 behaviour).
	Assert.AreEqual (0xDB80, decoded [0], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_514 ()
{
	// ED AF BF is the 3-byte form of the lone surrogate U+DBFF; expected to throw.
	byte[] surrogateBytes = { 0xED, 0xAF, 0xBF };
	string decoded = utf8.GetString (surrogateBytes);
	// Only reached if the sequence is accepted (MS Fx 1.1 behaviour).
	Assert.AreEqual (0xDBFF, decoded [0], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_515 ()
{
	// ED B0 80 is the 3-byte form of the lone surrogate U+DC00; expected to throw.
	byte[] surrogateBytes = { 0xED, 0xB0, 0x80 };
	string decoded = utf8.GetString (surrogateBytes);
	// Only reached if the sequence is accepted (MS Fx 1.1 behaviour).
	Assert.AreEqual (0xDC00, decoded [0], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_516 ()
{
	// ED BE 80 is the 3-byte form of the lone surrogate U+DF80; expected to throw.
	byte[] surrogateBytes = { 0xED, 0xBE, 0x80 };
	string decoded = utf8.GetString (surrogateBytes);
	// Only reached if the sequence is accepted (MS Fx 1.1 behaviour).
	Assert.AreEqual (0xDF80, decoded [0], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_1_UTF16Surrogates_517 ()
{
	// ED BF BF is the 3-byte form of the lone surrogate U+DFFF; expected to throw.
	byte[] surrogateBytes = { 0xED, 0xBF, 0xBF };
	string decoded = utf8.GetString (surrogateBytes);
	// Only reached if the sequence is accepted (MS Fx 1.1 behaviour).
	Assert.AreEqual (0xDFFF, decoded [0], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_521 ()
{
	// U+D800 U+DC00, each surrogate encoded separately as 3 bytes; expected to throw.
	byte[] pairedSurrogates = { 0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80 };
	string decoded = utf8.GetString (pairedSurrogates);
	// Only reached if the sequence is accepted (MS Fx 1.1 behaviour).
	Assert.AreEqual (0xD800, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDC00, decoded [1], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_522 ()
{
	// U+D800 U+DFFF, each surrogate encoded separately as 3 bytes; expected to throw.
	byte[] pairedSurrogates = { 0xED, 0xA0, 0x80, 0xED, 0xBF, 0xBF };
	string decoded = utf8.GetString (pairedSurrogates);
	// Only reached if the sequence is accepted (MS Fx 1.1 behaviour).
	Assert.AreEqual (0xD800, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDFFF, decoded [1], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_523 ()
{
	// U+DB7F U+DC00, each surrogate encoded separately as 3 bytes; expected to throw.
	byte[] pairedSurrogates = { 0xED, 0xAD, 0xBF, 0xED, 0xB0, 0x80 };
	string decoded = utf8.GetString (pairedSurrogates);
	// Only reached if the sequence is accepted (MS Fx 1.1 behaviour).
	Assert.AreEqual (0xDB7F, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDC00, decoded [1], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_524 ()
{
	// U+DB7F U+DFFF, each surrogate encoded separately as 3 bytes; expected to throw.
	byte[] pairedSurrogates = { 0xED, 0xAD, 0xBF, 0xED, 0xBF, 0xBF };
	string decoded = utf8.GetString (pairedSurrogates);
	// Only reached if the sequence is accepted (MS Fx 1.1 behaviour).
	Assert.AreEqual (0xDB7F, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDFFF, decoded [1], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_525 ()
{
	// U+DB80 U+DC00, each surrogate encoded separately as 3 bytes; expected to throw.
	byte[] pairedSurrogates = { 0xED, 0xAE, 0x80, 0xED, 0xB0, 0x80 };
	string decoded = utf8.GetString (pairedSurrogates);
	// Only reached if the sequence is accepted (MS Fx 1.1 behaviour).
	Assert.AreEqual (0xDB80, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDC00, decoded [1], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_526 ()
{
	// Case 5.2.6 of Markus Kuhn's stress test: U+DB80 U+DFFF, encoded as
	// ED AE 80 ED BF BF. The last byte was previously 0x8F (giving U+DFCF),
	// which broke the pattern of the sibling cases; the byte and the matching
	// expected value (57295 -> 57343) are corrected together.
	byte[] data = { 0xED, 0xAE, 0x80, 0xED, 0xBF, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	Assert.AreEqual (56192, s [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (57343, s [1], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_527 ()
{
	// U+DBFF U+DC00, each surrogate encoded separately as 3 bytes; expected to throw.
	byte[] pairedSurrogates = { 0xED, 0xAF, 0xBF, 0xED, 0xB0, 0x80 };
	string decoded = utf8.GetString (pairedSurrogates);
	// Only reached if the sequence is accepted (MS Fx 1.1 behaviour).
	Assert.AreEqual (0xDBFF, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDC00, decoded [1], "MS FX 1.1 behaviour");
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_528 ()
{
	// U+DBFF U+DFFF, each surrogate encoded separately as 3 bytes; expected to throw.
	byte[] pairedSurrogates = { 0xED, 0xAF, 0xBF, 0xED, 0xBF, 0xBF };
	string decoded = utf8.GetString (pairedSurrogates);
	// Only reached if the sequence is accepted (MS Fx 1.1 behaviour).
	Assert.AreEqual (0xDBFF, decoded [0], "MS FX 1.1 behaviour");
	Assert.AreEqual (0xDFFF, decoded [1], "MS FX 1.1 behaviour");
}
[Test]
public void T5_IllegalCodePosition_3_Other_531 ()
{
	// EF BF BE encodes U+FFFE. MS Fx 1.1 accepts it, so no exception is
	// expected and the decoded value is checked instead.
	byte[] encoded = { 0xEF, 0xBF, 0xBE };
	string decoded = utf8.GetString (encoded);
	Assert.AreEqual (0xFFFE, decoded [0], "MS FX 1.1 behaviour");
}
[Test]
public void T5_IllegalCodePosition_3_Other_532 ()
{
	// EF BF BF encodes U+FFFF. MS Fx 1.1 accepts it, so no exception is
	// expected and the decoded value is checked instead.
	byte[] encoded = { 0xEF, 0xBF, 0xBF };
	string decoded = utf8.GetString (encoded);
	Assert.AreEqual (0xFFFF, decoded [0], "MS FX 1.1 behaviour");
}
// bug #75065 and #73086.
[Test]
public void GetCharsFEFF ()
{
	// EF BB BF decodes to U+FEFF, which GetString must keep in its output.
	byte [] data = new byte [] {0xEF, 0xBB, 0xBF};
	Encoding enc = new UTF8Encoding (false, true);
	string s = enc.GetString (data);
	// Expected value goes first; the arguments were previously swapped,
	// which would produce a misleading failure message.
	Assert.AreEqual ("\uFEFF", s);

	// U+FEFF must also survive an encode/decode round trip through Encoding.UTF8.
	Encoding utf = Encoding.UTF8;
	char[] testChars = {'\uFEFF','A'};

	byte[] bytes = utf.GetBytes(testChars);
	char[] chars = utf.GetChars(bytes);
	Assert.AreEqual ('\uFEFF', chars [0], "#1");
	Assert.AreEqual ('A', chars [1], "#2");
}
[Test]
public void CloneNotReadOnly ()
{
	// A clone of the shared code page 65001 (UTF-8) encoding must be writable.
	// A direct cast replaces the unchecked 'as' conversion so that a wrong
	// type fails at the cast instead of as a later NullReferenceException.
	Encoding e = (Encoding) Encoding.GetEncoding (65001).Clone ();
	Assert.AreEqual (false, e.IsReadOnly);
	// Assigning a fallback throws on a read-only instance, so this line also verifies writability.
	e.EncoderFallback = new EncoderExceptionFallback ();
}
[Test]
[ExpectedException (typeof (DecoderFallbackException))]
public void Bug77315 ()
{
	// ED A2 8C encodes a lone surrogate; a throwing decoder is expected to reject it.
	byte [] surrogateBytes = {0xED, 0xA2, 0x8C};
	UTF8Encoding strict = new UTF8Encoding (false, true);
	strict.GetString (surrogateBytes);
}
[Test]
public void SufficientByteArray ()
{
	// Encoding a lone high surrogate into an empty buffer: either zero bytes
	// are written (2.0 behaviour) or an ArgumentException is raised.
	Encoder encoder = Encoding.UTF8.GetEncoder ();
	byte [] emptyBuffer = new byte [0];
	char [] loneSurrogate = {'\uD800'};

	// Feed the surrogate without flushing, then flush with no further input.
	encoder.GetBytes (loneSurrogate, 0, 1, emptyBuffer, 0, false);
	try {
		int written = encoder.GetBytes (loneSurrogate, 1, 0, emptyBuffer, 0, true);
		Assert.AreEqual (0, written, "drop insufficient char in 2.0: char[]");
	} catch (ArgumentException) {
		// Tolerated outcome.
	}

	// Same scenario through the string overload.
	// NOTE(review): the declaration of this string was missing from the
	// extracted source; "\uD800" is inferred from the char[] case above.
	string text = "\uD800";
	try {
		int written = Encoding.UTF8.GetBytes (text, 0, 1, emptyBuffer, 0);
		Assert.AreEqual (0, written, "drop insufficient char in 2.0: string");
	} catch (ArgumentException) {
		// Tolerated outcome.
	}
}
[Test] // bug #565129
public void SufficientByteArray2 ()
{
	Encoding enc = Encoding.UTF8;

	// Byte counts: lone surrogates are counted as 3 bytes, a valid pair as 4.
	Assert.AreEqual (3, enc.GetByteCount ("\uFFFD"), "#1-1");
	Assert.AreEqual (3, enc.GetByteCount ("\uD800"), "#1-2");
	Assert.AreEqual (3, enc.GetByteCount ("\uDC00"), "#1-3");
	Assert.AreEqual (4, enc.GetByteCount ("\uD800\uDC00"), "#1-4");
	byte [] buffer = new byte [10];
	Assert.AreEqual (3, enc.GetBytes ("\uDC00", 0, 1, buffer, 0), "#1-5"); // was bogus

	// The encoded arrays must have the same lengths.
	Assert.AreEqual (3, enc.GetBytes ("\uFFFD").Length, "#2-1");
	Assert.AreEqual (3, enc.GetBytes ("\uD800").Length, "#2-2");
	Assert.AreEqual (3, enc.GetBytes ("\uDC00").Length, "#2-3");
	Assert.AreEqual (4, enc.GetBytes ("\uD800\uDC00").Length, "#2-4");

	// Encoding any single char below char.MaxValue must not throw.
	for (int code = char.MinValue; code < char.MaxValue; code++) {
		enc.GetBytes (((char) code).ToString ());
	}

	// With the exception fallback, a reversed surrogate pair must be rejected.
	try {
		char [] reversedPair = {'\uDF45', '\uD808'};
		new UTF8Encoding (false, true).GetBytes (reversedPair, 0, 2);
		Assert.Fail ("EncoderFallbackException is expected");
	} catch (EncoderFallbackException) {
		// Expected.
	}
}
[Test] // bug #77550
public void DecoderFallbackSimple ()
{
	// With a non-throwing decoder, the single invalid byte 183 must yield
	// exactly one char through every decoding entry point.
	UTF8Encoding lenient = new UTF8Encoding (false, false);
	byte [] invalid = {(byte) 183};

	int counted = lenient.GetDecoder ().GetCharCount (invalid, 0, 1);
	AssertType.AreEqual (1, counted, "#1");

	// NOTE(review): the output buffer size and the "#n" messages were missing
	// from the extracted source and are reconstructed here; confirm.
	int written = lenient.GetDecoder ().GetChars (invalid, 0, 1, new char [100], 0);
	AssertType.AreEqual (1, written, "#2");

	AssertType.AreEqual (1, lenient.GetString (invalid).Length, "#3");
}
[Test]
public void FallbackDefaultEncodingUTF8 ()
{
	// Encoding.UTF8 is expected to use the replacement fallback, which
	// substitutes a single U+FFFD.
	DecoderReplacementFallbackBuffer b =
		Encoding.UTF8.DecoderFallback.CreateFallbackBuffer ()
		as DecoderReplacementFallbackBuffer;
	// Fail with a clear message rather than a NullReferenceException if the
	// buffer is of another type.
	AssertType.IsNotNull (b, "#0");
	AssertType.IsTrue (b.Fallback (new byte [] {}, 0), "#1");
	AssertType.IsFalse (b.MovePrevious (), "#2");
	AssertType.AreEqual (1, b.Remaining, "#3");
	AssertType.AreEqual ('\uFFFD', b.GetNextChar (), "#4");
}
[Test]
[Category ("MobileNotWorking")]
public void Bug415628 ()
{
	// Decoding the first 8000 bytes of the resource must complete without throwing.
	// File.OpenRead requests read-only access; File.Open (path, FileMode.Open)
	// asks for read/write access and fails on a read-only file.
	using (var f = File.OpenRead ("Test/resources/415628.bin"))
	using (var br = new BinaryReader (f)) {
		byte [] buf = br.ReadBytes (8000);
		Encoding.UTF8.GetString(buf);
	}
}
[Test]
[ExpectedException (typeof (ArgumentException))]
public void Bug10788()
{
	// 4096 zero bytes do not fit into the single char slot left at index 9
	// of a 10-char buffer, so an ArgumentException is expected.
	byte[] input = new byte[4096];
	char[] output = new char[10];

	Decoder decoder = Encoding.UTF8.GetDecoder ();
	decoder.GetChars (input, 0, 4096, output, 9, false);
}
[Test]
public void Bug10789()
{
	byte[] input = new byte[4096];
	char[] output = new char[10];

	// Writing one char at index 10 (== length) leaves no room.
	try {
		Encoding.UTF8.GetDecoder ().GetChars (input, 0, 1, output, 10, false);
		Assert.Fail ("ArgumentException is expected #1");
	} catch (ArgumentException) {
		// Expected.
	}

	// Index 11 lies beyond the end of the buffer.
	try {
		Encoding.UTF8.GetDecoder ().GetChars (input, 0, 1, output, 11, false);
		Assert.Fail ("ArgumentOutOfRangeException is expected #2");
	} catch (ArgumentOutOfRangeException) {
		// Expected.
	}

	// Decoding zero bytes at index 10 must succeed and write nothing.
	int written = Encoding.UTF8.GetDecoder ().GetChars (input, 0, 0, output, 10, false);
	Assert.AreEqual (0, written, "#3");
}
[Test]
public void EncodingFallback ()
{
	/* Legal UTF-8 Byte Sequences
	 *
	 * 00..7F
	 * C2..DF 80..BF
	 * E0     A0..BF 80..BF
	 * E1..EF 80..BF 80..BF
	 * F0     90..BF 80..BF 80..BF
	 * F1..F3 80..BF 80..BF 80..BF
	 * F4     80..8F 80..BF 80..BF
	 */

	var tester = new EncodingTester ("utf-8");

	// Single bytes 80..C1, each reported on its own.
	for (int v = 0x80; v <= 0xC1; v++) {
		byte lead = (byte) v;
		tester.TestDecoderFallback (new byte [] { lead }, "?", new byte [] { lead });
	}

	// 2-byte leads C2..DF followed by ASCII 'a' instead of a continuation.
	for (int v = 0xC2; v <= 0xDF; v++) {
		byte lead = (byte) v;
		tester.TestDecoderFallback (new byte [] { lead, 0x61 }, "?a", new byte [] { lead });
	}

	// E0 followed by 0x99: both bytes are reported as one invalid unit.
	tester.TestDecoderFallback (new byte [] { 0xE0, 0x99}, "?", new byte [] { 0xE0, 0x99});

	// 3-byte leads E1..EF followed by ASCII 'a'.
	for (int v = 0xE1; v <= 0xEF; v++) {
		byte lead = (byte) v;
		tester.TestDecoderFallback (new byte [] { lead, 0x61 }, "?a", new byte [] { lead });
	}

	// F0 followed by 0x8F: both bytes are reported as one invalid unit.
	tester.TestDecoderFallback (new byte [] { 0xF0, 0x8F}, "?", new byte [] { 0xF0, 0x8F });

	// 4-byte leads F1..F4 followed by ASCII 'a'.
	for (int v = 0xF1; v <= 0xF4; v++) {
		byte lead = (byte) v;
		tester.TestDecoderFallback (new byte [] { lead, 0x61 }, "?a", new byte [] { lead });
	}

	// Leads C2..F3 followed by C0: two separate fallbacks.
	for (int v = 0xC2; v <= 0xF3; v++) {
		byte lead = (byte) v;
		tester.TestDecoderFallback (new byte [] { lead, 0xC0 }, "??", new byte [] { lead }, new byte [] { 0xC0 });
	}

	// E0..F3 90..BF 80..BF
	// Lead plus one continuation (B0), then 'a' or C0.
	for (int v = 0xE0; v <= 0xF3; v++) {
		byte lead = (byte) v;
		tester.TestDecoderFallback (new byte [] { lead, 0xB0, 0x61 }, "?a", new byte [] { lead, 0xB0 });
		tester.TestDecoderFallback (new byte [] { lead, 0xB0, 0xC0 }, "??", new byte [] { lead, 0xB0 }, new byte [] { 0xC0 });
	}

	// F4 8F followed by C0.
	tester.TestDecoderFallback (new byte [] { 0xF4, 0x8F, 0xC0 }, "??", new byte [] { 0xF4, 0x8F }, new byte [] { 0xC0 });

	// F0..F3 90..BF 80..BF 80..BF
	// Lead plus two continuations (B0 B0), then 'a' or C0.
	for (int v = 0xF0; v <= 0xF3; v++) {
		byte lead = (byte) v;
		tester.TestDecoderFallback (new byte [] { lead, 0xB0, 0xB0, 0x61 }, "?a", new byte [] { lead, 0xB0, 0xB0 });
		tester.TestDecoderFallback (new byte [] { lead, 0xB0, 0xB0, 0xC0 }, "??", new byte [] { lead, 0xB0, 0xB0 }, new byte [] { 0xC0 });
	}

	// F4 80..8F 80..BF 80..BF
	tester.TestDecoderFallback (new byte [] { 0xF4, 0x8F, 0xB0, 0xC0 }, "??", new byte [] { 0xF4, 0x8F, 0xB0 }, new byte [] { 0xC0 });
}
[Test]
public void DecoderBug23771 ()
{
	// U+733F takes 3 bytes in UTF-8; they are fed to a stateful decoder one
	// byte at a time to simulate chunked input.
	string input = "\u733F";
	byte[] encoded = Encoding.UTF8.GetBytes (input);
	Decoder decoder = Encoding.UTF8.GetDecoder ();
	char[] output = new char [10]; // more than enough room for the decoded text
	StringBuilder result = new StringBuilder ();
	byte[] chunk = new byte [1];

	for (int i = 0; i < encoded.Length; i++) {
		chunk [0] = encoded [i];
		int bytesUsed, charsUsed;
		bool completed;
		decoder.Convert (chunk, 0, chunk.Length, output, 0, output.Length, false, out bytesUsed, out charsUsed, out completed);
		result.Append (output, 0, charsUsed);
	}

	// The chunks must reassemble into the original character.
	Assert.AreEqual (input, result.ToString (), "#1");

	/*
	 * Expected per-chunk values are:
	 * bytesUsed:1, charsUsed:0, completed:True, result:''
	 * bytesUsed:1, charsUsed:0, completed:True, result:''
	 * bytesUsed:1, charsUsed:1, completed:True, result:'猿'
	 *
	 * -- Note: '猿' is U+733F (1 char in UTF-16)
	 *
	 * Values observed with the bug were:
	 * bytesUsed:3, charsUsed:1, completed:False, result:'�'
	 * bytesUsed:3, charsUsed:1, completed:False, result:'��'
	 * bytesUsed:3, charsUsed:1, completed:False, result:'���'
	 *
	 * None of the output parameters matched.
	 * -- Note: '�' is the decoder fallback char (U+FFFD)
	 */
}