2 // UTF8EncodingTest.cs - NUnit Test Cases for System.Text.UTF8Encoding
5 // Patrick Kalkman kalkman@cistron.nl
6 // Sebastien Pouliot (spouliot@motus.com)
8 // (C) 2003 Patrick Kalkman
9 // (C) 2004 Novell (http://www.novell.com)
12 using NUnit.Framework;
18 using DecoderException = System.Text.DecoderFallbackException;
20 using DecoderException = System.ArgumentException;
23 using AssertType = NUnit.Framework.Assert;
25 namespace MonoTests.System.Text
28 public class UTF8EncodingTest
// Encoding instance shared by the Is* property tests and the T1-T5 decoding tests.
30 private UTF8Encoding utf8;
// Both constructor flags are true (emit the UTF-8 identifier, throw on invalid bytes);
// the malformed-input tests rely on the throwing behaviour.
35 utf8 = new UTF8Encoding (true, true);
// Checks that the shared UTF-8 encoding reports IsBrowserDisplay as true.
39 public void IsBrowserDisplay ()
41 Assert.IsTrue (utf8.IsBrowserDisplay);
// Checks that the shared UTF-8 encoding reports IsBrowserSave as true.
45 public void IsBrowserSave ()
47 Assert.IsTrue (utf8.IsBrowserSave);
// Checks that the shared UTF-8 encoding reports IsMailNewsDisplay as true.
51 public void IsMailNewsDisplay ()
53 Assert.IsTrue (utf8.IsMailNewsDisplay);
// Checks that the shared UTF-8 encoding reports IsMailNewsSave as true.
57 public void IsMailNewsSave ()
59 Assert.IsTrue (utf8.IsMailNewsSave);
// Encodes a four-character string with GetBytes(string) using a default-constructed
// UTF8Encoding and checks each of the seven resulting bytes (1-, 3-, 2- and 1-byte sequences).
63 public void TestEncodingGetBytes1()
65 UTF8Encoding utf8Enc = new UTF8Encoding ();
66 string UniCode = "\u0041\u2262\u0391\u002E";
68 // "A<NOT IDENTICAL TO><ALPHA>." may be encoded as 41 E2 89 A2 CE 91 2E
70 byte[] utf8Bytes = utf8Enc.GetBytes (UniCode);
72 Assert.AreEqual (0x41, utf8Bytes [0], "UTF #1");
73 Assert.AreEqual (0xE2, utf8Bytes [1], "UTF #2");
74 Assert.AreEqual (0x89, utf8Bytes [2], "UTF #3");
75 Assert.AreEqual (0xA2, utf8Bytes [3], "UTF #4");
76 Assert.AreEqual (0xCE, utf8Bytes [4], "UTF #5");
77 Assert.AreEqual (0x91, utf8Bytes [5], "UTF #6");
78 Assert.AreEqual (0x2E, utf8Bytes [6], "UTF #7");
// Encodes a nine-character string through the GetBytes(char[], int, int, byte[], int)
// overload into a preallocated 11-byte buffer, then checks the returned byte count
// and every byte written.
82 public void TestEncodingGetBytes2()
84 UTF8Encoding utf8Enc = new UTF8Encoding ();
85 string UniCode = "\u0048\u0069\u0020\u004D\u006F\u006D\u0020\u263A\u0021";
87 // "Hi Mom <WHITE SMILING FACE>!" may be encoded as 48 69 20 4D 6F 6D 20 E2 98 BA 21
89 byte[] utf8Bytes = new byte [11];
91 int ByteCnt = utf8Enc.GetBytes (UniCode.ToCharArray(), 0, UniCode.Length, utf8Bytes, 0);
92 Assert.AreEqual (11, ByteCnt, "UTF #1");
93 Assert.AreEqual (0x48, utf8Bytes [0], "UTF #2");
94 Assert.AreEqual (0x69, utf8Bytes [1], "UTF #3");
95 Assert.AreEqual (0x20, utf8Bytes [2], "UTF #4");
96 Assert.AreEqual (0x4D, utf8Bytes [3], "UTF #5");
97 Assert.AreEqual (0x6F, utf8Bytes [4], "UTF #6");
98 Assert.AreEqual (0x6D, utf8Bytes [5], "UTF #7");
99 Assert.AreEqual (0x20, utf8Bytes [6], "UTF #8");
100 Assert.AreEqual (0xE2, utf8Bytes [7], "UTF #9");
101 Assert.AreEqual (0x98, utf8Bytes [8], "UTF #10");
102 Assert.AreEqual (0xBA, utf8Bytes [9], "UTF #11");
103 Assert.AreEqual (0x21, utf8Bytes [10], "UTF #12");
// Decodes seven bytes with GetChars(byte[]) and checks the four resulting UTF-16 code units.
107 public void TestDecodingGetChars1()
109 UTF8Encoding utf8Enc = new UTF8Encoding ();
110 // 41 E2 89 A2 CE 91 2E may be decoded as "A<NOT IDENTICAL TO><ALPHA>."
112 byte[] utf8Bytes = new byte [] {0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E};
113 char[] UniCodeChars = utf8Enc.GetChars(utf8Bytes);
115 Assert.AreEqual (0x0041, UniCodeChars [0], "UTF #1");
116 Assert.AreEqual (0x2262, UniCodeChars [1], "UTF #2");
117 Assert.AreEqual (0x0391, UniCodeChars [2], "UTF #3");
118 Assert.AreEqual (0x002E, UniCodeChars [3], "UTF #4");
// Checks GetMaxCharCount(50) on a default-constructed UTF8Encoding.
// NOTE(review): the two assertions expect different results (51 and 50) for the same call
// and share the label "UTF #1". They look like alternatives selected by conditional
// compilation whose directives are not visible here - confirm before editing.
123 [Category ("NotWorking")]
125 public void TestMaxCharCount()
127 UTF8Encoding UTF8enc = new UTF8Encoding ();
129 // hmm, where is this extra 1 coming from?
130 Assert.AreEqual (51, UTF8enc.GetMaxCharCount(50), "UTF #1");
132 Assert.AreEqual (50, UTF8enc.GetMaxCharCount(50), "UTF #1");
// Checks GetMaxByteCount(50) on a default-constructed UTF8Encoding.
// NOTE(review): the two assertions expect different results (153 and 200) for the same call
// and share the label "UTF #1". They look like alternatives selected by conditional
// compilation whose directives are not visible here - confirm before editing.
138 [Category ("NotWorking")]
140 public void TestMaxByteCount()
142 UTF8Encoding UTF8enc = new UTF8Encoding ();
144 // maybe under .NET 2.0 insufficient surrogate pair is
145 // just not handled, and 3 is Preamble size.
146 Assert.AreEqual (153, UTF8enc.GetMaxByteCount(50), "UTF #1");
148 Assert.AreEqual (200, UTF8enc.GetMaxByteCount(50), "UTF #1");
// Decodes the invalid pair C0 AF, alone and embedded between ASCII digits, with an encoding
// built as UTF8Encoding (true, false), i.e. with the throw-on-invalid flag off.
// Two expectation sets are present: one where each invalid byte becomes U+FFFD, and one
// where the invalid bytes are dropped.
// NOTE(review): the sets are mutually exclusive and `string s` is declared twice, so they
// are presumably alternatives under conditional compilation not visible here - confirm.
152 // regression for bug #59648
154 public void TestThrowOnInvalid ()
156 UTF8Encoding u = new UTF8Encoding (true, false);
158 byte[] data = new byte [] { 0xC0, 0xAF };
160 Assert.AreEqual (2, u.GetCharCount (data), "#A0");
161 string s = u.GetString (data);
162 Assert.AreEqual ("\uFFFD\uFFFD", s, "#A1");
164 Assert.AreEqual (0, u.GetCharCount (data), "#A0");
165 string s = u.GetString (data);
166 Assert.AreEqual (String.Empty, s, "#A1");
169 data = new byte [] { 0x30, 0x31, 0xC0, 0xAF, 0x30, 0x32 };
170 s = u.GetString (data);
172 Assert.AreEqual (6, s.Length, "#B1");
173 Assert.AreEqual (0x30, (int) s [0], "#B2");
174 Assert.AreEqual (0x31, (int) s [1], "#B3");
175 Assert.AreEqual (0xFFFD, (int) s [2], "#B4");
176 Assert.AreEqual (0xFFFD, (int) s [3], "#B5");
177 Assert.AreEqual (0x30, (int) s [4], "#B6");
178 Assert.AreEqual (0x32, (int) s [5], "#B7");
180 Assert.AreEqual (4, s.Length, "#B1");
181 Assert.AreEqual (0x30, (int) s [0], "#B2");
182 Assert.AreEqual (0x31, (int) s [1], "#B3");
183 Assert.AreEqual (0x30, (int) s [2], "#B4");
184 Assert.AreEqual (0x32, (int) s [3], "#B5");
188 // UTF8 decoding tests from http://www.cl.cam.ac.uk/~mgk25/
// Round-trip check: decodes an 11-byte sequence and verifies that re-encoding the
// resulting string reproduces the original bytes (compared as hex strings).
191 public void T1_Correct_GreekWord_kosme ()
193 byte[] data = { 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5 };
194 string s = utf8.GetString (data);
195 // cute but saving source code in unicode can be problematic
196 // so we just ensure we can re-encode this
197 Assert.AreEqual (BitConverter.ToString (data), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted");
// Decodes the smallest value of each sequence length from 1 to 4 bytes
// (U+0000, U+0080, U+0800, U+10000) and checks both the decoded code units and that
// re-encoding yields the original bytes. U+10000 decodes to the surrogate pair D800 DC00.
201 public void T2_Boundary_1_FirstPossibleSequence_Pass ()
203 byte[] data211 = { 0x00 };
204 string s = utf8.GetString (data211);
205 Assert.AreEqual ("\0", s, "1 byte (U-00000000)");
206 Assert.AreEqual (BitConverter.ToString (data211), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-1");
208 byte[] data212 = { 0xC2, 0x80 };
209 s = utf8.GetString (data212);
210 Assert.AreEqual (128, s [0], "2 bytes (U-00000080)");
211 Assert.AreEqual (BitConverter.ToString (data212), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-2");
213 byte[] data213 = { 0xE0, 0xA0, 0x80 };
214 s = utf8.GetString (data213);
215 Assert.AreEqual (2048, s [0], "3 bytes (U-00000800)");
216 Assert.AreEqual (BitConverter.ToString (data213), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-3");
218 byte[] data214 = { 0xF0, 0x90, 0x80, 0x80 };
219 s = utf8.GetString (data214);
220 Assert.AreEqual (55296, s [0], "4 bytes (U-00010000)-0");
221 Assert.AreEqual (56320, s [1], "4 bytes (U-00010000)-1");
222 Assert.AreEqual (BitConverter.ToString (data214), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-4");
// Smallest 5-byte form (F8 88 80 80 80): decoding is expected to throw DecoderException.
// The assertions after GetString are not reached when the exception is thrown.
227 [ExpectedException (typeof (DecoderException))]
228 public void T2_Boundary_1_FirstPossibleSequence_Fail_5 ()
230 byte[] data215 = { 0xF8, 0x88, 0x80, 0x80, 0x80 };
231 string s = utf8.GetString (data215);
232 Assert.IsNull (s, "5 bytes (U-00200000)");
233 Assert.AreEqual (BitConverter.ToString (data215), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-5");
// Smallest 6-byte form (FC 84 80 80 80 80): decoding is expected to throw DecoderException.
// The assertions after GetString are not reached when the exception is thrown.
238 [ExpectedException (typeof (DecoderException))]
239 public void T2_Boundary_1_FirstPossibleSequence_Fail_6 ()
241 byte[] data216 = { 0xFC, 0x84, 0x80, 0x80, 0x80, 0x80 };
242 string s = utf8.GetString (data216);
243 Assert.IsNull (s, "6 bytes (U-04000000)");
244 Assert.AreEqual (BitConverter.ToString (data216), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-6");
// Decodes the largest value of the 1-, 2- and 3-byte sequence lengths
// (U+007F, U+07FF, U+FFFF) and checks the decoded code unit and the re-encoded bytes.
248 public void T2_Boundary_2_LastPossibleSequence_Pass ()
250 byte[] data221 = { 0x7F };
251 string s = utf8.GetString (data221);
252 Assert.AreEqual (127, s [0], "1 byte (U-0000007F)");
253 Assert.AreEqual (BitConverter.ToString (data221), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-1");
255 byte[] data222 = { 0xDF, 0xBF };
256 s = utf8.GetString (data222);
257 Assert.AreEqual (2047, s [0], "2 bytes (U-000007FF)");
258 Assert.AreEqual (BitConverter.ToString (data222), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-2");
260 byte[] data223 = { 0xEF, 0xBF, 0xBF };
261 s = utf8.GetString (data223);
262 Assert.AreEqual (65535, s [0], "3 bytes (U-0000FFFF)");
263 Assert.AreEqual (BitConverter.ToString (data223), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-3");
// Largest value a 4-byte sequence can carry, U-001FFFFF (F7 BF BF BF). It lies beyond
// U+10FFFF, so decoding is expected to throw DecoderException.
// The assertions after GetString are not reached when the exception is thrown.
269 [ExpectedException (typeof (DecoderException))]
270 public void T2_Boundary_2_LastPossibleSequence_Fail_4 ()
// The lead byte must be 0xF7 (4-byte lead with all payload bits set), not ASCII 0x7F;
// otherwise the failure comes from stray continuation bytes and not from the
// out-of-range code point this test is named for.
272 byte[] data224 = { 0xF7, 0xBF, 0xBF, 0xBF };
273 string s = utf8.GetString (data224);
274 Assert.IsNull (s, "4 bytes (U-001FFFFF)");
275 Assert.AreEqual (BitConverter.ToString (data224), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-4");
// Largest 5-byte form (FB BF BF BF BF): decoding is expected to throw DecoderException.
// The assertions after GetString are not reached when the exception is thrown.
280 [ExpectedException (typeof (DecoderException))]
281 public void T2_Boundary_2_LastPossibleSequence_Fail_5 ()
283 byte[] data225 = { 0xFB, 0xBF, 0xBF, 0xBF, 0xBF };
284 string s = utf8.GetString (data225);
285 Assert.IsNull (s, "5 bytes (U-03FFFFFF)");
286 Assert.AreEqual (BitConverter.ToString (data225), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-5");
// Largest 6-byte form (FD BF BF BF BF BF): decoding is expected to throw DecoderException.
// The assertions after GetString are not reached when the exception is thrown.
291 [ExpectedException (typeof (DecoderException))]
292 public void T2_Boundary_2_LastPossibleSequence_Fail_6 ()
294 byte[] data226 = { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF, 0xBF };
295 string s = utf8.GetString (data226);
296 Assert.IsNull (s, "6 bytes (U-7FFFFFFF)");
297 Assert.AreEqual (BitConverter.ToString (data226), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-6");
// Decodes values adjacent to the surrogate range and the top of the code space
// (U+D7FF, U+E000, U+FFFD, U+10FFFF) and checks the decoded code units and the
// re-encoded bytes. U+10FFFF decodes to the surrogate pair DBFF DFFF.
301 public void T2_Boundary_3_Other_Pass ()
303 byte[] data231 = { 0xED, 0x9F, 0xBF };
304 string s = utf8.GetString (data231);
305 Assert.AreEqual (55295, s [0], "U-0000D7FF");
306 Assert.AreEqual (BitConverter.ToString (data231), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-1");
308 byte[] data232 = { 0xEE, 0x80, 0x80 };
309 s = utf8.GetString (data232);
310 Assert.AreEqual (57344, s [0], "U-0000E000");
311 Assert.AreEqual (BitConverter.ToString (data232), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-2");
313 byte[] data233 = { 0xEF, 0xBF, 0xBD };
314 s = utf8.GetString (data233);
315 Assert.AreEqual (65533, s [0], "U-0000FFFD");
316 Assert.AreEqual (BitConverter.ToString (data233), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-3");
318 byte[] data234 = { 0xF4, 0x8F, 0xBF, 0xBF };
319 s = utf8.GetString (data234);
320 Assert.AreEqual (56319, s [0], "U-0010FFFF-0");
321 Assert.AreEqual (57343, s [1], "U-0010FFFF-1");
322 Assert.AreEqual (BitConverter.ToString (data234), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-4");
// F4 90 80 80 encodes U-00110000, one past the last valid code point; decoding is
// expected to throw DecoderException. The assertions after GetString are not reached
// when the exception is thrown.
327 [ExpectedException (typeof (DecoderException))]
328 public void T2_Boundary_3_Other_Fail_5 ()
330 byte[] data235 = { 0xF4, 0x90, 0x80, 0x80 };
331 string s = utf8.GetString (data235);
332 Assert.IsNull (s, "U-00110000");
333 Assert.AreEqual (BitConverter.ToString (data235), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-5");
// A lone continuation byte (0x80) with no lead byte must make GetString throw.
337 [ExpectedException (typeof (DecoderException))]
338 public void T3_Malformed_1_UnexpectedContinuation_311 ()
340 byte[] data = { 0x80 };
341 string s = utf8.GetString (data);
342 // exception is "really" expected here
// A lone continuation byte (0xBF) with no lead byte must make GetString throw.
346 [ExpectedException (typeof (DecoderException))]
347 public void T3_Malformed_1_UnexpectedContinuation_312 ()
349 byte[] data = { 0xBF };
350 string s = utf8.GetString (data);
351 // exception is "really" expected here
// Two continuation bytes with no lead byte must make GetString throw.
355 [ExpectedException (typeof (DecoderException))]
356 public void T3_Malformed_1_UnexpectedContinuation_313 ()
358 byte[] data = { 0x80, 0xBF };
359 string s = utf8.GetString (data);
360 // exception is "really" expected here
// Three continuation bytes with no lead byte must make GetString throw.
364 [ExpectedException (typeof (DecoderException))]
365 public void T3_Malformed_1_UnexpectedContinuation_314 ()
367 byte[] data = { 0x80, 0xBF, 0x80 };
368 string s = utf8.GetString (data);
369 // exception is "really" expected here
// Four continuation bytes with no lead byte must make GetString throw.
373 [ExpectedException (typeof (DecoderException))]
374 public void T3_Malformed_1_UnexpectedContinuation_315 ()
376 byte[] data = { 0x80, 0xBF, 0x80, 0xBF };
377 string s = utf8.GetString (data);
378 // exception is "really" expected here
// Five continuation bytes with no lead byte must make GetString throw.
382 [ExpectedException (typeof (DecoderException))]
383 public void T3_Malformed_1_UnexpectedContinuation_316 ()
385 byte[] data = { 0x80, 0xBF, 0x80, 0xBF, 0x80 };
386 string s = utf8.GetString (data);
387 // exception is "really" expected here
// Six continuation bytes with no lead byte must make GetString throw.
391 [ExpectedException (typeof (DecoderException))]
392 public void T3_Malformed_1_UnexpectedContinuation_317 ()
394 byte[] data = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF };
395 string s = utf8.GetString (data);
396 // exception is "really" expected here
// Seven continuation bytes with no lead byte must make GetString throw.
400 [ExpectedException (typeof (DecoderException))]
401 public void T3_Malformed_1_UnexpectedContinuation_318 ()
403 byte[] data = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 };
404 string s = utf8.GetString (data);
405 // exception is "really" expected here
// Feeds every continuation byte value 0x80..0xBF in order, with no lead byte;
// GetString must throw.
// NOTE(review): the opening of the `data` array initializer is not visible in this
// excerpt; the four hex rows below are its contents.
409 [ExpectedException (typeof (DecoderException))]
410 public void T3_Malformed_1_UnexpectedContinuation_319 ()
412 // 64 different continuation characters
414 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
415 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
416 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
417 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF };
418 string s = utf8.GetString (data);
419 // exception is "really" expected here
// Every 2-byte lead byte 0xC0..0xDF, each followed by a space (0x20) where a
// continuation byte should be; GetString must throw.
// NOTE(review): the opening of the `data` array initializer is not visible in this excerpt.
423 [ExpectedException (typeof (DecoderException))]
424 public void T3_Malformed_2_LonelyStart_321 ()
427 0xC0, 0x20, 0xC1, 0x20, 0xC2, 0x20, 0xC3, 0x20, 0xC4, 0x20, 0xC5, 0x20, 0xC6, 0x20, 0xC7, 0x20,
428 0xC8, 0x20, 0xC9, 0x20, 0xCA, 0x20, 0xCB, 0x20, 0xCC, 0x20, 0xCD, 0x20, 0xCE, 0x20, 0xCF, 0x20,
429 0xD0, 0x20, 0xD1, 0x20, 0xD2, 0x20, 0xD3, 0x20, 0xD4, 0x20, 0xD5, 0x20, 0xD6, 0x20, 0xD7, 0x20,
430 0xD8, 0x20, 0xD9, 0x20, 0xDA, 0x20, 0xDB, 0x20, 0xDC, 0x20, 0xDD, 0x20, 0xDE, 0x20, 0xDF, 0x20 };
431 string s = utf8.GetString (data);
432 // exception is "really" expected here
// Every 3-byte lead byte 0xE0..0xEF, each followed by a space (0x20) where a
// continuation byte should be; GetString must throw.
// NOTE(review): the opening of the `data` array initializer is not visible in this excerpt.
436 [ExpectedException (typeof (DecoderException))]
437 public void T3_Malformed_2_LonelyStart_322 ()
440 0xE0, 0x20, 0xE1, 0x20, 0xE2, 0x20, 0xE3, 0x20, 0xE4, 0x20, 0xE5, 0x20, 0xE6, 0x20, 0xE7, 0x20,
441 0xE8, 0x20, 0xE9, 0x20, 0xEA, 0x20, 0xEB, 0x20, 0xEC, 0x20, 0xED, 0x20, 0xEE, 0x20, 0xEF, 0x20 };
442 string s = utf8.GetString (data);
443 // exception is "really" expected here
// Every 4-byte lead byte 0xF0..0xF7, each followed by a space (0x20) where a
// continuation byte should be; GetString must throw.
447 [ExpectedException (typeof (DecoderException))]
448 public void T3_Malformed_2_LonelyStart_323 ()
450 byte[] data = { 0xF0, 0x20, 0xF1, 0x20, 0xF2, 0x20, 0xF3, 0x20, 0xF4, 0x20, 0xF5, 0x20, 0xF6, 0x20, 0xF7, 0x20 };
451 string s = utf8.GetString (data);
452 // exception is "really" expected here
// Every 5-byte lead byte 0xF8..0xFB, each followed by a space (0x20) where a
// continuation byte should be; GetString must throw.
456 [ExpectedException (typeof (DecoderException))]
457 public void T3_Malformed_2_LonelyStart_324 ()
// Uses the 5-byte lead bytes; the 4-byte leads (0xF0..0xF7) are already covered by
// T3_Malformed_2_LonelyStart_323 and the 6-byte leads (0xFC, 0xFD) by _325.
459 byte[] data = { 0xF8, 0x20, 0xF9, 0x20, 0xFA, 0x20, 0xFB, 0x20 };
460 string s = utf8.GetString (data);
461 // exception is "really" expected here
// Both 6-byte lead bytes (0xFC, 0xFD), each followed by a space (0x20) where a
// continuation byte should be; GetString must throw.
465 [ExpectedException (typeof (DecoderException))]
466 public void T3_Malformed_2_LonelyStart_325 ()
468 byte[] data = { 0xFC, 0x20, 0xFD, 0x20 };
469 string s = utf8.GetString (data);
470 // exception is "really" expected here
// A 2-byte lead (0xC0) with its only continuation byte missing; GetString must throw.
474 [ExpectedException (typeof (DecoderException))]
475 public void T3_Malformed_3_LastContinuationMissing_331 ()
477 byte[] data = { 0xC0 };
478 string s = utf8.GetString (data);
479 // exception is "really" expected here
// A 3-byte lead (0xE0) with its last continuation byte missing; GetString must throw.
483 [ExpectedException (typeof (DecoderException))]
484 public void T3_Malformed_3_LastContinuationMissing_332 ()
486 byte[] data = { 0xE0, 0x80 };
487 string s = utf8.GetString (data);
488 // exception is "really" expected here
// A 4-byte lead (0xF0) with its last continuation byte missing; GetString must throw.
492 [ExpectedException (typeof (DecoderException))]
493 public void T3_Malformed_3_LastContinuationMissing_333 ()
495 byte[] data = { 0xF0, 0x80, 0x80 };
496 string s = utf8.GetString (data);
497 // exception is "really" expected here
// A 5-byte lead (0xF8) with its last continuation byte missing; GetString must throw.
501 [ExpectedException (typeof (DecoderException))]
502 public void T3_Malformed_3_LastContinuationMissing_334 ()
504 byte[] data = { 0xF8, 0x80, 0x80, 0x80 };
505 string s = utf8.GetString (data);
506 // exception is "really" expected here
// A 6-byte lead (0xFC) with its last continuation byte missing; GetString must throw.
510 [ExpectedException (typeof (DecoderException))]
511 public void T3_Malformed_3_LastContinuationMissing_335 ()
513 byte[] data = { 0xFC, 0x80, 0x80, 0x80, 0x80 };
514 string s = utf8.GetString (data);
515 // exception is "really" expected here
// A 2-byte lead (0xDF) with its continuation byte missing. Two outcomes are tolerated:
// an empty string (the MS Fx 1.1 result) or a DecoderException, which the catch swallows.
// NOTE(review): the `try` opener matching the catch below is not visible in this excerpt.
519 // MS Fx 1.1 accept this
520 // [ExpectedException (typeof (DecoderException))]
521 public void T3_Malformed_3_LastContinuationMissing_336 ()
523 byte[] data = { 0xDF };
525 string s = utf8.GetString (data);
526 // exception is "really" expected here
527 Assert.AreEqual (String.Empty, s, "MS FX 1.1 behaviour");
529 catch (DecoderException) {
530 // but Mono doesn't - better stick to the standard
// A 3-byte lead (0xEF) with its last continuation byte missing. Two outcomes are tolerated:
// an empty string (the MS Fx 1.1 result) or a DecoderException, which the catch swallows.
// NOTE(review): the `try` opener matching the catch below is not visible in this excerpt.
535 // MS Fx 1.1 accept this
536 // [ExpectedException (typeof (DecoderException))]
537 public void T3_Malformed_3_LastContinuationMissing_337 ()
539 byte[] data = { 0xEF, 0xBF };
541 string s = utf8.GetString (data);
542 // exception is "really" expected here
543 Assert.AreEqual (String.Empty, s, "MS FX 1.1 behaviour");
545 catch (DecoderException) {
546 // but Mono doesn't - better stick to the standard
// A 4-byte lead (0xF7) with its last continuation byte missing; GetString must throw.
551 [ExpectedException (typeof (DecoderException))]
552 public void T3_Malformed_3_LastContinuationMissing_338 ()
554 byte[] data = { 0xF7, 0xBF, 0xBF };
555 string s = utf8.GetString (data);
556 // exception is "really" expected here
// A 5-byte lead (0xFB) followed by only three of its four continuation bytes;
// GetString must throw.
560 [ExpectedException (typeof (DecoderException))]
561 public void T3_Malformed_3_LastContinuationMissing_339 ()
// The lead byte must be 0xFB. A truncated 0xF is an ASCII control byte, which would make
// this a stray-continuation test rather than a missing-continuation one. The same
// FB BF BF BF run appears in T3_Malformed_4_ConcatenationImcomplete.
563 byte[] data = { 0xFB, 0xBF, 0xBF, 0xBF };
564 string s = utf8.GetString (data);
565 // exception is "really" expected here
// A 6-byte lead (0xFD) with its last continuation byte missing; GetString must throw.
569 [ExpectedException (typeof (DecoderException))]
570 public void T3_Malformed_3_LastContinuationMissing_3310 ()
572 byte[] data = { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF };
573 string s = utf8.GetString (data);
574 // exception is "really" expected here
// Concatenates the ten incomplete sequences used by the LastContinuationMissing tests
// (C0 / E0 80 / F0 80 80 / F8 80 80 80 / FC 80 80 80 80 / DF / EF BF / F7 BF BF /
// FB BF BF BF / FD BF BF BF BF) into one buffer; GetString must throw.
// NOTE(review): the opening of the `data` array initializer is not visible in this excerpt.
578 [ExpectedException (typeof (DecoderException))]
579 public void T3_Malformed_4_ConcatenationImcomplete ()
582 0xC0, 0xE0, 0x80, 0xF0, 0x80, 0x80, 0xF8, 0x80, 0x80, 0x80, 0xFC, 0x80, 0x80, 0x80, 0x80, 0xDF,
583 0xEF, 0xBF, 0xF7, 0xBF, 0xBF, 0xFB, 0xBF, 0xBF, 0xBF, 0xFD, 0xBF, 0xBF, 0xBF, 0xBF };
584 string s = utf8.GetString (data);
585 // exception is "really" expected here
// The single byte 0xFE is passed to GetString, which must throw.
589 [ExpectedException (typeof (DecoderException))]
590 public void T3_Malformed_5_ImpossibleBytes_351 ()
592 byte[] data = { 0xFE };
593 string s = utf8.GetString (data);
594 // exception is "really" expected here
// The single byte 0xFF is passed to GetString, which must throw.
598 [ExpectedException (typeof (DecoderException))]
599 public void T3_Malformed_5_ImpossibleBytes_352 ()
601 byte[] data = { 0xFF };
602 string s = utf8.GetString (data);
603 // exception is "really" expected here
// The byte run FE FE FF FF is passed to GetString, which must throw.
607 [ExpectedException (typeof (DecoderException))]
608 public void T3_Malformed_5_ImpossibleBytes_353 ()
610 byte[] data = { 0xFE, 0xFE, 0xFF, 0xFF };
611 string s = utf8.GetString (data);
612 // exception is "really" expected here
615 // Overlong == dangerous -> "safe" decoder should reject them
// Overlong 2-byte encoding of '/' (U+002F) as C0 AF; GetString must throw.
618 [ExpectedException (typeof (DecoderException))]
619 public void T4_Overlong_1_ASCII_Slash_411 ()
621 byte[] data = { 0xC0, 0xAF };
622 string s = utf8.GetString (data);
623 // exception is "really" expected here
// Overlong 3-byte encoding of '/' (U+002F) as E0 80 AF; GetString must throw.
627 [ExpectedException (typeof (DecoderException))]
628 public void T4_Overlong_1_ASCII_Slash_412 ()
630 byte[] data = { 0xE0, 0x80, 0xAF };
631 string s = utf8.GetString (data);
632 // exception is "really" expected here
// Overlong 4-byte encoding of '/' (U+002F) as F0 80 80 AF; GetString must throw.
636 [ExpectedException (typeof (DecoderException))]
637 public void T4_Overlong_1_ASCII_Slash_413 ()
639 byte[] data = { 0xF0, 0x80, 0x80, 0xAF };
640 string s = utf8.GetString (data);
641 // exception is "really" expected here
// Overlong 5-byte encoding of '/' (U+002F) as F8 80 80 80 AF; GetString must throw.
645 [ExpectedException (typeof (DecoderException))]
646 public void T4_Overlong_1_ASCII_Slash_414 ()
648 byte[] data = { 0xF8, 0x80, 0x80, 0x80, 0xAF };
649 string s = utf8.GetString (data);
650 // exception is "really" expected here
// Overlong 6-byte encoding of '/' (U+002F) as FC 80 80 80 80 AF; GetString must throw.
654 [ExpectedException (typeof (DecoderException))]
655 public void T4_Overlong_1_ASCII_Slash_415 ()
657 byte[] data = { 0xFC, 0x80, 0x80, 0x80, 0x80, 0xAF };
658 string s = utf8.GetString (data);
659 // exception is "really" expected here
// C1 BF is the highest value (U+007F) still expressible as an overlong 2-byte form;
// GetString must throw.
663 [ExpectedException (typeof (DecoderException))]
664 public void T4_Overlong_2_MaximumBoundary_421 ()
666 byte[] data = { 0xC1, 0xBF };
667 string s = utf8.GetString (data);
668 // exception is "really" expected here
// E0 9F BF is the highest value (U+07FF) still expressible as an overlong 3-byte form;
// GetString must throw.
672 [ExpectedException (typeof (DecoderException))]
673 public void T4_Overlong_2_MaximumBoundary_422 ()
675 byte[] data = { 0xE0, 0x9F, 0xBF };
676 string s = utf8.GetString (data);
677 // exception is "really" expected here
// F0 8F BF BF is the highest value (U+FFFF) still expressible as an overlong 4-byte form;
// GetString must throw.
681 [ExpectedException (typeof (DecoderException))]
682 public void T4_Overlong_2_MaximumBoundary_423 ()
684 byte[] data = { 0xF0, 0x8F, 0xBF, 0xBF };
685 string s = utf8.GetString (data);
686 // exception is "really" expected here
// F8 87 BF BF BF is the highest value still expressible as an overlong 5-byte form;
// GetString must throw.
690 [ExpectedException (typeof (DecoderException))]
691 public void T4_Overlong_2_MaximumBoundary_424 ()
693 byte[] data = { 0xF8, 0x87, 0xBF, 0xBF, 0xBF };
694 string s = utf8.GetString (data);
695 // exception is "really" expected here
// FC 83 BF BF BF BF is the highest value still expressible as an overlong 6-byte form;
// GetString must throw.
699 [ExpectedException (typeof (DecoderException))]
700 public void T4_Overlong_2_MaximumBoundary_425 ()
702 byte[] data = { 0xFC, 0x83, 0xBF, 0xBF, 0xBF, 0xBF };
703 string s = utf8.GetString (data);
704 // exception is "really" expected here
// Overlong 2-byte encoding of NUL (C0 80); GetString must throw.
708 [ExpectedException (typeof (DecoderException))]
709 public void T4_Overlong_3_NUL_431 ()
711 byte[] data = { 0xC0, 0x80 };
712 string s = utf8.GetString (data);
713 // exception is "really" expected here
// Overlong 3-byte encoding of NUL (E0 80 80); GetString must throw.
717 [ExpectedException (typeof (DecoderException))]
718 public void T4_Overlong_3_NUL_432 ()
720 byte[] data = { 0xE0, 0x80, 0x80 };
721 string s = utf8.GetString (data);
722 // exception is "really" expected here
// Overlong 4-byte encoding of NUL (F0 80 80 80); GetString must throw.
726 [ExpectedException (typeof (DecoderException))]
727 public void T4_Overlong_3_NUL_433 ()
729 byte[] data = { 0xF0, 0x80, 0x80, 0x80 };
730 string s = utf8.GetString (data);
731 // exception is "really" expected here
// Overlong 5-byte encoding of NUL (F8 80 80 80 80); GetString must throw.
735 [ExpectedException (typeof (DecoderException))]
736 public void T4_Overlong_3_NUL_434 ()
738 byte[] data = { 0xF8, 0x80, 0x80, 0x80, 0x80 };
739 string s = utf8.GetString (data);
740 // exception is "really" expected here
// Overlong 6-byte encoding of NUL (FC 80 80 80 80 80); GetString must throw.
744 [ExpectedException (typeof (DecoderException))]
745 public void T4_Overlong_3_NUL_435 ()
747 byte[] data = { 0xFC, 0x80, 0x80, 0x80, 0x80, 0x80 };
748 string s = utf8.GetString (data);
749 // exception is "really" expected here
// ED A0 80 is the 3-byte form of the lone surrogate U+D800 (55296); decoding is expected to throw.
// The trailing assertion records the MS Fx 1.1 result and is not reached when GetString throws.
// NOTE(review): two ExpectedException attributes appear; presumably alternatives under
// conditional compilation not visible here - confirm.
754 [ExpectedException (typeof (DecoderFallbackException))]
756 // MS Fx 1.1 accept this
757 [Category ("NotDotNet")]
758 [ExpectedException (typeof (DecoderException))]
760 public void T5_IllegalCodePosition_1_UTF16Surrogates_511 ()
762 byte[] data = { 0xED, 0xA0, 0x80 };
763 string s = utf8.GetString (data);
764 // exception is "really" expected here
765 Assert.AreEqual (55296, s [0], "MS FX 1.1 behaviour");
// ED AD BF is the 3-byte form of the lone surrogate U+DB7F (56191); decoding is expected to throw.
// The trailing assertion records the MS Fx 1.1 result and is not reached when GetString throws.
// NOTE(review): two ExpectedException attributes appear; presumably alternatives under
// conditional compilation not visible here - confirm.
770 [ExpectedException (typeof (DecoderFallbackException))]
772 // MS Fx 1.1 accept this
773 [Category ("NotDotNet")]
774 [ExpectedException (typeof (DecoderException))]
776 public void T5_IllegalCodePosition_1_UTF16Surrogates_512 ()
778 byte[] data = { 0xED, 0xAD, 0xBF };
779 string s = utf8.GetString (data);
780 // exception is "really" expected here
781 Assert.AreEqual (56191, s [0], "MS FX 1.1 behaviour");
// ED AE 80 is the 3-byte form of the lone surrogate U+DB80 (56192); decoding is expected to throw.
// The trailing assertion records the MS Fx 1.1 result and is not reached when GetString throws.
// NOTE(review): two ExpectedException attributes appear; presumably alternatives under
// conditional compilation not visible here - confirm.
786 [ExpectedException (typeof (DecoderFallbackException))]
788 // MS Fx 1.1 accept this
789 [Category ("NotDotNet")]
790 [ExpectedException (typeof (DecoderException))]
792 public void T5_IllegalCodePosition_1_UTF16Surrogates_513 ()
794 byte[] data = { 0xED, 0xAE, 0x80 };
795 string s = utf8.GetString (data);
796 // exception is "really" expected here
797 Assert.AreEqual (56192, s [0], "MS FX 1.1 behaviour");
// ED AF BF is the 3-byte form of the lone surrogate U+DBFF (56319); decoding is expected to throw.
// The trailing assertion records the MS Fx 1.1 result and is not reached when GetString throws.
// NOTE(review): two ExpectedException attributes appear; presumably alternatives under
// conditional compilation not visible here - confirm.
802 [ExpectedException (typeof (DecoderFallbackException))]
804 // MS Fx 1.1 accept this
805 [Category ("NotDotNet")]
806 [ExpectedException (typeof (DecoderException))]
808 public void T5_IllegalCodePosition_1_UTF16Surrogates_514 ()
810 byte[] data = { 0xED, 0xAF, 0xBF };
811 string s = utf8.GetString (data);
812 // exception is "really" expected here
813 Assert.AreEqual (56319, s [0], "MS FX 1.1 behaviour");
// ED B0 80 is the 3-byte form of the lone surrogate U+DC00 (56320); decoding is expected to throw.
// The trailing assertion records the MS Fx 1.1 result and is not reached when GetString throws.
// NOTE(review): two ExpectedException attributes appear; presumably alternatives under
// conditional compilation not visible here - confirm.
818 [ExpectedException (typeof (DecoderFallbackException))]
820 // MS Fx 1.1 accept this
821 [Category ("NotDotNet")]
822 [ExpectedException (typeof (DecoderException))]
824 public void T5_IllegalCodePosition_1_UTF16Surrogates_515 ()
826 byte[] data = { 0xED, 0xB0, 0x80 };
827 string s = utf8.GetString (data);
828 // exception is "really" expected here
829 Assert.AreEqual (56320, s [0], "MS FX 1.1 behaviour");
// ED BE 80 is the 3-byte form of the lone surrogate U+DF80 (57216); decoding is expected to throw.
// The trailing assertion records the MS Fx 1.1 result and is not reached when GetString throws.
// NOTE(review): two ExpectedException attributes appear; presumably alternatives under
// conditional compilation not visible here - confirm.
834 [ExpectedException (typeof (DecoderFallbackException))]
836 // MS Fx 1.1 accept this
837 [Category ("NotDotNet")]
838 [ExpectedException (typeof (DecoderException))]
840 public void T5_IllegalCodePosition_1_UTF16Surrogates_516 ()
842 byte[] data = { 0xED, 0xBE, 0x80 };
843 string s = utf8.GetString (data);
844 // exception is "really" expected here
845 Assert.AreEqual (57216, s [0], "MS FX 1.1 behaviour");
// ED BF BF is the 3-byte form of the lone surrogate U+DFFF (57343); decoding is expected to throw.
// The trailing assertion records the MS Fx 1.1 result and is not reached when GetString throws.
// NOTE(review): two ExpectedException attributes appear; presumably alternatives under
// conditional compilation not visible here - confirm.
850 [ExpectedException (typeof (DecoderFallbackException))]
852 // MS Fx 1.1 accept this
853 [Category ("NotDotNet")]
854 [ExpectedException (typeof (DecoderException))]
856 public void T5_IllegalCodePosition_1_UTF16Surrogates_517 ()
858 byte[] data = { 0xED, 0xBF, 0xBF };
859 string s = utf8.GetString (data);
860 // exception is "really" expected here
861 Assert.AreEqual (57343, s [0], "MS FX 1.1 behaviour");
// Surrogate pair U+D800 U+DC00 with each half encoded as its own 3-byte sequence;
// decoding is expected to throw.
// The trailing assertions record the MS Fx 1.1 result and are not reached when GetString throws.
// NOTE(review): two ExpectedException attributes appear; presumably alternatives under
// conditional compilation not visible here - confirm.
866 [ExpectedException (typeof (DecoderFallbackException))]
868 // MS Fx 1.1 accept this
869 [Category ("NotDotNet")]
870 [ExpectedException (typeof (DecoderException))]
872 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_521 ()
874 byte[] data = { 0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80 };
875 string s = utf8.GetString (data);
876 // exception is "really" expected here
877 Assert.AreEqual (55296, s [0], "MS FX 1.1 behaviour");
878 Assert.AreEqual (56320, s [1], "MS FX 1.1 behaviour");
// Surrogate pair U+D800 U+DFFF with each half encoded as its own 3-byte sequence;
// decoding is expected to throw.
// The trailing assertions record the MS Fx 1.1 result and are not reached when GetString throws.
// NOTE(review): two ExpectedException attributes appear; presumably alternatives under
// conditional compilation not visible here - confirm.
883 [ExpectedException (typeof (DecoderFallbackException))]
885 // MS Fx 1.1 accept this
886 [Category ("NotDotNet")]
887 [ExpectedException (typeof (DecoderException))]
889 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_522 ()
891 byte[] data = { 0xED, 0xA0, 0x80, 0xED, 0xBF, 0xBF };
892 string s = utf8.GetString (data);
893 // exception is "really" expected here
894 Assert.AreEqual (55296, s [0], "MS FX 1.1 behaviour");
895 Assert.AreEqual (57343, s [1], "MS FX 1.1 behaviour");
// Surrogate pair U+DB7F U+DC00 with each half encoded as its own 3-byte sequence;
// decoding is expected to throw.
// The trailing assertions record the MS Fx 1.1 result and are not reached when GetString throws.
// NOTE(review): two ExpectedException attributes appear; presumably alternatives under
// conditional compilation not visible here - confirm.
900 [ExpectedException (typeof (DecoderFallbackException))]
902 // MS Fx 1.1 accept this
903 [Category ("NotDotNet")]
904 [ExpectedException (typeof (DecoderException))]
906 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_523 ()
908 byte[] data = { 0xED, 0xAD, 0xBF, 0xED, 0xB0, 0x80 };
909 string s = utf8.GetString (data);
910 // exception is "really" expected here
911 Assert.AreEqual (56191, s [0], "MS FX 1.1 behaviour");
912 Assert.AreEqual (56320, s [1], "MS FX 1.1 behaviour");
// Surrogate pair U+DB7F U+DFFF with each half encoded as its own 3-byte sequence;
// decoding is expected to throw.
// The trailing assertions record the MS Fx 1.1 result and are not reached when GetString throws.
// NOTE(review): two ExpectedException attributes appear; presumably alternatives under
// conditional compilation not visible here - confirm.
917 [ExpectedException (typeof (DecoderFallbackException))]
919 // MS Fx 1.1 accept this
920 [Category ("NotDotNet")]
921 [ExpectedException (typeof (DecoderException))]
923 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_524 ()
925 byte[] data = { 0xED, 0xAD, 0xBF, 0xED, 0xBF, 0xBF };
926 string s = utf8.GetString (data);
927 // exception is "really" expected here
928 Assert.AreEqual (56191, s [0], "MS FX 1.1 behaviour");
929 Assert.AreEqual (57343, s [1], "MS FX 1.1 behaviour");
// Surrogate pair U+DB80 U+DC00 with each half encoded as its own 3-byte sequence;
// decoding is expected to throw.
// The trailing assertions record the MS Fx 1.1 result and are not reached when GetString throws.
// NOTE(review): two ExpectedException attributes appear; presumably alternatives under
// conditional compilation not visible here - confirm.
934 [ExpectedException (typeof (DecoderFallbackException))]
936 // MS Fx 1.1 accept this
937 [Category ("NotDotNet")]
938 [ExpectedException (typeof (DecoderException))]
940 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_525 ()
942 byte[] data = { 0xED, 0xAE, 0x80, 0xED, 0xB0, 0x80 };
943 string s = utf8.GetString (data);
944 // exception is "really" expected here
945 Assert.AreEqual (56192, s [0], "MS FX 1.1 behaviour");
946 Assert.AreEqual (56320, s [1], "MS FX 1.1 behaviour");
// Surrogate pair U+DB80 U+DFCF with each half encoded as its own 3-byte sequence;
// decoding is expected to throw.
// The trailing assertions record the MS Fx 1.1 result and are not reached when GetString throws.
// NOTE(review): the sibling tests pair each high surrogate with U+DC00 and U+DFFF
// (ED BF BF, 57343). The final byte 0x8F and the matching expectation 57295 here may be a
// typo for 0xBF / 57343 - confirm against the test's source material.
// NOTE(review): two ExpectedException attributes appear; presumably alternatives under
// conditional compilation not visible here - confirm.
951 [ExpectedException (typeof (DecoderFallbackException))]
953 // MS Fx 1.1 accept this
954 [Category ("NotDotNet")]
955 [ExpectedException (typeof (DecoderException))]
957 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_526 ()
959 byte[] data = { 0xED, 0xAE, 0x80, 0xED, 0xBF, 0x8F };
960 string s = utf8.GetString (data);
961 // exception is "really" expected here
962 Assert.AreEqual (56192, s [0], "MS FX 1.1 behaviour");
963 Assert.AreEqual (57295, s [1], "MS FX 1.1 behaviour");
// Surrogate pair U+DBFF U+DC00 with each half encoded as its own 3-byte sequence;
// decoding is expected to throw.
// The trailing assertions record the MS Fx 1.1 result and are not reached when GetString throws.
// NOTE(review): two ExpectedException attributes appear; presumably alternatives under
// conditional compilation not visible here - confirm.
968 [ExpectedException (typeof (DecoderFallbackException))]
970 // MS Fx 1.1 accept this
971 [Category ("NotDotNet")]
972 [ExpectedException (typeof (DecoderException))]
974 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_527 ()
976 byte[] data = { 0xED, 0xAF, 0xBF, 0xED, 0xB0, 0x80 };
977 string s = utf8.GetString (data);
978 // exception is "really" expected here
979 Assert.AreEqual (56319, s [0], "MS FX 1.1 behaviour");
980 Assert.AreEqual (56320, s [1], "MS FX 1.1 behaviour");
// Surrogate pair U+DBFF U+DFFF with each half encoded as its own 3-byte sequence;
// decoding is expected to throw.
// The trailing assertions record the MS Fx 1.1 result and are not reached when GetString throws.
// NOTE(review): two ExpectedException attributes appear; presumably alternatives under
// conditional compilation not visible here - confirm.
985 [ExpectedException (typeof (DecoderFallbackException))]
987 // MS Fx 1.1 accept this
988 [Category ("NotDotNet")]
989 [ExpectedException (typeof (DecoderException))]
991 public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_528 ()
993 byte[] data = { 0xED, 0xAF, 0xBF, 0xED, 0xBF, 0xBF };
994 string s = utf8.GetString (data);
995 // exception is "really" expected here
996 Assert.AreEqual (56319, s [0], "MS FX 1.1 behaviour");
997 Assert.AreEqual (57343, s [1], "MS FX 1.1 behaviour");
// EF BF BE decodes to U+FFFE (65534). The expected-exception attribute is commented out,
// so this test asserts that decoding succeeds and yields that code unit.
1001 // MS Fx 1.1 accept this
1002 // [ExpectedException (typeof (DecoderException))]
1003 public void T5_IllegalCodePosition_3_Other_531 ()
1005 byte[] data = { 0xEF, 0xBF, 0xBE };
1006 string s = utf8.GetString (data);
1007 // exception is "really" expected here
1008 Assert.AreEqual (65534, s [0], "MS FX 1.1 behaviour");
// EF BF BF decodes to U+FFFF (65535). The expected-exception attribute is commented out,
// so this test asserts that decoding succeeds and yields that code unit.
1012 // MS Fx 1.1 accept this
1013 // [ExpectedException (typeof (DecoderException))]
1014 public void T5_IllegalCodePosition_3_Other_532 ()
1016 byte[] data = { 0xEF, 0xBF, 0xBF };
1017 string s = utf8.GetString (data);
1018 // exception is "really" expected here
1019 Assert.AreEqual (65535, s [0], "MS FX 1.1 behaviour");
// Checks that U+FEFF survives decoding as a character: first by decoding the raw bytes
// EF BB BF with a throwing UTF8Encoding, then by round-tripping { U+FEFF, 'A' } through
// Encoding.UTF8.GetBytes / GetChars.
1023 // bug #75065 and #73086.
1024 public void GetCharsFEFF ()
1026 byte [] data = new byte [] {0xEF, 0xBB, 0xBF};
1027 Encoding enc = new UTF8Encoding (false, true);
1028 string s = enc.GetString (data);
// NUnit's AreEqual takes (expected, actual); the literal goes first so that a failure
// message reports the two values the right way round.
1029 Assert.AreEqual ("\uFEFF", s);
1031 Encoding utf = Encoding.UTF8;
1032 char[] testChars = {'\uFEFF','A'};
1034 byte[] bytes = utf.GetBytes(testChars);
1035 char[] chars = utf.GetChars(bytes);
1036 Assert.AreEqual ('\uFEFF', chars [0], "#1");
1037 Assert.AreEqual ('A', chars [1], "#2");
// Clones the code page 65001 encoding and checks that the clone is not read-only by
// reading IsReadOnly and then assigning a new EncoderFallback to it.
// NOTE(review): the first statement has no terminator in this excerpt; a continuation
// line (for example a cast of the Clone () result) appears to be missing.
1042 public void CloneNotReadOnly ()
1044 Encoding e = Encoding.GetEncoding (65001).Clone ()
1046 Assert.AreEqual (false, e.IsReadOnly);
1047 e.EncoderFallback = new EncoderExceptionFallback ();
// Decodes ED A2 8C with a throwing UTF8Encoding (false, true) and expects an exception.
// NOTE(review): two ExpectedException attributes appear (DecoderFallbackException and
// ArgumentException); presumably alternatives under conditional compilation not visible
// here - confirm.
1053 [ExpectedException (typeof (DecoderFallbackException))]
1055 [ExpectedException (typeof (ArgumentException))]
1056 [Category ("NotDotNet")] // MS Bug
1058 public void Bug77315 ()
1060 new UTF8Encoding (false, true).GetString (
1061 new byte [] {0xED, 0xA2, 0x8C});
// Encodes a lone high surrogate (U+D800) into a zero-length byte array, first through a
// stateful Encoder (one call with flush=false, then a zero-char call with flush=true) and
// then through Encoding.UTF8.GetBytes (string, ...).
// Each part carries two alternative expectations: a return value of 0, or Assert.Fail
// guarded by a catch of ArgumentException.
// NOTE(review): the `try` openers and the conditional-compilation directives selecting
// between those expectations are not visible in this excerpt - confirm before editing.
1065 public void SufficientByteArray ()
1067 Encoder e = Encoding.UTF8.GetEncoder ();
1068 byte [] bytes = new byte [0];
1070 char [] chars = new char [] {'\uD800'};
1071 e.GetBytes (chars, 0, 1, bytes, 0, false);
1073 int ret = e.GetBytes (chars, 1, 0, bytes, 0, true);
1075 Assert.AreEqual (0, ret, "drop insufficient char in 2.0: char[]");
1077 Assert.Fail ("ArgumentException is expected: char[]");
1079 } catch (ArgumentException) {
1082 string s = "\uD800";
1084 int ret = Encoding.UTF8.GetBytes (s, 0, 1, bytes, 0);
1086 Assert.AreEqual (0, ret, "drop insufficient char in 2.0: string");
1088 Assert.Fail ("ArgumentException is expected: string");
1090 } catch (ArgumentException) {
// Feeds the single byte 183 (0xB7, a continuation byte with no lead) to a non-throwing
// UTF8Encoding (false, false) and expects exactly one character from each of
// Decoder.GetCharCount, Decoder.GetChars and Encoding.GetString.
// NOTE(review): the trailing arguments of each AssertType.AreEqual call are not visible
// in this excerpt.
1095 [Test] // bug #77550
1096 public void DecoderFallbackSimple ()
1098 UTF8Encoding e = new UTF8Encoding (false, false);
1099 AssertType.AreEqual (1, e.GetDecoder ().GetCharCount (
1100 new byte [] {(byte) 183}, 0, 1),
1102 AssertType.AreEqual (1, e.GetDecoder().GetChars (
1103 new byte [] {(byte) 183}, 0, 1,
1106 AssertType.AreEqual (1, e.GetString (new byte [] {(byte) 183}).Length,
// Exercises the fallback buffer created by Encoding.UTF8.DecoderFallback: Fallback on an
// empty byte array returns true, MovePrevious returns false, one character remains, and
// that character is U+FFFD.
// The `as` cast yields null if the buffer is not a DecoderReplacementFallbackBuffer, in
// which case the first use of `b` throws NullReferenceException.
1111 public void FallbackDefaultEncodingUTF8 ()
1113 DecoderReplacementFallbackBuffer b =
1114 Encoding.UTF8.DecoderFallback.CreateFallbackBuffer ()
1115 as DecoderReplacementFallbackBuffer;
1116 AssertType.IsTrue (b.Fallback (new byte [] {}, 0), "#1");
1117 AssertType.IsFalse (b.MovePrevious (), "#2");
1118 AssertType.AreEqual (1, b.Remaining, "#3");
1119 AssertType.AreEqual ('\uFFFD', b.GetNextChar (), "#4");
// Reads up to 8000 bytes from the 415628.bin test resource and decodes them with
// Encoding.UTF8. There is no assertion: the test passes if GetString does not throw.
1123 public void Bug415628 ()
// OpenRead requests read-only access. File.Open (path, FileMode.Open) asks for read/write
// access with no sharing, which fails on a read-only resource tree although the file is
// only ever read here.
1125 using (var f = File.OpenRead ("Test/resources/415628.bin")) {
1126 BinaryReader br = new BinaryReader (f);
1127 byte [] buf = br.ReadBytes (8000);
1128 Encoding.UTF8.GetString(buf);