2 // UTF8EncodingTest.cs - NUnit Test Cases for System.Text.UTF8Encoding
5 // Patrick Kalkman kalkman@cistron.nl
6 // Sebastien Pouliot (spouliot@motus.com)
8 // (C) 2003 Patrick Kalkman
9 // (C) 2004 Novell (http://www.novell.com)
12 using NUnit.Framework;
17 using DecoderException = System.Text.DecoderFallbackException;
19 using DecoderException = System.ArgumentException;
22 using AssertType = NUnit.Framework.Assert;
24 namespace MonoTests.System.Text
27 public class UTF8EncodingTest : Assertion
29 private UTF8Encoding utf8;
34 utf8 = new UTF8Encoding (true, true);
// The shared UTF-8 encoding must report IsBrowserDisplay as true.
public void IsBrowserDisplay ()
{
	bool displayable = utf8.IsBrowserDisplay;
	Assert (displayable);
}
// The shared UTF-8 encoding must report IsBrowserSave as true.
public void IsBrowserSave ()
{
	bool savable = utf8.IsBrowserSave;
	Assert (savable);
}
// The shared UTF-8 encoding must report IsMailNewsDisplay as true.
public void IsMailNewsDisplay ()
{
	bool displayable = utf8.IsMailNewsDisplay;
	Assert (displayable);
}
// The shared UTF-8 encoding must report IsMailNewsSave as true.
public void IsMailNewsSave ()
{
	bool savable = utf8.IsMailNewsSave;
	Assert (savable);
}
// Encodes "A<NOT IDENTICAL TO><ALPHA>." with a default UTF8Encoding and checks
// the result byte by byte against 41 E2 89 A2 CE 91 2E.
public void TestEncodingGetBytes1()
{
	int[] expected = { 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E };
	UTF8Encoding encoder = new UTF8Encoding ();

	byte[] actual = encoder.GetBytes ("\u0041\u2262\u0391\u002E");

	// Labels run "UTF #1" .. "UTF #7", one per encoded byte.
	for (int i = 0; i < expected.Length; i++)
		Assertion.AssertEquals ("UTF #" + (i + 1), expected [i], actual [i]);
}
// Encodes "Hi Mom <WHITE SMILING FACE>!" into a caller-supplied 11-byte buffer and
// checks both the reported byte count and every byte
// (48 69 20 4D 6F 6D 20 E2 98 BA 21).
public void TestEncodingGetBytes2()
{
	int[] expected = { 0x48, 0x69, 0x20, 0x4D, 0x6F, 0x6D, 0x20, 0xE2, 0x98, 0xBA, 0x21 };
	string text = "\u0048\u0069\u0020\u004D\u006F\u006D\u0020\u263A\u0021";
	UTF8Encoding encoder = new UTF8Encoding ();
	byte[] buffer = new byte [11];

	int written = encoder.GetBytes (text.ToCharArray(), 0, text.Length, buffer, 0);

	Assertion.AssertEquals ("UTF #1", 11, written);
	// Labels "UTF #2" .. "UTF #12" cover buffer positions 0 .. 10.
	for (int i = 0; i < expected.Length; i++)
		Assertion.AssertEquals ("UTF #" + (i + 2), expected [i], buffer [i]);
}
// Decodes 41 E2 89 A2 CE 91 2E with a default UTF8Encoding and checks that it
// yields "A<NOT IDENTICAL TO><ALPHA>." character by character.
public void TestDecodingGetChars1()
{
	int[] expected = { 0x0041, 0x2262, 0x0391, 0x002E };
	byte[] encoded = new byte [] {0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E};
	UTF8Encoding decoder = new UTF8Encoding ();

	char[] decoded = decoder.GetChars(encoded);

	// Labels run "UTF #1" .. "UTF #4", one per decoded character.
	for (int i = 0; i < expected.Length; i++)
		Assertion.AssertEquals ("UTF #" + (i + 1), expected [i], decoded [i]);
}
// Checks UTF8Encoding.GetMaxCharCount (50).
// The two expectations (51 and 50) contradict each other, so only one can apply
// per build; they are separated per framework profile here.
// NOTE(review): the NET_2_0 symbol is assumed from the ".NET 2.0" remarks in this
// file - confirm it matches the build's defines.
[Category ("NotWorking")]
public void TestMaxCharCount()
{
	UTF8Encoding UTF8enc = new UTF8Encoding ();
#if NET_2_0
	// hmm, where is this extra 1 coming from?
	Assertion.AssertEquals ("UTF #1", 51, UTF8enc.GetMaxCharCount(50));
#else
	Assertion.AssertEquals ("UTF #1", 50, UTF8enc.GetMaxCharCount(50));
#endif
}
// Checks UTF8Encoding.GetMaxByteCount (50).
// The two expectations (153 and 200) contradict each other, so only one can apply
// per build; they are separated per framework profile here.
// NOTE(review): the NET_2_0 symbol is assumed from the ".NET 2.0" remark below -
// confirm it matches the build's defines.
[Category ("NotWorking")]
public void TestMaxByteCount()
{
	UTF8Encoding UTF8enc = new UTF8Encoding ();
#if NET_2_0
	// maybe under .NET 2.0 insufficient surrogate pair is
	// just not handled, and 3 is Preamble size.
	Assertion.AssertEquals ("UTF #1", 153, UTF8enc.GetMaxByteCount(50));
#else
	Assertion.AssertEquals ("UTF #1", 200, UTF8enc.GetMaxByteCount(50));
#endif
}
// regression for bug #59648
// Decodes the invalid sequence C0 AF with a non-throwing UTF8Encoding
// (second constructor argument false), alone and embedded between ASCII digits.
// One profile expects each invalid byte to become U+FFFD, the other expects the
// invalid bytes to be dropped; the two sets of expectations are mutually
// exclusive, so they are separated per framework profile.
// NOTE(review): the NET_2_0 symbol is assumed - confirm against the build's defines.
public void TestThrowOnInvalid ()
{
	UTF8Encoding u = new UTF8Encoding (true, false);

	byte[] data = new byte [] { 0xC0, 0xAF };
#if NET_2_0
	AssertEquals ("#A0", 2, u.GetCharCount (data));
	string s = u.GetString (data);
	AssertEquals ("#A1", "\uFFFD\uFFFD", s);
#else
	AssertEquals ("#A0", 0, u.GetCharCount (data));
	string s = u.GetString (data);
	AssertEquals ("#A1", String.Empty, s);
#endif

	data = new byte [] { 0x30, 0x31, 0xC0, 0xAF, 0x30, 0x32 };
	s = u.GetString (data);
#if NET_2_0
	AssertEquals ("#B1", 6, s.Length);
	AssertEquals ("#B2", 0x30, (int) s [0]);
	AssertEquals ("#B3", 0x31, (int) s [1]);
	AssertEquals ("#B4", 0xFFFD, (int) s [2]);
	AssertEquals ("#B5", 0xFFFD, (int) s [3]);
	AssertEquals ("#B6", 0x30, (int) s [4]);
	AssertEquals ("#B7", 0x32, (int) s [5]);
#else
	AssertEquals ("#B1", 4, s.Length);
	AssertEquals ("#B2", 0x30, (int) s [0]);
	AssertEquals ("#B3", 0x31, (int) s [1]);
	AssertEquals ("#B4", 0x30, (int) s [2]);
	AssertEquals ("#B5", 0x32, (int) s [3]);
#endif
}
187 // UTF8 decoding tests from http://www.cl.cam.ac.uk/~mgk25/
// Round-trips a valid multi-byte sequence: decode, re-encode, compare hex dumps.
public void T1_Correct_GreekWord_kosme ()
{
	byte[] greek = new byte [] { 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5 };
	string decoded = utf8.GetString (greek);
	// Comparing against a literal would require non-ASCII source text,
	// so only the byte-level round trip is verified.
	string originalHex = BitConverter.ToString (greek);
	string roundTripHex = BitConverter.ToString (utf8.GetBytes (decoded));
	AssertEquals ("Reconverted", originalHex, roundTripHex);
}
// Decodes the first code point of each encoded length (1 to 4 bytes) and checks
// both the decoded value and the byte-level round trip.
public void T2_Boundary_1_FirstPossibleSequence_Pass ()
{
	// 1 byte: U+0000
	byte[] oneByte = new byte [] { 0x00 };
	string decoded = utf8.GetString (oneByte);
	AssertEquals ("1 byte (U-00000000)", "\0", decoded);
	string expectedHex = BitConverter.ToString (oneByte);
	AssertEquals ("Reconverted-1", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));

	// 2 bytes: U+0080
	byte[] twoBytes = new byte [] { 0xC2, 0x80 };
	decoded = utf8.GetString (twoBytes);
	AssertEquals ("2 bytes (U-00000080)", 0x80, decoded [0]);
	expectedHex = BitConverter.ToString (twoBytes);
	AssertEquals ("Reconverted-2", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));

	// 3 bytes: U+0800
	byte[] threeBytes = new byte [] { 0xE0, 0xA0, 0x80 };
	decoded = utf8.GetString (threeBytes);
	AssertEquals ("3 bytes (U-00000800)", 0x800, decoded [0]);
	expectedHex = BitConverter.ToString (threeBytes);
	AssertEquals ("Reconverted-3", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));

	// 4 bytes: U+10000, surfaced as the surrogate pair D800 DC00
	byte[] fourBytes = new byte [] { 0xF0, 0x90, 0x80, 0x80 };
	decoded = utf8.GetString (fourBytes);
	AssertEquals ("4 bytes (U-00010000)-0", 0xD800, decoded [0]);
	AssertEquals ("4 bytes (U-00010000)-1", 0xDC00, decoded [1]);
	expectedHex = BitConverter.ToString (fourBytes);
	AssertEquals ("Reconverted-4", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));
}
// The first 5-byte form (F8 88 80 80 80) is expected to raise DecoderException.
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_1_FirstPossibleSequence_Fail_5 ()
{
	byte[] fiveByteForm = new byte [] { 0xF8, 0x88, 0x80, 0x80, 0x80 };
	string decoded = utf8.GetString (fiveByteForm);
	// Only reached when the decoder did not throw.
	AssertNull ("5 bytes (U-00200000)", decoded);
	string originalHex = BitConverter.ToString (fiveByteForm);
	AssertEquals ("Reconverted-5", originalHex, BitConverter.ToString (utf8.GetBytes (decoded)));
}
// The first 6-byte form (FC 84 80 80 80 80) is expected to raise DecoderException.
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_1_FirstPossibleSequence_Fail_6 ()
{
	byte[] sixByteForm = new byte [] { 0xFC, 0x84, 0x80, 0x80, 0x80, 0x80 };
	string decoded = utf8.GetString (sixByteForm);
	// Only reached when the decoder did not throw.
	AssertNull ("6 bytes (U-04000000)", decoded);
	string originalHex = BitConverter.ToString (sixByteForm);
	AssertEquals ("Reconverted-6", originalHex, BitConverter.ToString (utf8.GetBytes (decoded)));
}
// Decodes the last code point of the 1-, 2- and 3-byte encoded lengths and checks
// both the decoded value and the byte-level round trip.
public void T2_Boundary_2_LastPossibleSequence_Pass ()
{
	// 1 byte: U+007F
	byte[] oneByte = new byte [] { 0x7F };
	string decoded = utf8.GetString (oneByte);
	AssertEquals ("1 byte (U-0000007F)", 0x7F, decoded [0]);
	string expectedHex = BitConverter.ToString (oneByte);
	AssertEquals ("Reconverted-1", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));

	// 2 bytes: U+07FF
	byte[] twoBytes = new byte [] { 0xDF, 0xBF };
	decoded = utf8.GetString (twoBytes);
	AssertEquals ("2 bytes (U-000007FF)", 0x7FF, decoded [0]);
	expectedHex = BitConverter.ToString (twoBytes);
	AssertEquals ("Reconverted-2", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));

	// 3 bytes: U+FFFF
	byte[] threeBytes = new byte [] { 0xEF, 0xBF, 0xBF };
	decoded = utf8.GetString (threeBytes);
	AssertEquals ("3 bytes (U-0000FFFF)", 0xFFFF, decoded [0]);
	expectedHex = BitConverter.ToString (threeBytes);
	AssertEquals ("Reconverted-3", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));
}
// The last 4-byte form, F7 BF BF BF (U-001FFFFF), lies beyond U+10FFFF and is
// expected to raise DecoderException.
// The lead byte was previously written 0x7F (a transposition of 0xF7), which made
// the input an ASCII DEL followed by stray continuation bytes - still rejected,
// but not the boundary case named in the assertion label.
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_2_LastPossibleSequence_Fail_4 ()
{
	byte[] data224 = { 0xF7, 0xBF, 0xBF, 0xBF };
	string s = utf8.GetString (data224);
	// Only reached when the decoder did not throw.
	AssertNull ("4 bytes (U-001FFFFF)", s);
	AssertEquals ("Reconverted-4", BitConverter.ToString (data224), BitConverter.ToString (utf8.GetBytes (s)));
}
// The last 5-byte form (FB BF BF BF BF) is expected to raise DecoderException.
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_2_LastPossibleSequence_Fail_5 ()
{
	byte[] fiveByteForm = new byte [] { 0xFB, 0xBF, 0xBF, 0xBF, 0xBF };
	string decoded = utf8.GetString (fiveByteForm);
	// Only reached when the decoder did not throw.
	AssertNull ("5 bytes (U-03FFFFFF)", decoded);
	string originalHex = BitConverter.ToString (fiveByteForm);
	AssertEquals ("Reconverted-5", originalHex, BitConverter.ToString (utf8.GetBytes (decoded)));
}
// The last 6-byte form (FD BF BF BF BF BF) is expected to raise DecoderException.
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_2_LastPossibleSequence_Fail_6 ()
{
	byte[] sixByteForm = new byte [] { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF, 0xBF };
	string decoded = utf8.GetString (sixByteForm);
	// Only reached when the decoder did not throw.
	AssertNull ("6 bytes (U-7FFFFFFF)", decoded);
	string originalHex = BitConverter.ToString (sixByteForm);
	AssertEquals ("Reconverted-6", originalHex, BitConverter.ToString (utf8.GetBytes (decoded)));
}
// Decodes code points around the surrogate range and the top of the Unicode
// range, checking decoded values and the byte-level round trip.
public void T2_Boundary_3_Other_Pass ()
{
	// U+D7FF: last code point before the surrogate range
	byte[] belowSurrogates = new byte [] { 0xED, 0x9F, 0xBF };
	string decoded = utf8.GetString (belowSurrogates);
	AssertEquals ("U-0000D7FF", 0xD7FF, decoded [0]);
	string expectedHex = BitConverter.ToString (belowSurrogates);
	AssertEquals ("Reconverted-1", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));

	// U+E000: first code point after the surrogate range
	byte[] aboveSurrogates = new byte [] { 0xEE, 0x80, 0x80 };
	decoded = utf8.GetString (aboveSurrogates);
	AssertEquals ("U-0000E000", 0xE000, decoded [0]);
	expectedHex = BitConverter.ToString (aboveSurrogates);
	AssertEquals ("Reconverted-2", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));

	// U+FFFD
	byte[] replacementChar = new byte [] { 0xEF, 0xBF, 0xBD };
	decoded = utf8.GetString (replacementChar);
	AssertEquals ("U-0000FFFD", 0xFFFD, decoded [0]);
	expectedHex = BitConverter.ToString (replacementChar);
	AssertEquals ("Reconverted-3", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));

	// U+10FFFF, surfaced as the surrogate pair DBFF DFFF
	byte[] lastCodePoint = new byte [] { 0xF4, 0x8F, 0xBF, 0xBF };
	decoded = utf8.GetString (lastCodePoint);
	AssertEquals ("U-0010FFFF-0", 0xDBFF, decoded [0]);
	AssertEquals ("U-0010FFFF-1", 0xDFFF, decoded [1]);
	expectedHex = BitConverter.ToString (lastCodePoint);
	AssertEquals ("Reconverted-4", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));
}
// F4 90 80 80 would be U+110000, one past the last code point; DecoderException expected.
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_3_Other_Fail_5 ()
{
	byte[] pastLastCodePoint = new byte [] { 0xF4, 0x90, 0x80, 0x80 };
	string decoded = utf8.GetString (pastLastCodePoint);
	// Only reached when the decoder did not throw.
	AssertNull ("U-00110000", decoded);
	string originalHex = BitConverter.ToString (pastLastCodePoint);
	AssertEquals ("Reconverted-5", originalHex, BitConverter.ToString (utf8.GetBytes (decoded)));
}
// A single continuation byte (0x80) with no lead byte; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_311 ()
{
	byte[] malformed = new byte [] { 0x80 };
	utf8.GetString (malformed);
}
// A single continuation byte (0xBF) with no lead byte; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_312 ()
{
	byte[] malformed = new byte [] { 0xBF };
	utf8.GetString (malformed);
}
// Two continuation bytes with no lead byte; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_313 ()
{
	byte[] malformed = new byte [] { 0x80, 0xBF };
	utf8.GetString (malformed);
}
// Three continuation bytes with no lead byte; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_314 ()
{
	byte[] malformed = new byte [] { 0x80, 0xBF, 0x80 };
	utf8.GetString (malformed);
}
// Four continuation bytes with no lead byte; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_315 ()
{
	byte[] malformed = new byte [] { 0x80, 0xBF, 0x80, 0xBF };
	utf8.GetString (malformed);
}
// Five continuation bytes with no lead byte; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_316 ()
{
	byte[] malformed = new byte [] { 0x80, 0xBF, 0x80, 0xBF, 0x80 };
	utf8.GetString (malformed);
}
// Six continuation bytes with no lead byte; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_317 ()
{
	byte[] malformed = new byte [] { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF };
	utf8.GetString (malformed);
}
// Seven continuation bytes with no lead byte; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_318 ()
{
	byte[] malformed = new byte [] { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 };
	utf8.GetString (malformed);
}
// Every one of the 64 continuation bytes (0x80 .. 0xBF) in sequence, with no
// lead byte anywhere; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_319 ()
{
	byte[] continuations = new byte [64];
	for (int i = 0; i < continuations.Length; i++)
		continuations [i] = (byte) (0x80 + i);

	utf8.GetString (continuations);
}
// All 32 lead bytes of 2-byte sequences (0xC0 .. 0xDF), each followed by a space
// instead of a continuation byte; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_321 ()
{
	byte[] lonelyStarts = new byte [64];
	for (int i = 0; i < 32; i++) {
		lonelyStarts [2 * i] = (byte) (0xC0 + i);
		lonelyStarts [2 * i + 1] = 0x20;
	}

	utf8.GetString (lonelyStarts);
}
// All 16 lead bytes of 3-byte sequences (0xE0 .. 0xEF), each followed by a space
// instead of continuation bytes; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_322 ()
{
	byte[] lonelyStarts = new byte [32];
	for (int i = 0; i < 16; i++) {
		lonelyStarts [2 * i] = (byte) (0xE0 + i);
		lonelyStarts [2 * i + 1] = 0x20;
	}

	utf8.GetString (lonelyStarts);
}
// All 8 lead bytes of 4-byte sequences (0xF0 .. 0xF7), each followed by a space
// instead of continuation bytes; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_323 ()
{
	byte[] lonelyStarts = new byte [16];
	for (int i = 0; i < 8; i++) {
		lonelyStarts [2 * i] = (byte) (0xF0 + i);
		lonelyStarts [2 * i + 1] = 0x20;
	}

	utf8.GetString (lonelyStarts);
}
// All 4 lead bytes of 5-byte sequences (0xF8 .. 0xFB), each followed by a space
// instead of continuation bytes; decoding must throw.
// The data was previously a verbatim copy of the _323 case (0xF0 .. 0xF7), so the
// 5-byte lead bytes were never exercised.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_324 ()
{
	byte[] data = { 0xF8, 0x20, 0xF9, 0x20, 0xFA, 0x20, 0xFB, 0x20 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
}
// Both lead bytes of 6-byte sequences (0xFC, 0xFD), each followed by a space
// instead of continuation bytes; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_325 ()
{
	byte[] lonelyStarts = new byte [] { 0xFC, 0x20, 0xFD, 0x20 };
	utf8.GetString (lonelyStarts);
}
// 2-byte lead 0xC0 with its continuation byte missing; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_331 ()
{
	byte[] truncated = new byte [] { 0xC0 };
	utf8.GetString (truncated);
}
// 3-byte sequence E0 80 with its last byte missing; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_332 ()
{
	byte[] truncated = new byte [] { 0xE0, 0x80 };
	utf8.GetString (truncated);
}
// 4-byte sequence F0 80 80 with its last byte missing; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_333 ()
{
	byte[] truncated = new byte [] { 0xF0, 0x80, 0x80 };
	utf8.GetString (truncated);
}
// 5-byte form F8 80 80 80 with its last byte missing; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_334 ()
{
	byte[] truncated = new byte [] { 0xF8, 0x80, 0x80, 0x80 };
	utf8.GetString (truncated);
}
// 6-byte form FC 80 80 80 80 with its last byte missing; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_335 ()
{
	byte[] truncated = new byte [] { 0xFC, 0x80, 0x80, 0x80, 0x80 };
	utf8.GetString (truncated);
}
// 2-byte lead 0xDF with its continuation byte missing.
// MS Fx 1.1 accepts this input, so no [ExpectedException] is declared; the test
// tolerates either outcome: a DecoderException or an empty result.
public void T3_Malformed_3_LastContinuationMissing_336 ()
{
	byte[] truncated = new byte [] { 0xDF };
	string decoded;
	try {
		decoded = utf8.GetString (truncated);
	}
	catch (DecoderException) {
		// Mono throws here - better stick to the standard
		return;
	}
	// no exception: the result has to match what MS Fx 1.1 produces
	AssertEquals ("MS FX 1.1 behaviour", String.Empty, decoded);
}
// 3-byte sequence EF BF with its last byte missing.
// MS Fx 1.1 accepts this input, so no [ExpectedException] is declared; the test
// tolerates either outcome: a DecoderException or an empty result.
public void T3_Malformed_3_LastContinuationMissing_337 ()
{
	byte[] truncated = new byte [] { 0xEF, 0xBF };
	string decoded;
	try {
		decoded = utf8.GetString (truncated);
	}
	catch (DecoderException) {
		// Mono throws here - better stick to the standard
		return;
	}
	// no exception: the result has to match what MS Fx 1.1 produces
	AssertEquals ("MS FX 1.1 behaviour", String.Empty, decoded);
}
// 4-byte sequence F7 BF BF with its last byte missing; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_338 ()
{
	byte[] truncated = new byte [] { 0xF7, 0xBF, 0xBF };
	utf8.GetString (truncated);
}
// 5-byte form FB BF BF BF with its last byte missing; decoding must throw.
// The lead byte was previously written 0xF (a truncated 0xFB), which turned the
// input into an ASCII control character followed by stray continuation bytes
// rather than an incomplete 5-byte sequence.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_339 ()
{
	byte[] data = { 0xFB, 0xBF, 0xBF, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
}
// 6-byte form FD BF BF BF BF with its last byte missing; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_3310 ()
{
	byte[] truncated = new byte [] { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF };
	utf8.GetString (truncated);
}
// The ten incomplete sequences from the LastContinuationMissing cases,
// concatenated into one buffer; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_4_ConcatenationImcomplete ()
{
	byte[] incompleteRun = new byte [] {
		0xC0,
		0xE0, 0x80,
		0xF0, 0x80, 0x80,
		0xF8, 0x80, 0x80, 0x80,
		0xFC, 0x80, 0x80, 0x80, 0x80,
		0xDF,
		0xEF, 0xBF,
		0xF7, 0xBF, 0xBF,
		0xFB, 0xBF, 0xBF, 0xBF,
		0xFD, 0xBF, 0xBF, 0xBF, 0xBF };
	utf8.GetString (incompleteRun);
}
// 0xFE can never appear in UTF-8; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_5_ImpossibleBytes_351 ()
{
	byte[] impossible = new byte [] { 0xFE };
	utf8.GetString (impossible);
}
// 0xFF can never appear in UTF-8; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_5_ImpossibleBytes_352 ()
{
	byte[] impossible = new byte [] { 0xFF };
	utf8.GetString (impossible);
}
// A run of the impossible bytes FE FE FF FF; decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_5_ImpossibleBytes_353 ()
{
	byte[] impossible = new byte [] { 0xFE, 0xFE, 0xFF, 0xFF };
	utf8.GetString (impossible);
}
// Overlong == dangerous -> "safe" decoder should reject them
// Overlong 2-byte encoding of '/' (C0 AF); decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_411 ()
{
	byte[] overlong = new byte [] { 0xC0, 0xAF };
	utf8.GetString (overlong);
}
// Overlong 3-byte encoding of '/' (E0 80 AF); decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_412 ()
{
	byte[] overlong = new byte [] { 0xE0, 0x80, 0xAF };
	utf8.GetString (overlong);
}
// Overlong 4-byte encoding of '/' (F0 80 80 AF); decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_413 ()
{
	byte[] overlong = new byte [] { 0xF0, 0x80, 0x80, 0xAF };
	utf8.GetString (overlong);
}
// Overlong 5-byte encoding of '/' (F8 80 80 80 AF); decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_414 ()
{
	byte[] overlong = new byte [] { 0xF8, 0x80, 0x80, 0x80, 0xAF };
	utf8.GetString (overlong);
}
// Overlong 6-byte encoding of '/' (FC 80 80 80 80 AF); decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_415 ()
{
	byte[] overlong = new byte [] { 0xFC, 0x80, 0x80, 0x80, 0x80, 0xAF };
	utf8.GetString (overlong);
}
// Highest value still expressible as an overlong 2-byte sequence (C1 BF = U+007F);
// decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_421 ()
{
	byte[] overlong = new byte [] { 0xC1, 0xBF };
	utf8.GetString (overlong);
}
// Highest value still expressible as an overlong 3-byte sequence (E0 9F BF = U+07FF);
// decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_422 ()
{
	byte[] overlong = new byte [] { 0xE0, 0x9F, 0xBF };
	utf8.GetString (overlong);
}
// Highest value still expressible as an overlong 4-byte sequence (F0 8F BF BF = U+FFFF);
// decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_423 ()
{
	byte[] overlong = new byte [] { 0xF0, 0x8F, 0xBF, 0xBF };
	utf8.GetString (overlong);
}
// Highest value still expressible as an overlong 5-byte form (F8 87 BF BF BF);
// decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_424 ()
{
	byte[] overlong = new byte [] { 0xF8, 0x87, 0xBF, 0xBF, 0xBF };
	utf8.GetString (overlong);
}
// Highest value still expressible as an overlong 6-byte form (FC 83 BF BF BF BF);
// decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_425 ()
{
	byte[] overlong = new byte [] { 0xFC, 0x83, 0xBF, 0xBF, 0xBF, 0xBF };
	utf8.GetString (overlong);
}
// Overlong 2-byte encoding of NUL (C0 80); decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_431 ()
{
	byte[] overlongNul = new byte [] { 0xC0, 0x80 };
	utf8.GetString (overlongNul);
}
// Overlong 3-byte encoding of NUL (E0 80 80); decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_432 ()
{
	byte[] overlongNul = new byte [] { 0xE0, 0x80, 0x80 };
	utf8.GetString (overlongNul);
}
// Overlong 4-byte encoding of NUL (F0 80 80 80); decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_433 ()
{
	byte[] overlongNul = new byte [] { 0xF0, 0x80, 0x80, 0x80 };
	utf8.GetString (overlongNul);
}
// Overlong 5-byte encoding of NUL (F8 80 80 80 80); decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_434 ()
{
	byte[] overlongNul = new byte [] { 0xF8, 0x80, 0x80, 0x80, 0x80 };
	utf8.GetString (overlongNul);
}
// Overlong 6-byte encoding of NUL (FC 80 80 80 80 80); decoding must throw.
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_435 ()
{
	byte[] overlongNul = new byte [] { 0xFC, 0x80, 0x80, 0x80, 0x80, 0x80 };
	utf8.GetString (overlongNul);
}
// ED A0 80 is the UTF-8 form of the lone surrogate U+D800 (55296).
// Two [ExpectedException] attributes cannot coexist on one method, so the
// expectations are separated per framework profile.
// NOTE(review): the NET_2_0 symbol is assumed - confirm against the build's defines.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_511 ()
{
	byte[] data = { 0xED, 0xA0, 0x80 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 55296, s [0]);
}
// ED AD BF is the UTF-8 form of the lone surrogate U+DB7F (56191).
// Two [ExpectedException] attributes cannot coexist on one method, so the
// expectations are separated per framework profile.
// NOTE(review): the NET_2_0 symbol is assumed - confirm against the build's defines.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_512 ()
{
	byte[] data = { 0xED, 0xAD, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 56191, s [0]);
}
// ED AE 80 is the UTF-8 form of the lone surrogate U+DB80 (56192).
// Two [ExpectedException] attributes cannot coexist on one method, so the
// expectations are separated per framework profile.
// NOTE(review): the NET_2_0 symbol is assumed - confirm against the build's defines.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_513 ()
{
	byte[] data = { 0xED, 0xAE, 0x80 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 56192, s [0]);
}
// ED AF BF is the UTF-8 form of the lone surrogate U+DBFF (56319).
// Two [ExpectedException] attributes cannot coexist on one method, so the
// expectations are separated per framework profile.
// NOTE(review): the NET_2_0 symbol is assumed - confirm against the build's defines.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_514 ()
{
	byte[] data = { 0xED, 0xAF, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 56319, s [0]);
}
// ED B0 80 is the UTF-8 form of the lone surrogate U+DC00 (56320).
// Two [ExpectedException] attributes cannot coexist on one method, so the
// expectations are separated per framework profile.
// NOTE(review): the NET_2_0 symbol is assumed - confirm against the build's defines.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_515 ()
{
	byte[] data = { 0xED, 0xB0, 0x80 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 56320, s [0]);
}
// ED BE 80 is the UTF-8 form of the lone surrogate U+DF80 (57216).
// Two [ExpectedException] attributes cannot coexist on one method, so the
// expectations are separated per framework profile.
// NOTE(review): the NET_2_0 symbol is assumed - confirm against the build's defines.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_516 ()
{
	byte[] data = { 0xED, 0xBE, 0x80 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 57216, s [0]);
}
// ED BF BF is the UTF-8 form of the lone surrogate U+DFFF (57343).
// Two [ExpectedException] attributes cannot coexist on one method, so the
// expectations are separated per framework profile.
// NOTE(review): the NET_2_0 symbol is assumed - confirm against the build's defines.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_517 ()
{
	byte[] data = { 0xED, 0xBF, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 57343, s [0]);
}
// Surrogate pair U+D800 U+DC00, each half separately encoded as a 3-byte sequence.
// Two [ExpectedException] attributes cannot coexist on one method, so the
// expectations are separated per framework profile.
// NOTE(review): the NET_2_0 symbol is assumed - confirm against the build's defines.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_521 ()
{
	byte[] data = { 0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 55296, s [0]);
	AssertEquals ("MS FX 1.1 behaviour", 56320, s [1]);
}
// Surrogate pair U+D800 U+DFFF, each half separately encoded as a 3-byte sequence.
// Two [ExpectedException] attributes cannot coexist on one method, so the
// expectations are separated per framework profile.
// NOTE(review): the NET_2_0 symbol is assumed - confirm against the build's defines.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_522 ()
{
	byte[] data = { 0xED, 0xA0, 0x80, 0xED, 0xBF, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 55296, s [0]);
	AssertEquals ("MS FX 1.1 behaviour", 57343, s [1]);
}
// Surrogate pair U+DB7F U+DC00, each half separately encoded as a 3-byte sequence.
// Two [ExpectedException] attributes cannot coexist on one method, so the
// expectations are separated per framework profile.
// NOTE(review): the NET_2_0 symbol is assumed - confirm against the build's defines.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_523 ()
{
	byte[] data = { 0xED, 0xAD, 0xBF, 0xED, 0xB0, 0x80 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 56191, s [0]);
	AssertEquals ("MS FX 1.1 behaviour", 56320, s [1]);
}
// Surrogate pair U+DB7F U+DFFF, each half separately encoded as a 3-byte sequence.
// Two [ExpectedException] attributes cannot coexist on one method, so the
// expectations are separated per framework profile.
// NOTE(review): the NET_2_0 symbol is assumed - confirm against the build's defines.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_524 ()
{
	byte[] data = { 0xED, 0xAD, 0xBF, 0xED, 0xBF, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 56191, s [0]);
	AssertEquals ("MS FX 1.1 behaviour", 57343, s [1]);
}
// Surrogate pair U+DB80 U+DC00, each half separately encoded as a 3-byte sequence.
// Two [ExpectedException] attributes cannot coexist on one method, so the
// expectations are separated per framework profile.
// NOTE(review): the NET_2_0 symbol is assumed - confirm against the build's defines.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_525 ()
{
	byte[] data = { 0xED, 0xAE, 0x80, 0xED, 0xB0, 0x80 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 56192, s [0]);
	AssertEquals ("MS FX 1.1 behaviour", 56320, s [1]);
}
// Surrogate pair U+DB80 U+DFFF, each half separately encoded as a 3-byte sequence.
// The last byte was previously 0x8F (giving U+DFCF / 57295), which breaks the
// pattern of the sibling cases _522/_524/_528 that pair a high surrogate with
// U+DFFF; it is now 0xBF and the expected low surrogate is 57343.
// Two [ExpectedException] attributes cannot coexist on one method, so the
// expectations are separated per framework profile.
// NOTE(review): the NET_2_0 symbol is assumed - confirm against the build's defines.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_526 ()
{
	byte[] data = { 0xED, 0xAE, 0x80, 0xED, 0xBF, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 56192, s [0]);
	AssertEquals ("MS FX 1.1 behaviour", 57343, s [1]);
}
// Surrogate pair U+DBFF U+DC00, each half separately encoded as a 3-byte sequence.
// Two [ExpectedException] attributes cannot coexist on one method, so the
// expectations are separated per framework profile.
// NOTE(review): the NET_2_0 symbol is assumed - confirm against the build's defines.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_527 ()
{
	byte[] data = { 0xED, 0xAF, 0xBF, 0xED, 0xB0, 0x80 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 56319, s [0]);
	AssertEquals ("MS FX 1.1 behaviour", 56320, s [1]);
}
// Surrogate pair U+DBFF U+DFFF, each half separately encoded as a 3-byte sequence.
// Two [ExpectedException] attributes cannot coexist on one method, so the
// expectations are separated per framework profile.
// NOTE(review): the NET_2_0 symbol is assumed - confirm against the build's defines.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_528 ()
{
	byte[] data = { 0xED, 0xAF, 0xBF, 0xED, 0xBF, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 56319, s [0]);
	AssertEquals ("MS FX 1.1 behaviour", 57343, s [1]);
}
// EF BF BE decodes to U+FFFE. MS Fx 1.1 accepts it, so no exception is declared
// and the test pins the decoded value instead.
public void T5_IllegalCodePosition_3_Other_531 ()
{
	byte[] encoded = new byte [] { 0xEF, 0xBF, 0xBE };
	string decoded = utf8.GetString (encoded);
	AssertEquals ("MS FX 1.1 behaviour", 0xFFFE, decoded [0]);
}
// EF BF BF decodes to U+FFFF. MS Fx 1.1 accepts it, so no exception is declared
// and the test pins the decoded value instead.
public void T5_IllegalCodePosition_3_Other_532 ()
{
	byte[] encoded = new byte [] { 0xEF, 0xBF, 0xBF };
	string decoded = utf8.GetString (encoded);
	AssertEquals ("MS FX 1.1 behaviour", 0xFFFF, decoded [0]);
}
// bug #75065 and #73086.
// U+FEFF must survive decoding as an ordinary character: first when decoding the
// raw bytes EF BB BF, then in an encode/decode round trip through Encoding.UTF8.
public void GetCharsFEFF ()
{
	Encoding strict = new UTF8Encoding (false, true);
	string decoded = strict.GetString (new byte [] {0xEF, 0xBB, 0xBF});
	AssertEquals ("\uFEFF", decoded);

	Encoding shared = Encoding.UTF8;
	char[] input = new char [] {'\uFEFF', 'A'};

	char[] roundTripped = shared.GetChars(shared.GetBytes(input));
	AssertEquals ("#1", '\uFEFF', roundTripped [0]);
	AssertEquals ("#2", 'A', roundTripped [1]);
}
// A clone of the code page 65001 encoding must be writable: IsReadOnly is false
// and assigning EncoderFallback must not throw.
public void CloneNotReadOnly ()
{
	// Clone () returns object; the explicit cast completes the previously
	// unterminated declaration and fails loudly if the clone is not an Encoding.
	Encoding e = (Encoding) Encoding.GetEncoding (65001).Clone ();
	AssertEquals (false, e.IsReadOnly);
	e.EncoderFallback = new EncoderExceptionFallback ();
}
// Decoding ED A2 8C (a 3-byte form in the surrogate range) with a throwing
// UTF8Encoding must raise an exception.
// Two [ExpectedException] attributes cannot coexist on one method, so the
// expectations are separated per framework profile.
// NOTE(review): the NET_2_0 symbol is assumed - confirm against the build's defines.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
[ExpectedException (typeof (ArgumentException))]
[Category ("NotDotNet")] // MS Bug
#endif
public void Bug77315 ()
{
	new UTF8Encoding (false, true).GetString (
		new byte [] {0xED, 0xA2, 0x8C});
}
// Encodes a lone high surrogate (U+D800) into a zero-length byte buffer, first
// through a stateful Encoder (feed the char, then flush) and then through
// Encoding.UTF8.GetBytes on a string.
// One profile expects the dangling surrogate to be dropped (0 bytes written), the
// other expects an ArgumentException. An AssertEquals followed unconditionally by
// Fail can never pass, so the two expectations are separated per framework profile.
// NOTE(review): the NET_2_0 symbol is assumed from the "in 2.0" labels - confirm
// against the build's defines.
public void SufficientByteArray ()
{
	Encoder e = Encoding.UTF8.GetEncoder ();
	byte [] bytes = new byte [0];

	char [] chars = new char [] {'\uD800'};
	// feed the high surrogate without flushing so it stays buffered in the encoder
	e.GetBytes (chars, 0, 1, bytes, 0, false);
	try {
		// flush with no further input
		int ret = e.GetBytes (chars, 1, 0, bytes, 0, true);
#if NET_2_0
		AssertEquals ("drop insufficient char in 2.0: char[]", 0, ret);
#else
		Fail ("ArgumentException is expected: char[]");
#endif
	} catch (ArgumentException) {
		// tolerated in either profile
	}

	string s = "\uD800";
	try {
		int ret = Encoding.UTF8.GetBytes (s, 0, 1, bytes, 0);
#if NET_2_0
		AssertEquals ("drop insufficient char in 2.0: string", 0, ret);
#else
		Fail ("ArgumentException is expected: string");
#endif
	} catch (ArgumentException) {
		// tolerated in either profile
	}
}
[Test] // bug #77550
// A single stray byte (183 = 0xB7) decoded with a non-throwing UTF8Encoding must
// yield exactly one character through GetCharCount, GetChars and GetString.
// The statements were truncated (GetChars lacked its destination buffer and start
// index, and the assertions were unterminated); they are completed here.
// NOTE(review): the "#1".."#3" labels and the buffer size are reconstructed -
// adjust if the project uses different ones.
public void DecoderFallbackSimple ()
{
	UTF8Encoding e = new UTF8Encoding (false, false);
	AssertType.AreEqual (1, e.GetDecoder ().GetCharCount (
			new byte [] {(byte) 183}, 0, 1),
			"#1");
	AssertType.AreEqual (1, e.GetDecoder().GetChars (
			new byte [] {(byte) 183}, 0, 1,
			new char [100], 0),
			"#2");
	AssertType.AreEqual (1, e.GetString (new byte [] {(byte) 183}).Length,
			"#3");
}
// The default decoder fallback of Encoding.UTF8 must be a replacement fallback
// that produces exactly one U+FFFD character.
public void FallbackDefaultEncodingUTF8 ()
{
	DecoderReplacementFallbackBuffer b =
		Encoding.UTF8.DecoderFallback.CreateFallbackBuffer ()
		as DecoderReplacementFallbackBuffer;
	// A different buffer type makes the 'as' cast yield null; report that as a
	// test failure instead of a NullReferenceException on the next line.
	AssertType.IsNotNull (b, "#0");
	AssertType.IsTrue (b.Fallback (new byte [] {}, 0), "#1");
	AssertType.IsFalse (b.MovePrevious (), "#2");
	AssertType.AreEqual (1, b.Remaining, "#3");
	AssertType.AreEqual ('\uFFFD', b.GetNextChar (), "#4");
}