2 // UTF8EncodingTest.cs - NUnit Test Cases for System.Text.UTF8Encoding
5 // Patrick Kalkman kalkman@cistron.nl
6 // Sebastien Pouliot (spouliot@motus.com)
8 // (C) 2003 Patrick Kalkman
9 // (C) 2004 Novell (http://www.novell.com)
12 using NUnit.Framework;
18 using DecoderException = System.Text.DecoderFallbackException;
20 using DecoderException = System.ArgumentException;
23 using AssertType = NUnit.Framework.Assert;
25 namespace MonoTests.System.Text
28 public class UTF8EncodingTest : Assertion
30 private UTF8Encoding utf8;
35 utf8 = new UTF8Encoding (true, true);
public void IsBrowserDisplay ()
{
	// The shared strict UTF-8 instance must report itself as displayable by browsers.
	bool displayable = utf8.IsBrowserDisplay;
	Assert (displayable);
}
public void IsBrowserSave ()
{
	// The shared strict UTF-8 instance must report itself as usable by browsers for saving.
	bool savable = utf8.IsBrowserSave;
	Assert (savable);
}
public void IsMailNewsDisplay ()
{
	// The shared strict UTF-8 instance must report itself as displayable by mail/news clients.
	bool displayable = utf8.IsMailNewsDisplay;
	Assert (displayable);
}
public void IsMailNewsSave ()
{
	// The shared strict UTF-8 instance must report itself as usable by mail/news clients for saving.
	bool savable = utf8.IsMailNewsSave;
	Assert (savable);
}
public void TestEncodingGetBytes1()
{
	// "A<NOT IDENTICAL TO><ALPHA>." is expected to encode as 41 E2 89 A2 CE 91 2E
	int[] expected = { 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E };

	byte[] encoded = new UTF8Encoding ().GetBytes ("\u0041\u2262\u0391\u002E");

	// One check per byte, labelled "UTF #1" .. "UTF #7".
	for (int i = 0; i < expected.Length; i++) {
		Assertion.AssertEquals ("UTF #" + (i + 1), expected [i], encoded [i]);
	}
}
public void TestEncodingGetBytes2()
{
	// "Hi Mom <WHITE SMILING FACE>!" is expected to encode as 48 69 20 4D 6F 6D 20 E2 98 BA 21
	const string text = "\u0048\u0069\u0020\u004D\u006F\u006D\u0020\u263A\u0021";
	int[] expected = { 0x48, 0x69, 0x20, 0x4D, 0x6F, 0x6D, 0x20, 0xE2, 0x98, 0xBA, 0x21 };
	byte[] buffer = new byte [11];

	// Uses the GetBytes overload that writes into a caller-supplied buffer.
	int written = new UTF8Encoding ().GetBytes (text.ToCharArray (), 0, text.Length, buffer, 0);

	Assertion.AssertEquals ("UTF #1", 11, written);
	// Byte checks are labelled "UTF #2" .. "UTF #12".
	for (int i = 0; i < expected.Length; i++) {
		Assertion.AssertEquals ("UTF #" + (i + 2), expected [i], buffer [i]);
	}
}
public void TestDecodingGetChars1()
{
	// 41 E2 89 A2 CE 91 2E is expected to decode as "A<NOT IDENTICAL TO><ALPHA>."
	byte[] encoded = { 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E };
	int[] expected = { 0x0041, 0x2262, 0x0391, 0x002E };

	char[] decoded = new UTF8Encoding ().GetChars (encoded);

	// One check per decoded character, labelled "UTF #1" .. "UTF #4".
	for (int i = 0; i < expected.Length; i++) {
		Assertion.AssertEquals ("UTF #" + (i + 1), expected [i], decoded [i]);
	}
}
// Checks GetMaxCharCount for a 50-byte input.
// The two expectations (51 and 50) are mutually exclusive for the same call, so
// exactly one of them is compiled in, depending on the targeted framework profile.
// NOTE(review): NET_2_0 is assumed to be the build symbol for the 2.0 profile - confirm.
[Category ("NotWorking")]
public void TestMaxCharCount()
{
	UTF8Encoding UTF8enc = new UTF8Encoding ();
#if NET_2_0
	// hmm, where is this extra 1 coming from?
	Assertion.AssertEquals ("UTF #1", 51, UTF8enc.GetMaxCharCount(50));
#else
	Assertion.AssertEquals ("UTF #1", 50, UTF8enc.GetMaxCharCount(50));
#endif
}
// Checks GetMaxByteCount for a 50-character input.
// The two expectations (153 and 200) are mutually exclusive for the same call, so
// exactly one of them is compiled in, depending on the targeted framework profile.
// NOTE(review): NET_2_0 is assumed to be the build symbol for the 2.0 profile - confirm.
[Category ("NotWorking")]
public void TestMaxByteCount()
{
	UTF8Encoding UTF8enc = new UTF8Encoding ();
#if NET_2_0
	// maybe under .NET 2.0 insufficient surrogate pair is
	// just not handled, and 3 is Preamble size.
	Assertion.AssertEquals ("UTF #1", 153, UTF8enc.GetMaxByteCount(50));
#else
	Assertion.AssertEquals ("UTF #1", 200, UTF8enc.GetMaxByteCount(50));
#endif
}
// regression for bug #59648
// Decodes an overlong sequence (C0 AF) with throwOnInvalidBytes = false, first on
// its own and then embedded between ASCII digits. The expected output differs per
// framework profile (U+FFFD replacement vs. silently dropped bytes), so only one
// set of assertions is compiled in; this also keeps `s` declared exactly once.
// NOTE(review): NET_2_0 is assumed to be the build symbol for the 2.0 profile - confirm.
public void TestThrowOnInvalid ()
{
	UTF8Encoding u = new UTF8Encoding (true, false);

	byte[] data = new byte [] { 0xC0, 0xAF };
#if NET_2_0
	// Each invalid byte is replaced by U+FFFD.
	AssertEquals ("#A0", 2, u.GetCharCount (data));
	string s = u.GetString (data);
	AssertEquals ("#A1", "\uFFFD\uFFFD", s);
#else
	// Invalid bytes produce no output at all.
	AssertEquals ("#A0", 0, u.GetCharCount (data));
	string s = u.GetString (data);
	AssertEquals ("#A1", String.Empty, s);
#endif

	data = new byte [] { 0x30, 0x31, 0xC0, 0xAF, 0x30, 0x32 };
	s = u.GetString (data);
#if NET_2_0
	AssertEquals ("#B1", 6, s.Length);
	AssertEquals ("#B2", 0x30, (int) s [0]);
	AssertEquals ("#B3", 0x31, (int) s [1]);
	AssertEquals ("#B4", 0xFFFD, (int) s [2]);
	AssertEquals ("#B5", 0xFFFD, (int) s [3]);
	AssertEquals ("#B6", 0x30, (int) s [4]);
	AssertEquals ("#B7", 0x32, (int) s [5]);
#else
	AssertEquals ("#B1", 4, s.Length);
	AssertEquals ("#B2", 0x30, (int) s [0]);
	AssertEquals ("#B3", 0x31, (int) s [1]);
	AssertEquals ("#B4", 0x30, (int) s [2]);
	AssertEquals ("#B5", 0x32, (int) s [3]);
#endif
}
188 // UTF8 decoding tests from http://www.cl.cam.ac.uk/~mgk25/
public void T1_Correct_GreekWord_kosme ()
{
	byte[] kosme = { 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5 };

	// Keeping the Greek text itself in the source file is fragile, so the test
	// only verifies that decoding and re-encoding reproduces the input bytes.
	string decoded = utf8.GetString (kosme);
	byte[] reencoded = utf8.GetBytes (decoded);

	AssertEquals ("Reconverted", BitConverter.ToString (kosme), BitConverter.ToString (reencoded));
}
public void T2_Boundary_1_FirstPossibleSequence_Pass ()
{
	// Smallest code point of each valid encoded length (1 to 4 bytes).
	// Every case is decoded, checked, and re-encoded back to the same bytes.
	byte[] oneByte = { 0x00 };
	string decoded = utf8.GetString (oneByte);
	AssertEquals ("1 byte (U-00000000)", "\0", decoded);
	AssertEquals ("Reconverted-1", BitConverter.ToString (oneByte), BitConverter.ToString (utf8.GetBytes (decoded)));

	byte[] twoBytes = { 0xC2, 0x80 };
	decoded = utf8.GetString (twoBytes);
	AssertEquals ("2 bytes (U-00000080)", 0x80, decoded [0]);
	AssertEquals ("Reconverted-2", BitConverter.ToString (twoBytes), BitConverter.ToString (utf8.GetBytes (decoded)));

	byte[] threeBytes = { 0xE0, 0xA0, 0x80 };
	decoded = utf8.GetString (threeBytes);
	AssertEquals ("3 bytes (U-00000800)", 0x800, decoded [0]);
	AssertEquals ("Reconverted-3", BitConverter.ToString (threeBytes), BitConverter.ToString (utf8.GetBytes (decoded)));

	// U+10000 does not fit in one UTF-16 unit: it decodes to the pair D800 DC00.
	byte[] fourBytes = { 0xF0, 0x90, 0x80, 0x80 };
	decoded = utf8.GetString (fourBytes);
	AssertEquals ("4 bytes (U-00010000)-0", 0xD800, decoded [0]);
	AssertEquals ("4 bytes (U-00010000)-1", 0xDC00, decoded [1]);
	AssertEquals ("Reconverted-4", BitConverter.ToString (fourBytes), BitConverter.ToString (utf8.GetBytes (decoded)));
}
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_1_FirstPossibleSequence_Fail_5 ()
{
	// Smallest 5-byte form. GetString is expected to throw; the assertions
	// after it are only reached if the decoder accepts the sequence.
	byte[] fiveBytes = { 0xF8, 0x88, 0x80, 0x80, 0x80 };
	string decoded = utf8.GetString (fiveBytes);
	AssertNull ("5 bytes (U-00200000)", decoded);
	AssertEquals ("Reconverted-5", BitConverter.ToString (fiveBytes), BitConverter.ToString (utf8.GetBytes (decoded)));
}
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_1_FirstPossibleSequence_Fail_6 ()
{
	// Smallest 6-byte form. GetString is expected to throw; the assertions
	// after it are only reached if the decoder accepts the sequence.
	byte[] sixBytes = { 0xFC, 0x84, 0x80, 0x80, 0x80, 0x80 };
	string decoded = utf8.GetString (sixBytes);
	AssertNull ("6 bytes (U-04000000)", decoded);
	AssertEquals ("Reconverted-6", BitConverter.ToString (sixBytes), BitConverter.ToString (utf8.GetBytes (decoded)));
}
public void T2_Boundary_2_LastPossibleSequence_Pass ()
{
	// Largest code point of the 1-, 2- and 3-byte forms.
	// Every case is decoded, checked, and re-encoded back to the same bytes.
	byte[] oneByte = { 0x7F };
	string decoded = utf8.GetString (oneByte);
	AssertEquals ("1 byte (U-0000007F)", 0x7F, decoded [0]);
	AssertEquals ("Reconverted-1", BitConverter.ToString (oneByte), BitConverter.ToString (utf8.GetBytes (decoded)));

	byte[] twoBytes = { 0xDF, 0xBF };
	decoded = utf8.GetString (twoBytes);
	AssertEquals ("2 bytes (U-000007FF)", 0x7FF, decoded [0]);
	AssertEquals ("Reconverted-2", BitConverter.ToString (twoBytes), BitConverter.ToString (utf8.GetBytes (decoded)));

	byte[] threeBytes = { 0xEF, 0xBF, 0xBF };
	decoded = utf8.GetString (threeBytes);
	AssertEquals ("3 bytes (U-0000FFFF)", 0xFFFF, decoded [0]);
	AssertEquals ("Reconverted-3", BitConverter.ToString (threeBytes), BitConverter.ToString (utf8.GetBytes (decoded)));
}
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_2_LastPossibleSequence_Fail_4 ()
{
	// Largest 4-byte form, U-001FFFFF, which is encoded as F7 BF BF BF and lies
	// beyond U+10FFFF. The lead byte must be 0xF7: with 0x7F (a plain ASCII byte)
	// the test would only exercise stray continuation bytes, not the 4-byte form.
	// GetString is expected to throw; the assertions after it are only reached
	// if the decoder accepts the sequence.
	byte[] data224 = { 0xF7, 0xBF, 0xBF, 0xBF };
	string s = utf8.GetString (data224);
	AssertNull ("4 bytes (U-001FFFFF)", s);
	AssertEquals ("Reconverted-4", BitConverter.ToString (data224), BitConverter.ToString (utf8.GetBytes (s)));
}
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_2_LastPossibleSequence_Fail_5 ()
{
	// Largest 5-byte form. GetString is expected to throw; the assertions
	// after it are only reached if the decoder accepts the sequence.
	byte[] fiveBytes = { 0xFB, 0xBF, 0xBF, 0xBF, 0xBF };
	string decoded = utf8.GetString (fiveBytes);
	AssertNull ("5 bytes (U-03FFFFFF)", decoded);
	AssertEquals ("Reconverted-5", BitConverter.ToString (fiveBytes), BitConverter.ToString (utf8.GetBytes (decoded)));
}
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_2_LastPossibleSequence_Fail_6 ()
{
	// Largest 6-byte form. GetString is expected to throw; the assertions
	// after it are only reached if the decoder accepts the sequence.
	byte[] sixBytes = { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF, 0xBF };
	string decoded = utf8.GetString (sixBytes);
	AssertNull ("6 bytes (U-7FFFFFFF)", decoded);
	AssertEquals ("Reconverted-6", BitConverter.ToString (sixBytes), BitConverter.ToString (utf8.GetBytes (decoded)));
}
public void T2_Boundary_3_Other_Pass ()
{
	// Code points on either side of the surrogate range, U+FFFD, and the
	// highest valid code point. Each is decoded, checked, and re-encoded.
	byte[] belowSurrogates = { 0xED, 0x9F, 0xBF };
	string decoded = utf8.GetString (belowSurrogates);
	AssertEquals ("U-0000D7FF", 0xD7FF, decoded [0]);
	AssertEquals ("Reconverted-1", BitConverter.ToString (belowSurrogates), BitConverter.ToString (utf8.GetBytes (decoded)));

	byte[] aboveSurrogates = { 0xEE, 0x80, 0x80 };
	decoded = utf8.GetString (aboveSurrogates);
	AssertEquals ("U-0000E000", 0xE000, decoded [0]);
	AssertEquals ("Reconverted-2", BitConverter.ToString (aboveSurrogates), BitConverter.ToString (utf8.GetBytes (decoded)));

	byte[] replacementChar = { 0xEF, 0xBF, 0xBD };
	decoded = utf8.GetString (replacementChar);
	AssertEquals ("U-0000FFFD", 0xFFFD, decoded [0]);
	AssertEquals ("Reconverted-3", BitConverter.ToString (replacementChar), BitConverter.ToString (utf8.GetBytes (decoded)));

	// U+10FFFF decodes to the surrogate pair DBFF DFFF.
	byte[] highestCodePoint = { 0xF4, 0x8F, 0xBF, 0xBF };
	decoded = utf8.GetString (highestCodePoint);
	AssertEquals ("U-0010FFFF-0", 0xDBFF, decoded [0]);
	AssertEquals ("U-0010FFFF-1", 0xDFFF, decoded [1]);
	AssertEquals ("Reconverted-4", BitConverter.ToString (highestCodePoint), BitConverter.ToString (utf8.GetBytes (decoded)));
}
[ExpectedException (typeof (DecoderException))]
public void T2_Boundary_3_Other_Fail_5 ()
{
	// First code point past U+10FFFF. GetString is expected to throw; the
	// assertions after it are only reached if the decoder accepts the sequence.
	byte[] beyondUnicode = { 0xF4, 0x90, 0x80, 0x80 };
	string decoded = utf8.GetString (beyondUnicode);
	AssertNull ("U-00110000", decoded);
	AssertEquals ("Reconverted-5", BitConverter.ToString (beyondUnicode), BitConverter.ToString (utf8.GetBytes (decoded)));
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_311 ()
{
	// The first continuation byte (0x80) with no lead byte: decoding must throw.
	byte[] strayContinuation = { 0x80 };
	utf8.GetString (strayContinuation);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_312 ()
{
	// The last continuation byte (0xBF) with no lead byte: decoding must throw.
	byte[] strayContinuation = { 0xBF };
	utf8.GetString (strayContinuation);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_313 ()
{
	// Two continuation bytes with no lead byte: decoding must throw.
	byte[] strayContinuations = { 0x80, 0xBF };
	utf8.GetString (strayContinuations);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_314 ()
{
	// Three continuation bytes with no lead byte: decoding must throw.
	byte[] strayContinuations = { 0x80, 0xBF, 0x80 };
	utf8.GetString (strayContinuations);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_315 ()
{
	// Four continuation bytes with no lead byte: decoding must throw.
	byte[] strayContinuations = { 0x80, 0xBF, 0x80, 0xBF };
	utf8.GetString (strayContinuations);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_316 ()
{
	// Five continuation bytes with no lead byte: decoding must throw.
	byte[] strayContinuations = { 0x80, 0xBF, 0x80, 0xBF, 0x80 };
	utf8.GetString (strayContinuations);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_317 ()
{
	// Six continuation bytes with no lead byte: decoding must throw.
	byte[] strayContinuations = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF };
	utf8.GetString (strayContinuations);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_318 ()
{
	// Seven continuation bytes with no lead byte: decoding must throw.
	byte[] strayContinuations = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 };
	utf8.GetString (strayContinuations);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_1_UnexpectedContinuation_319 ()
{
	// 64 different continuation characters (0x80..0xBF), none preceded by a
	// lead byte. The initializer needs its `byte[] data` declaration, otherwise
	// `data` below is undefined.
	byte[] data = {
		0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
		0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
		0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
		0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_321 ()
{
	// All 32 lead bytes of 2-byte sequences (0xC0..0xDF), each followed by a
	// space instead of a continuation byte. The initializer needs its
	// `byte[] data` declaration, otherwise `data` below is undefined.
	byte[] data = {
		0xC0, 0x20, 0xC1, 0x20, 0xC2, 0x20, 0xC3, 0x20, 0xC4, 0x20, 0xC5, 0x20, 0xC6, 0x20, 0xC7, 0x20,
		0xC8, 0x20, 0xC9, 0x20, 0xCA, 0x20, 0xCB, 0x20, 0xCC, 0x20, 0xCD, 0x20, 0xCE, 0x20, 0xCF, 0x20,
		0xD0, 0x20, 0xD1, 0x20, 0xD2, 0x20, 0xD3, 0x20, 0xD4, 0x20, 0xD5, 0x20, 0xD6, 0x20, 0xD7, 0x20,
		0xD8, 0x20, 0xD9, 0x20, 0xDA, 0x20, 0xDB, 0x20, 0xDC, 0x20, 0xDD, 0x20, 0xDE, 0x20, 0xDF, 0x20 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_322 ()
{
	// All 16 lead bytes of 3-byte sequences (0xE0..0xEF), each followed by a
	// space instead of continuation bytes. The initializer needs its
	// `byte[] data` declaration, otherwise `data` below is undefined.
	byte[] data = {
		0xE0, 0x20, 0xE1, 0x20, 0xE2, 0x20, 0xE3, 0x20, 0xE4, 0x20, 0xE5, 0x20, 0xE6, 0x20, 0xE7, 0x20,
		0xE8, 0x20, 0xE9, 0x20, 0xEA, 0x20, 0xEB, 0x20, 0xEC, 0x20, 0xED, 0x20, 0xEE, 0x20, 0xEF, 0x20 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_323 ()
{
	// All 8 lead bytes of 4-byte sequences (0xF0..0xF7), each followed by a
	// space instead of continuation bytes: decoding must throw.
	byte[] lonelyLeads = {
		0xF0, 0x20, 0xF1, 0x20, 0xF2, 0x20, 0xF3, 0x20,
		0xF4, 0x20, 0xF5, 0x20, 0xF6, 0x20, 0xF7, 0x20 };
	utf8.GetString (lonelyLeads);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_324 ()
{
	// All 4 lead bytes of 5-byte sequences (0xF8..0xFB), each followed by a
	// space instead of continuation bytes. This case previously repeated the
	// 0xF0..0xF7 data of test 323, leaving the 5-byte lead bytes untested.
	byte[] data = { 0xF8, 0x20, 0xF9, 0x20, 0xFA, 0x20, 0xFB, 0x20 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_2_LonelyStart_325 ()
{
	// Both lead bytes of 6-byte sequences (0xFC, 0xFD), each followed by a
	// space instead of continuation bytes: decoding must throw.
	byte[] lonelyLeads = { 0xFC, 0x20, 0xFD, 0x20 };
	utf8.GetString (lonelyLeads);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_331 ()
{
	// 2-byte sequence cut off after its lead byte: decoding must throw.
	byte[] truncated = { 0xC0 };
	utf8.GetString (truncated);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_332 ()
{
	// 3-byte sequence with its last byte missing: decoding must throw.
	byte[] truncated = { 0xE0, 0x80 };
	utf8.GetString (truncated);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_333 ()
{
	// 4-byte sequence with its last byte missing: decoding must throw.
	byte[] truncated = { 0xF0, 0x80, 0x80 };
	utf8.GetString (truncated);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_334 ()
{
	// 5-byte sequence with its last byte missing: decoding must throw.
	byte[] truncated = { 0xF8, 0x80, 0x80, 0x80 };
	utf8.GetString (truncated);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_335 ()
{
	// 6-byte sequence with its last byte missing: decoding must throw.
	byte[] truncated = { 0xFC, 0x80, 0x80, 0x80, 0x80 };
	utf8.GetString (truncated);
}
// MS Fx 1.1 accept this
// [ExpectedException (typeof (DecoderException))]
// 2-byte sequence (lead byte 0xDF) with its last byte missing. Two outcomes are
// tolerated: an empty string, or a DecoderException. The catch clause needs its
// enclosing try block to be valid.
public void T3_Malformed_3_LastContinuationMissing_336 ()
{
	byte[] data = { 0xDF };
	try {
		string s = utf8.GetString (data);
		// exception is "really" expected here
		AssertEquals ("MS FX 1.1 behaviour", String.Empty, s);
	}
	catch (DecoderException) {
		// but Mono doesn't - better stick to the standard
	}
}
// MS Fx 1.1 accept this
// [ExpectedException (typeof (DecoderException))]
// 3-byte sequence (EF BF) with its last byte missing. Two outcomes are
// tolerated: an empty string, or a DecoderException. The catch clause needs its
// enclosing try block to be valid.
public void T3_Malformed_3_LastContinuationMissing_337 ()
{
	byte[] data = { 0xEF, 0xBF };
	try {
		string s = utf8.GetString (data);
		// exception is "really" expected here
		AssertEquals ("MS FX 1.1 behaviour", String.Empty, s);
	}
	catch (DecoderException) {
		// but Mono doesn't - better stick to the standard
	}
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_338 ()
{
	// 4-byte sequence (lead byte 0xF7) with its last byte missing: decoding must throw.
	byte[] truncated = { 0xF7, 0xBF, 0xBF };
	utf8.GetString (truncated);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_339 ()
{
	// 5-byte sequence (lead byte 0xFB) with its last byte missing. The lead byte
	// was written as 0xF (ASCII 0x0F), which turned the input into an ASCII
	// character followed by stray continuation bytes instead of a truncated
	// 5-byte sequence.
	byte[] data = { 0xFB, 0xBF, 0xBF, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_3_LastContinuationMissing_3310 ()
{
	// 6-byte sequence (lead byte 0xFD) with its last byte missing: decoding must throw.
	byte[] truncated = { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF };
	utf8.GetString (truncated);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_4_ConcatenationImcomplete ()
{
	// The ten truncated sequences of the LastContinuationMissing tests,
	// concatenated into one input. The initializer needs its `byte[] data`
	// declaration, otherwise `data` below is undefined.
	byte[] data = {
		0xC0, 0xE0, 0x80, 0xF0, 0x80, 0x80, 0xF8, 0x80, 0x80, 0x80, 0xFC, 0x80, 0x80, 0x80, 0x80, 0xDF,
		0xEF, 0xBF, 0xF7, 0xBF, 0xBF, 0xFB, 0xBF, 0xBF, 0xBF, 0xFD, 0xBF, 0xBF, 0xBF, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_5_ImpossibleBytes_351 ()
{
	// 0xFE can never appear in UTF-8: decoding must throw.
	byte[] impossible = { 0xFE };
	utf8.GetString (impossible);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_5_ImpossibleBytes_352 ()
{
	// 0xFF can never appear in UTF-8: decoding must throw.
	byte[] impossible = { 0xFF };
	utf8.GetString (impossible);
}
[ExpectedException (typeof (DecoderException))]
public void T3_Malformed_5_ImpossibleBytes_353 ()
{
	// A run of bytes that can never appear in UTF-8 (FE FE FF FF): decoding must throw.
	byte[] impossible = { 0xFE, 0xFE, 0xFF, 0xFF };
	utf8.GetString (impossible);
}
// Overlong == dangerous -> "safe" decoder should reject them
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_411 ()
{
	// '/' (U+002F) encoded in 2 bytes instead of 1: decoding must throw.
	byte[] overlongSlash = { 0xC0, 0xAF };
	utf8.GetString (overlongSlash);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_412 ()
{
	// '/' (U+002F) encoded in 3 bytes instead of 1: decoding must throw.
	byte[] overlongSlash = { 0xE0, 0x80, 0xAF };
	utf8.GetString (overlongSlash);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_413 ()
{
	// '/' (U+002F) encoded in 4 bytes instead of 1: decoding must throw.
	byte[] overlongSlash = { 0xF0, 0x80, 0x80, 0xAF };
	utf8.GetString (overlongSlash);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_414 ()
{
	// '/' (U+002F) encoded in 5 bytes instead of 1: decoding must throw.
	byte[] overlongSlash = { 0xF8, 0x80, 0x80, 0x80, 0xAF };
	utf8.GetString (overlongSlash);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_1_ASCII_Slash_415 ()
{
	// '/' (U+002F) encoded in 6 bytes instead of 1: decoding must throw.
	byte[] overlongSlash = { 0xFC, 0x80, 0x80, 0x80, 0x80, 0xAF };
	utf8.GetString (overlongSlash);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_421 ()
{
	// Highest value (U+007F) that can be written as an overlong 2-byte sequence: decoding must throw.
	byte[] overlongMax = { 0xC1, 0xBF };
	utf8.GetString (overlongMax);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_422 ()
{
	// Highest value (U+07FF) that can be written as an overlong 3-byte sequence: decoding must throw.
	byte[] overlongMax = { 0xE0, 0x9F, 0xBF };
	utf8.GetString (overlongMax);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_423 ()
{
	// Highest value (U+FFFF) that can be written as an overlong 4-byte sequence: decoding must throw.
	byte[] overlongMax = { 0xF0, 0x8F, 0xBF, 0xBF };
	utf8.GetString (overlongMax);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_424 ()
{
	// Highest value that can be written as an overlong 5-byte sequence: decoding must throw.
	byte[] overlongMax = { 0xF8, 0x87, 0xBF, 0xBF, 0xBF };
	utf8.GetString (overlongMax);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_2_MaximumBoundary_425 ()
{
	// Highest value that can be written as an overlong 6-byte sequence: decoding must throw.
	byte[] overlongMax = { 0xFC, 0x83, 0xBF, 0xBF, 0xBF, 0xBF };
	utf8.GetString (overlongMax);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_431 ()
{
	// NUL (U+0000) encoded in 2 bytes instead of 1: decoding must throw.
	byte[] overlongNul = { 0xC0, 0x80 };
	utf8.GetString (overlongNul);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_432 ()
{
	// NUL (U+0000) encoded in 3 bytes instead of 1: decoding must throw.
	byte[] overlongNul = { 0xE0, 0x80, 0x80 };
	utf8.GetString (overlongNul);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_433 ()
{
	// NUL (U+0000) encoded in 4 bytes instead of 1: decoding must throw.
	byte[] overlongNul = { 0xF0, 0x80, 0x80, 0x80 };
	utf8.GetString (overlongNul);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_434 ()
{
	// NUL (U+0000) encoded in 5 bytes instead of 1: decoding must throw.
	byte[] overlongNul = { 0xF8, 0x80, 0x80, 0x80, 0x80 };
	utf8.GetString (overlongNul);
}
[ExpectedException (typeof (DecoderException))]
public void T4_Overlong_3_NUL_435 ()
{
	// NUL (U+0000) encoded in 6 bytes instead of 1: decoding must throw.
	byte[] overlongNul = { 0xFC, 0x80, 0x80, 0x80, 0x80, 0x80 };
	utf8.GetString (overlongNul);
}
// Lone surrogate U+D800 encoded directly as ED A0 80.
// Only one ExpectedException attribute may apply to a method, so the attribute
// set is selected at compile time per framework profile.
// NOTE(review): NET_2_0 is assumed to be the build symbol for the 2.0 profile - confirm.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_511 ()
{
	byte[] data = { 0xED, 0xA0, 0x80 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 55296, s [0]);
}
// Lone surrogate U+DB7F encoded directly as ED AD BF.
// Only one ExpectedException attribute may apply to a method, so the attribute
// set is selected at compile time per framework profile.
// NOTE(review): NET_2_0 is assumed to be the build symbol for the 2.0 profile - confirm.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_512 ()
{
	byte[] data = { 0xED, 0xAD, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 56191, s [0]);
}
// Lone surrogate U+DB80 encoded directly as ED AE 80.
// Only one ExpectedException attribute may apply to a method, so the attribute
// set is selected at compile time per framework profile.
// NOTE(review): NET_2_0 is assumed to be the build symbol for the 2.0 profile - confirm.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_513 ()
{
	byte[] data = { 0xED, 0xAE, 0x80 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 56192, s [0]);
}
// Lone surrogate U+DBFF encoded directly as ED AF BF.
// Only one ExpectedException attribute may apply to a method, so the attribute
// set is selected at compile time per framework profile.
// NOTE(review): NET_2_0 is assumed to be the build symbol for the 2.0 profile - confirm.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_514 ()
{
	byte[] data = { 0xED, 0xAF, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 56319, s [0]);
}
// Lone surrogate U+DC00 encoded directly as ED B0 80.
// Only one ExpectedException attribute may apply to a method, so the attribute
// set is selected at compile time per framework profile.
// NOTE(review): NET_2_0 is assumed to be the build symbol for the 2.0 profile - confirm.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_515 ()
{
	byte[] data = { 0xED, 0xB0, 0x80 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 56320, s [0]);
}
// Lone surrogate U+DF80 encoded directly as ED BE 80.
// Only one ExpectedException attribute may apply to a method, so the attribute
// set is selected at compile time per framework profile.
// NOTE(review): NET_2_0 is assumed to be the build symbol for the 2.0 profile - confirm.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_516 ()
{
	byte[] data = { 0xED, 0xBE, 0x80 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 57216, s [0]);
}
// Lone surrogate U+DFFF encoded directly as ED BF BF.
// Only one ExpectedException attribute may apply to a method, so the attribute
// set is selected at compile time per framework profile.
// NOTE(review): NET_2_0 is assumed to be the build symbol for the 2.0 profile - confirm.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_1_UTF16Surrogates_517 ()
{
	byte[] data = { 0xED, 0xBF, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 57343, s [0]);
}
// Surrogate pair U+D800 U+DC00, each half encoded separately as 3 UTF-8 bytes.
// Only one ExpectedException attribute may apply to a method, so the attribute
// set is selected at compile time per framework profile.
// NOTE(review): NET_2_0 is assumed to be the build symbol for the 2.0 profile - confirm.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_521 ()
{
	byte[] data = { 0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 55296, s [0]);
	AssertEquals ("MS FX 1.1 behaviour", 56320, s [1]);
}
// Surrogate pair U+D800 U+DFFF, each half encoded separately as 3 UTF-8 bytes.
// Only one ExpectedException attribute may apply to a method, so the attribute
// set is selected at compile time per framework profile.
// NOTE(review): NET_2_0 is assumed to be the build symbol for the 2.0 profile - confirm.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_522 ()
{
	byte[] data = { 0xED, 0xA0, 0x80, 0xED, 0xBF, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 55296, s [0]);
	AssertEquals ("MS FX 1.1 behaviour", 57343, s [1]);
}
// Surrogate pair U+DB7F U+DC00, each half encoded separately as 3 UTF-8 bytes.
// Only one ExpectedException attribute may apply to a method, so the attribute
// set is selected at compile time per framework profile.
// NOTE(review): NET_2_0 is assumed to be the build symbol for the 2.0 profile - confirm.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_523 ()
{
	byte[] data = { 0xED, 0xAD, 0xBF, 0xED, 0xB0, 0x80 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 56191, s [0]);
	AssertEquals ("MS FX 1.1 behaviour", 56320, s [1]);
}
// Surrogate pair U+DB7F U+DFFF, each half encoded separately as 3 UTF-8 bytes.
// Only one ExpectedException attribute may apply to a method, so the attribute
// set is selected at compile time per framework profile.
// NOTE(review): NET_2_0 is assumed to be the build symbol for the 2.0 profile - confirm.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_524 ()
{
	byte[] data = { 0xED, 0xAD, 0xBF, 0xED, 0xBF, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 56191, s [0]);
	AssertEquals ("MS FX 1.1 behaviour", 57343, s [1]);
}
// Surrogate pair U+DB80 U+DC00, each half encoded separately as 3 UTF-8 bytes.
// Only one ExpectedException attribute may apply to a method, so the attribute
// set is selected at compile time per framework profile.
// NOTE(review): NET_2_0 is assumed to be the build symbol for the 2.0 profile - confirm.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_525 ()
{
	byte[] data = { 0xED, 0xAE, 0x80, 0xED, 0xB0, 0x80 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 56192, s [0]);
	AssertEquals ("MS FX 1.1 behaviour", 56320, s [1]);
}
// Surrogate pair U+DB80 U+DFFF, each half encoded separately as 3 UTF-8 bytes.
// The low half was written as ED BF 8F (U+DFCF, 57295), which breaks the series:
// the sibling tests pair every high surrogate with U+DC00 and U+DFFF in turn.
// It is now ED BF BF (U+DFFF, 57343).
// Only one ExpectedException attribute may apply to a method, so the attribute
// set is selected at compile time per framework profile.
// NOTE(review): NET_2_0 is assumed to be the build symbol for the 2.0 profile - confirm.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_526 ()
{
	byte[] data = { 0xED, 0xAE, 0x80, 0xED, 0xBF, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 56192, s [0]);
	AssertEquals ("MS FX 1.1 behaviour", 57343, s [1]);
}
// Surrogate pair U+DBFF U+DC00, each half encoded separately as 3 UTF-8 bytes.
// Only one ExpectedException attribute may apply to a method, so the attribute
// set is selected at compile time per framework profile.
// NOTE(review): NET_2_0 is assumed to be the build symbol for the 2.0 profile - confirm.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_527 ()
{
	byte[] data = { 0xED, 0xAF, 0xBF, 0xED, 0xB0, 0x80 };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 56319, s [0]);
	AssertEquals ("MS FX 1.1 behaviour", 56320, s [1]);
}
// Surrogate pair U+DBFF U+DFFF, each half encoded separately as 3 UTF-8 bytes.
// Only one ExpectedException attribute may apply to a method, so the attribute
// set is selected at compile time per framework profile.
// NOTE(review): NET_2_0 is assumed to be the build symbol for the 2.0 profile - confirm.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
// MS Fx 1.1 accept this
[Category ("NotDotNet")]
[ExpectedException (typeof (DecoderException))]
#endif
public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_528 ()
{
	byte[] data = { 0xED, 0xAF, 0xBF, 0xED, 0xBF, 0xBF };
	string s = utf8.GetString (data);
	// exception is "really" expected here
	AssertEquals ("MS FX 1.1 behaviour", 56319, s [0]);
	AssertEquals ("MS FX 1.1 behaviour", 57343, s [1]);
}
// U+FFFE (EF BF BE) is treated as an illegal code position by the test
// series, but this test follows MS Fx 1.1 and expects it to decode, which is
// why the method carries no ExpectedException attribute.
public void T5_IllegalCodePosition_3_Other_531 ()
{
	byte[] encodedFFFE = { 0xEF, 0xBF, 0xBE };
	string decoded = utf8.GetString (encodedFFFE);
	AssertEquals ("MS FX 1.1 behaviour", 0xFFFE, decoded [0]);
}
// U+FFFF (EF BF BF) is treated as an illegal code position by the test
// series, but this test follows MS Fx 1.1 and expects it to decode, which is
// why the method carries no ExpectedException attribute.
public void T5_IllegalCodePosition_3_Other_532 ()
{
	byte[] encodedFFFF = { 0xEF, 0xBF, 0xBF };
	string decoded = utf8.GetString (encodedFFFF);
	AssertEquals ("MS FX 1.1 behaviour", 0xFFFF, decoded [0]);
}
// bug #75065 and #73086.
public void GetCharsFEFF ()
{
	// The bytes EF BB BF must decode to U+FEFF rather than being dropped.
	byte [] encodedFEFF = { 0xEF, 0xBB, 0xBF };
	Encoding strict = new UTF8Encoding (false, true);
	AssertEquals ("\uFEFF", strict.GetString (encodedFEFF));

	// U+FEFF must also survive an encode/decode round trip through Encoding.UTF8.
	Encoding shared = Encoding.UTF8;
	char[] original = { '\uFEFF', 'A' };
	char[] roundTripped = shared.GetChars (shared.GetBytes (original));
	AssertEquals ("#1", '\uFEFF', roundTripped [0]);
	AssertEquals ("#2", 'A', roundTripped [1]);
}
// A clone of the UTF-8 encoding (code page 65001) must be writable.
public void CloneNotReadOnly ()
{
	// Clone () returns object, so the result is converted back to Encoding. The
	// statement was unterminated and left the clone untyped, so `e.IsReadOnly`
	// could not compile.
	Encoding e = (Encoding) Encoding.GetEncoding (65001).Clone ();
	AssertEquals (false, e.IsReadOnly);
	// Assigning a fallback exercises the clone's mutability directly.
	e.EncoderFallback = new EncoderExceptionFallback ();
}
// Decoding ED A2 8C with a throwing decoder must raise an exception.
// Only one ExpectedException attribute may apply to a method, so the expected
// exception type is selected at compile time per framework profile.
// NOTE(review): NET_2_0 is assumed to be the build symbol for the 2.0 profile - confirm.
#if NET_2_0
[ExpectedException (typeof (DecoderFallbackException))]
#else
[ExpectedException (typeof (ArgumentException))]
[Category ("NotDotNet")] // MS Bug
#endif
public void Bug77315 ()
{
	new UTF8Encoding (false, true).GetString (
		new byte [] {0xED, 0xA2, 0x8C});
}
// Encodes a lone high surrogate (U+D800) into a zero-length byte array, once
// through a stateful Encoder and once through Encoding.UTF8.GetBytes (string, ...).
// Depending on the framework profile the call either returns 0 or throws
// ArgumentException; the matching expectation is selected at compile time so
// that AssertEquals is never followed by an unconditional Fail.
// NOTE(review): NET_2_0 is assumed to be the build symbol for the 2.0 profile - confirm.
public void SufficientByteArray ()
{
	Encoder e = Encoding.UTF8.GetEncoder ();
	byte [] bytes = new byte [0];

	char [] chars = new char [] {'\uD800'};
	// Feed the surrogate without flushing so the encoder keeps it as pending state.
	e.GetBytes (chars, 0, 1, bytes, 0, false);
	try {
		int ret = e.GetBytes (chars, 1, 0, bytes, 0, true);
#if NET_2_0
		AssertEquals ("drop insufficient char in 2.0: char[]", 0, ret);
#else
		Fail ("ArgumentException is expected: char[]");
#endif
	} catch (ArgumentException) {
	}

	string s = "\uD800";
	try {
		int ret = Encoding.UTF8.GetBytes (s, 0, 1, bytes, 0);
#if NET_2_0
		AssertEquals ("drop insufficient char in 2.0: string", 0, ret);
#else
		Fail ("ArgumentException is expected: string");
#endif
	} catch (ArgumentException) {
	}
}
[Test] // bug #77550
public void DecoderFallbackSimple ()
{
	// With throwOnInvalidBytes = false, the single invalid byte 183 (0xB7) must
	// yield exactly one character through GetCharCount, GetChars and GetString.
	// NOTE(review): the three calls were truncated; the destination buffer
	// (size 100, index 0) and the "#1".."#3" messages are reconstructed - confirm.
	UTF8Encoding e = new UTF8Encoding (false, false);
	AssertType.AreEqual (1, e.GetDecoder ().GetCharCount (
			new byte [] {(byte) 183}, 0, 1),
			"#1");
	AssertType.AreEqual (1, e.GetDecoder().GetChars (
			new byte [] {(byte) 183}, 0, 1,
			new char [100], 0),
			"#2");
	AssertType.AreEqual (1, e.GetString (new byte [] {(byte) 183}).Length,
			"#3");
}
// Encoding.UTF8's default decoder fallback must be a replacement fallback that
// produces a single U+FFFD.
public void FallbackDefaultEncodingUTF8 ()
{
	DecoderReplacementFallbackBuffer b =
		Encoding.UTF8.DecoderFallback.CreateFallbackBuffer ()
		as DecoderReplacementFallbackBuffer;
	// `as` yields null when the buffer has a different type; report that as an
	// assertion failure rather than a NullReferenceException on the next line.
	AssertType.IsNotNull (b, "#0");
	AssertType.IsTrue (b.Fallback (new byte [] {}, 0), "#1");
	AssertType.IsFalse (b.MovePrevious (), "#2");
	AssertType.AreEqual (1, b.Remaining, "#3");
	AssertType.AreEqual ('\uFFFD', b.GetNextChar (), "#4");
}
// Decodes the first 8000 bytes of a binary fixture with Encoding.UTF8; the test
// passes as long as GetString does not throw.
public void Bug415628 ()
{
	// The fixture is only read, so open it read-only: File.Open with
	// FileMode.Open alone requests read/write access and fails on read-only files.
	using (FileStream f = File.OpenRead ("Test/resources/415628.bin"))
	using (BinaryReader br = new BinaryReader (f)) {
		byte [] buf = br.ReadBytes (8000);
		Encoding.UTF8.GetString (buf);
	}
}