5 // Atsushi Enomoto <atsushi@ximian.com>
7 // Copyright (C) 2005 Novell, Inc. http://www.novell.com
13 using NUnit.Framework;
15 namespace MonoTests.I18N.CJK
// Encoding provider used by the tests in this file to look up encodings by
// code page number or by name; initialised from the I18N PrimaryManager.
20 private global::I18N.Common.Manager Manager = global::I18N.Common.Manager.PrimaryManager;
// Encoding check helper.
//   utf8file - path of the text fixture, read completely through a StreamReader
//              (the reader's remaining constructor arguments are not visible
//              in this excerpt; presumably UTF-8 — TODO confirm).
//   decfile  - path of the fixture holding the expected encoded bytes.
//   codepage - code page passed to Manager.GetEncoding.
// Asserts that encoding the text produces exactly the bytes of decfile, both
// via GetBytes(string) and via the char[] overloads.
22 void AssertEncode (string utf8file, string decfile, int codepage)
24 string decoded = null;
25 byte [] encoded = null;
26 using (StreamReader sr = new StreamReader (utf8file,
28 decoded = sr.ReadToEnd ();
30 using (FileStream fs = File.OpenRead (decfile)) {
31 encoded = new byte [fs.Length];
// NOTE(review): the return value of Read is discarded, so a short read would
// leave the tail of `encoded` zero-filled without being noticed.
32 fs.Read (encoded, 0, (int) fs.Length);
34 Encoding enc = Manager.GetEncoding (codepage);
// The GetByteCount(string) check below is disabled (commented out).
38 //Assert.AreEqual (encoded.Length,
39 // enc.GetByteCount (decoded),
40 // "GetByteCount(string)");
// string overload: the encoded output must equal the fixture bytes
41 actual = enc.GetBytes (decoded);
42 Assert.AreEqual (encoded, actual,
// char[] overloads: both the reported byte count and the bytes must match
46 Assert.AreEqual (encoded.Length,
47 enc.GetByteCount (decoded.ToCharArray (), 0, decoded.Length),
48 "GetByteCount(char[], 0, len)");
49 actual = enc.GetBytes (decoded.ToCharArray (), 0, decoded.Length);
50 Assert.AreEqual (encoded, actual,
51 "GetBytes(char[], 0, len)");
// Decoding check helper, the mirror image of AssertEncode.
//   utf8file - path of the text fixture holding the expected decoded text
//              (the reader's remaining constructor arguments are not visible
//              in this excerpt; presumably UTF-8 — TODO confirm).
//   decfile  - path of the fixture holding the encoded bytes to decode.
//   codepage - code page passed to Manager.GetEncoding.
// Asserts that GetCharCount and GetChars over the whole byte array agree with
// the expected text.
54 void AssertDecode (string utf8file, string decfile, int codepage)
56 string decoded = null;
57 byte [] encoded = null;
58 using (StreamReader sr = new StreamReader (utf8file,
60 decoded = sr.ReadToEnd ();
62 using (FileStream fs = File.OpenRead (decfile)) {
63 encoded = new byte [fs.Length];
// NOTE(review): the return value of Read is discarded, so a short read would
// leave the tail of `encoded` zero-filled without being noticed.
64 fs.Read (encoded, 0, (int) fs.Length);
66 Encoding enc = Manager.GetEncoding (codepage);
// the reported char count must equal the length of the expected text
69 Assert.AreEqual (decoded.Length,
70 enc.GetCharCount (encoded, 0, encoded.Length),
71 "GetCharCount(byte[], 0, len)");
// the decoded chars must equal the expected text, char for char
72 actual = enc.GetChars (encoded, 0, encoded.Length);
73 Assert.AreEqual (decoded.ToCharArray (), actual,
74 "GetChars(byte[], 0, len)");
// Code page 936 encode test: runs AssertEncode on the chinese-utf8 /
// chinese-936 fixture pair.
82 public void CP936_Encode ()
84 AssertEncode ("Test/texts/chinese-utf8.txt", "Test/texts/chinese-936.txt", 936);
// Code page 936 encode test over a second fixture pair (chinese3-utf8 /
// chinese3-936), again via AssertEncode.
88 public void CP936_Encode3 ()
90 AssertEncode("Test/texts/chinese3-utf8.txt", "Test/texts/chinese3-936.txt", 936);
// Code page 936 decode test: runs AssertDecode on the chinese-utf8 /
// chinese-936 fixture pair.
94 public void CP936_Decode ()
96 AssertDecode ("Test/texts/chinese-utf8.txt", "Test/texts/chinese-936.txt", 936);
// Regression test: a character decoded from a UTF-8 payload is encoded with
// the "GB2312" encoding and must come out as the single byte 63 ('?').
100 public void Bug_1531()
// Base64 "wqk=" is the two bytes C2 A9, which is the UTF-8 form of U+00A9.
102 string str = @"wqk=";
103 byte[] utf8 = Convert.FromBase64String(str);
104 char[] data = Encoding.UTF8.GetChars(utf8);
106 var encoding = Manager.GetEncoding("GB2312");
107 var result = encoding.GetBytes(data);
// exactly one byte, value 63 (ASCII '?'), is expected
109 Assert.AreEqual(new byte[] { 63 }, result);
// Code page 950 encode test: runs AssertEncode on the chinese2-utf8 /
// chinese2-950 fixture pair.
115 public void CP950_Encode ()
117 AssertEncode ("Test/texts/chinese2-utf8.txt", "Test/texts/chinese2-950.txt", 950);
// Code page 950 encode test over a second fixture pair (chinese4-utf8 /
// chinese4-950), again via AssertEncode.
121 public void CP950_Encode4 ()
123 AssertEncode("Test/texts/chinese4-utf8.txt", "Test/texts/chinese4-950.txt", 950);
// Code page 950 decode test: runs AssertDecode on the chinese2-utf8 /
// chinese2-950 fixture pair.
127 public void CP950_Decode ()
129 AssertDecode ("Test/texts/chinese2-utf8.txt", "Test/texts/chinese2-950.txt", 950);
// Code page 54936 encode test: runs AssertEncode on the chinese-utf8 /
// chinese-54936 fixture pair.
135 public void CP54936_Encode ()
137 AssertEncode ("Test/texts/chinese-utf8.txt", "Test/texts/chinese-54936.txt", 54936);
// Code page 54936 decode test: runs AssertDecode on the chinese-utf8 /
// chinese-54936 fixture pair.
141 public void CP54936_Decode ()
143 AssertDecode ("Test/texts/chinese-utf8.txt", "Test/texts/chinese-54936.txt", 54936);
// Code page 932 encode test: runs AssertEncode on the japanese-utf8 /
// japanese-932 fixture pair.
153 public void CP932_Encode ()
155 AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-932.txt", 932);
// Code page 932 decode test: runs AssertDecode on the japanese-utf8 /
// japanese-932 fixture pair.
159 public void CP932_Decode ()
161 AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-932.txt", 932);
// Code page 51932 encode test: runs AssertEncode on the japanese-utf8 /
// japanese-51932 fixture pair.
167 public void CP51932_Encode ()
169 AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-51932.txt", 51932);
// Code page 51932 decode test: runs AssertDecode on the japanese-utf8 /
// japanese-51932 fixture pair.
173 public void CP51932_Decode ()
175 AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-51932.txt", 51932);
// Code page 50220 encode test: runs AssertEncode on the japanese2-utf8 /
// japanese2-50220 fixture pair.
181 public void CP50220_Encode ()
183 AssertEncode ("Test/texts/japanese2-utf8.txt", "Test/texts/japanese2-50220.txt", 50220);
// Code page 50220 encode test over a second fixture pair (japanese3-utf8 /
// japanese3-50220), again via AssertEncode.
187 public void CP50220_Encode_3 ()
189 AssertEncode("Test/texts/japanese3-utf8.txt", "Test/texts/japanese3-50220.txt", 50220);
// Code page 50220 decode test: runs AssertDecode on the japanese2-utf8 /
// japanese2-50220 fixture pair.
193 public void CP50220_Decode ()
195 AssertDecode ("Test/texts/japanese2-utf8.txt", "Test/texts/japanese2-50220.txt", 50220);
// Code page 50221 encode test: runs AssertEncode on the japanese-utf8 /
// japanese-50221 fixture pair.
199 public void CP50221_Encode ()
201 AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50221.txt", 50221);
// Code page 50221 encode test over a second fixture pair (japanese3-utf8 /
// japanese3-50221), again via AssertEncode.
205 public void CP50221_Encode_3()
207 AssertEncode("Test/texts/japanese3-utf8.txt", "Test/texts/japanese3-50221.txt", 50221);
// Code page 50221 decode test: runs AssertDecode on the japanese-utf8 /
// japanese-50221 fixture pair.
211 public void CP50221_Decode ()
213 AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50221.txt", 50221);
// Code page 50222 encode test: runs AssertEncode on the japanese-utf8 /
// japanese-50222 fixture pair. Carries the "NotDotNet" category.
218 [Category ("NotDotNet")] // MS is buggy here
220 public void CP50222_Encode ()
222 AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50222.txt", 50222);
// Code page 50222 decode test: runs AssertDecode on the japanese-utf8 /
// japanese-50222 fixture pair. Carries the "NotDotNet" category.
227 [Category ("NotDotNet")] // MS is buggy here
229 public void CP50222_Decode ()
231 AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50222.txt", 50222);
// Decodes the three bytes 1B 24 30 (ESC, '$', '0') with code page 50220 from
// Manager and expects them back unchanged as the string "\u001B$0".
235 public void CP50220BrokenESC ()
237 Assert.AreEqual ("\u001B$0", Manager.GetEncoding (50220).GetString (new byte [] {0x1B, 0x24, 0x30}), "#1");
// Same input and expectation as CP50220BrokenESC, but the encoding comes from
// Encoding.GetEncoding with replacement fallbacks (the decoder replacement
// string is empty).
241 public void CP50220BrokenESC2 ()
243 // it does not really invoke fallback ...
244 Assert.AreEqual ("\u001B$0", Encoding.GetEncoding (50220, new EncoderReplacementFallback (), new DecoderReplacementFallback ("")).GetString (new byte [] {0x1B, 0x24, 0x30}), "#1");
// Same input and expectation as CP50220BrokenESC, but the encoding comes from
// Encoding.GetEncoding with exception fallbacks; the assertion expects the
// string "\u001B$0" to be returned.
248 public void CP50220BrokenESC3 ()
251 Assert.AreEqual ("\u001B$0", Encoding.GetEncoding (50220, new EncoderExceptionFallback (), new DecoderExceptionFallback ()).GetString (new byte [] {0x1B, 0x24, 0x30}), "#2");
// Regression test: runs GetBytesAllSingleChars for code page 51932. No
// assertion is visible here; the call only has to complete. Carries the
// "NotDotNet" category.
256 [Category ("NotDotNet")] // MS bug
258 public void Bug77723 ()
260 GetBytesAllSingleChars (51932);
// Regression test: runs GetBytesAllSingleChars for code page 932. No
// assertion is visible here; the call only has to complete.
264 public void Bug77724 ()
266 GetBytesAllSingleChars (932);
// Regression test: runs GetBytesAllSingleChars for code page 54936. No
// assertion is visible here; the call only has to complete.
270 public void Bug77307 ()
272 GetBytesAllSingleChars (54936);
// Helper: looks up the encoding for code page `enc` and calls GetBytes on a
// one-element char array for every value 0x0000..0xFFFF. The results are
// discarded and nothing is asserted.
275 void GetBytesAllSingleChars (int enc)
277 Encoding e = Manager.GetEncoding (enc);
278 for (int i = 0; i < 0x10000; i++)
279 e.GetBytes (new char [] { (char)i });
// Helper: looks up the encoding for code page `enc` and walks every two-byte
// combination (0x00..0xFF for each position), writing it into a reused
// two-byte buffer.
// NOTE(review): the statement that consumes `bytes` inside the inner loop is
// not visible in this excerpt; presumably a GetChars call — TODO confirm.
282 void GetCharsAllBytePairs (int enc)
284 Encoding e = Manager.GetEncoding (enc);
285 byte [] bytes = new byte [2];
286 for (int i0 = 0; i0 < 0x100; i0++) {
287 bytes [0] = (byte) i0;
288 for (int i1 = 0; i1 < 0x100; i1++) {
289 bytes [1] = (byte) i1;
// Regression test: runs GetCharsAllBytePairs for code page 51932.
296 public void Bug77222 ()
298 GetCharsAllBytePairs (51932);
// Regression test: runs GetCharsAllBytePairs for code page 936.
302 public void Bug77238 ()
304 GetCharsAllBytePairs (936);
// Regression test: runs GetCharsAllBytePairs for code page 54936.
308 public void Bug77306 ()
310 GetCharsAllBytePairs (54936);
// Regression test: runs GetCharsAllBytePairs for code page 949.
314 public void Bug77298 ()
316 GetCharsAllBytePairs (949);
// Regression test: runs GetCharsAllBytePairs for code page 950.
320 public void Bug77274 ()
322 GetCharsAllBytePairs (950);
// Exercises the stateful encoder of the "gb18030" encoding with a surrogate
// pair split across calls, checking the byte count returned by each GetBytes
// call and the output buffer afterwards. Carries the "NotDotNet" category.
// NOTE(review): the second argument of each buffer assertion is on a line not
// shown in this excerpt.
327 [Category ("NotDotNet")] // MS bug
329 public void Encoder54936Refresh ()
331 Encoding e = Manager.GetEncoding ("gb18030");
332 Encoder d = e.GetEncoder ();
// high surrogate alone, flush = false: no bytes expected, buffer stays zeroed
335 bytes = new byte [4];
336 Assert.AreEqual (0, d.GetBytes (new char [] {'\uD800'}, 0, 1, bytes, 0, false), "#1");
337 Assert.AreEqual (new byte [] {00, 00, 00, 00},
// low surrogate follows, flush = true: four bytes 90 30 81 30 expected
340 bytes = new byte [4];
341 Assert.AreEqual (4, d.GetBytes (new char [] {'\uDC00'}, 0, 1, bytes, 0, true), "#3");
342 Assert.AreEqual (new byte [] {0x90, 0x30, 0x81, 0x30},
// lone high surrogate with flush = true: one byte 0x3F ('?') expected
345 bytes = new byte [4];
346 Assert.AreEqual (1, d.GetBytes (new char [] {'\uD800'}, 0, 1, bytes, 0, true), "#5");
347 Assert.AreEqual (new byte [] {0x3F, 00, 00, 00},
// Regression test: encoding U+FF02 with code page 932 must yield the two
// bytes EE FC.
352 public void Bug491799 ()
354 Assert.AreEqual (new byte [] {0xEE, 0xFC},
355 Manager.GetEncoding (932).GetBytes ("\uFF02"));
// Feeds the code page 932 decoder the single byte 0x81 three times through
// the GetChars overload that takes a flush flag, checking the returned char
// count and the output buffer after each call.
360 public void Decoder932Refresh ()
362 Encoding e = Manager.GetEncoding (932);
363 Decoder d = e.GetDecoder ();
// 0x81, flush = false: no char expected, buffer untouched
366 chars = new char [1];
367 Assert.AreEqual (0, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, false), "#1");
368 Assert.AreEqual (new char [] {'\0'}, chars, "#2");
// second 0x81, flush = true: one char, U+FF1D, expected
370 chars = new char [1];
371 Assert.AreEqual (1, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, true), "#3");
372 Assert.AreEqual (new char [] {'\uFF1D'}, chars, "#4");
// third 0x81, flush = true: one char, U+30FB, expected
374 chars = new char [1];
375 Assert.AreEqual (1, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, true), "#5");
376 Assert.AreEqual (new char [] {'\u30FB'}, chars, "#6");
// Feeds the code page 51932 decoder single bytes through the GetChars
// overload that takes a flush flag, checking the returned char count and the
// output buffer after each call.
380 public void Decoder51932Refresh ()
382 Encoding e = Manager.GetEncoding (51932);
383 Decoder d = e.GetDecoder ();
// 0x81, flush = false: one char, U+30FB, expected straight away
387 chars = new char [1];
388 Assert.AreEqual (1, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, false), "#0.1");
389 Assert.AreEqual (new char [] {'\u30FB'}, chars, "#0.2");
// 0xA1, flush = false: no char expected, buffer untouched
392 chars = new char [1];
393 Assert.AreEqual (0, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0, false), "#1");
394 Assert.AreEqual (new char [] {'\0'}, chars, "#2");
// second 0xA1, flush = true: one char, U+3000, expected
397 chars = new char [1];
398 Assert.AreEqual (1, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0, true), "#3");
399 Assert.AreEqual (new char [] {'\u3000'}, chars, "#4");
401 // incomplete but refreshed
402 chars = new char [1];
403 Assert.AreEqual (1, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0, true), "#5");
404 Assert.AreEqual (new char [] {'\u30FB'}, chars, "#6");
// Feeds the code page 936 decoder single bytes through the GetChars overload
// that takes a flush flag, checking the returned char count and the output
// buffer after each call.
408 public void Decoder936Refresh ()
410 Encoding e = Manager.GetEncoding (936);
411 Decoder d = e.GetDecoder ();
// 0xB0, flush = false: no char expected, buffer untouched
415 chars = new char [1];
416 Assert.AreEqual (0, d.GetChars (new byte [] {0xB0}, 0, 1, chars, 0, false), "#1");
417 Assert.AreEqual (new char [] {'\0'}, chars, "#2");
// 0xA1 follows, flush = false: one char, U+554A, expected
420 chars = new char [1];
421 Assert.AreEqual (1, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0, false), "#3");
422 Assert.AreEqual (new char [] {'\u554A'}, chars, "#4");
424 // incomplete but refreshed
425 chars = new char [1];
426 Assert.AreEqual (1, d.GetChars (new byte [] {0xB0}, 0, 1, chars, 0, true), "#5");
427 Assert.AreEqual (new char [] {'?'}, chars, "#6");
// Feeds the code page 949 decoder single bytes through the GetChars overload
// that takes a flush flag, checking the returned char count and the output
// buffer after each call.
431 public void Decoder949Refresh ()
433 Encoding e = Manager.GetEncoding (949);
434 Decoder d = e.GetDecoder ();
// 0x81, flush = false: no char expected, buffer untouched
438 chars = new char [1];
439 Assert.AreEqual (0, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, false), "#1");
440 Assert.AreEqual (new char [] {'\0'}, chars, "#2");
// 0x41 follows, flush = false: one char, U+AC02, expected
443 chars = new char [1];
444 Assert.AreEqual (1, d.GetChars (new byte [] {0x41}, 0, 1, chars, 0, false), "#3");
445 Assert.AreEqual (new char [] {'\uAC02'}, chars, "#4");
447 // incomplete but refreshed
448 chars = new char [1];
449 Assert.AreEqual (1, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, true), "#5");
450 Assert.AreEqual (new char [] {'?'}, chars, "#6");
// Feeds the code page 950 decoder single bytes through the GetChars overload
// that takes a flush flag, checking the returned char count and the output
// buffer after each call.
454 public void Decoder950Refresh ()
456 Encoding e = Manager.GetEncoding (950);
457 Decoder d = e.GetDecoder ();
// 0xF9, flush = false: no char expected, buffer untouched
461 chars = new char [1];
462 Assert.AreEqual (0, d.GetChars (new byte [] {0xF9}, 0, 1, chars, 0, false), "#1");
463 Assert.AreEqual (new char [] {'\0'}, chars, "#2");
// 0x40 follows, flush = false: one char, U+7E98, expected
466 chars = new char [1];
467 Assert.AreEqual (1, d.GetChars (new byte [] {0x40}, 0, 1, chars, 0, false), "#3");
468 Assert.AreEqual (new char [] {'\u7E98'}, chars, "#4");
470 // incomplete but refreshed
471 chars = new char [1];
472 Assert.AreEqual (1, d.GetChars (new byte [] {0xF9}, 0, 1, chars, 0, true), "#5");
473 Assert.AreEqual (new char [] {'?'}, chars, "#6");
// Same byte sequence as the 0xA1 steps of Decoder51932Refresh, but through
// the five-argument GetChars overload, which takes no flush flag. The final
// lone byte is expected to produce no output.
479 public void Decoder51932NoRefresh ()
481 Encoding e = Manager.GetEncoding (51932);
482 Decoder d = e.GetDecoder ();
// first 0xA1: no char expected, buffer untouched
486 chars = new char [1];
487 Assert.AreEqual (0, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0), "#1");
488 Assert.AreEqual (new char [] {'\0'}, chars, "#2");
// second 0xA1: one char, U+3000, expected
491 chars = new char [1];
492 Assert.AreEqual (1, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0), "#3");
493 Assert.AreEqual (new char [] {'\u3000'}, chars, "#4");
495 // incomplete and no flush requested: nothing is emitted
496 chars = new char [1];
497 Assert.AreEqual (0, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0), "#5");
498 Assert.AreEqual (new char [] {'\0'}, chars, "#6");
// Same byte sequence as Decoder936Refresh, but through the five-argument
// GetChars overload, which takes no flush flag. The final lone byte is
// expected to produce no output.
502 public void Decoder936NoRefresh ()
504 Encoding e = Manager.GetEncoding (936);
505 Decoder d = e.GetDecoder ();
// 0xB0: no char expected, buffer untouched
509 chars = new char [1];
510 Assert.AreEqual (0, d.GetChars (new byte [] {0xB0}, 0, 1, chars, 0), "#1");
511 Assert.AreEqual (new char [] {'\0'}, chars, "#2");
// 0xA1 follows: one char, U+554A, expected
514 chars = new char [1];
515 Assert.AreEqual (1, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0), "#3");
516 Assert.AreEqual (new char [] {'\u554A'}, chars, "#4");
518 // incomplete and no flush requested: nothing is emitted
519 chars = new char [1];
520 Assert.AreEqual (0, d.GetChars (new byte [] {0xB0}, 0, 1, chars, 0), "#5");
521 Assert.AreEqual (new char [] {'\0'}, chars, "#6");
// Same byte sequence as Decoder949Refresh, but through the five-argument
// GetChars overload, which takes no flush flag. The final lone byte is
// expected to produce no output.
525 public void Decoder949NoRefresh ()
527 Encoding e = Manager.GetEncoding (949);
528 Decoder d = e.GetDecoder ();
// 0x81: no char expected, buffer untouched
532 chars = new char [1];
533 Assert.AreEqual (0, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0), "#1");
534 Assert.AreEqual (new char [] {'\0'}, chars, "#2");
// 0x41 follows: one char, U+AC02, expected
537 chars = new char [1];
538 Assert.AreEqual (1, d.GetChars (new byte [] {0x41}, 0, 1, chars, 0), "#3");
539 Assert.AreEqual (new char [] {'\uAC02'}, chars, "#4");
541 // incomplete and no flush requested: nothing is emitted
542 chars = new char [1];
543 Assert.AreEqual (0, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0), "#5");
544 Assert.AreEqual (new char [] {'\0'}, chars, "#6");
// Same byte sequence as Decoder950Refresh, but through the five-argument
// GetChars overload, which takes no flush flag. The final lone byte is
// expected to produce no output.
548 public void Decoder950NoRefresh ()
550 Encoding e = Manager.GetEncoding (950);
551 Decoder d = e.GetDecoder ();
// 0xF9: no char expected, buffer untouched
555 chars = new char [1];
556 Assert.AreEqual (0, d.GetChars (new byte [] {0xF9}, 0, 1, chars, 0), "#1");
557 Assert.AreEqual (new char [] {'\0'}, chars, "#2");
// 0x40 follows: one char, U+7E98, expected
560 chars = new char [1];
561 Assert.AreEqual (1, d.GetChars (new byte [] {0x40}, 0, 1, chars, 0), "#3");
562 Assert.AreEqual (new char [] {'\u7E98'}, chars, "#4");
564 // incomplete and no flush requested: nothing is emitted
565 chars = new char [1];
566 Assert.AreEqual (0, d.GetChars (new byte [] {0xF9}, 0, 1, chars, 0), "#5");
567 Assert.AreEqual (new char [] {'\0'}, chars, "#6");
// Decodes an ISO-2022-JP byte sequence that switches back to single-byte mode
// with ESC ( J and expects the text "don\u2019t".
571 public void HandleObsoletedESCJ () // bug #398273
// Layout: "don", ESC $ B, the byte pair 21 47, ESC ( J, then "t".
573 byte [] b = new byte [] {0x64, 0x6f, 0x6e, 0x1b, 0x24, 0x42, 0x21, 0x47, 0x1b, 0x28, 0x4a, 0x74};
574 string s = Manager.GetEncoding ("ISO-2022-JP").GetString (b);
575 Assert.AreEqual ("don\u2019t", s);
// Regression test: decodes a Base64-supplied payload with "iso-2022-jp",
// obtained from Encoding.GetEncoding rather than Manager, and expects three
// specific characters.
580 public void Bug14591 ()
582 var expected = "\u4f50\u85e4\u8c4a";
// The payload decodes to 1B 24 40 (ESC $ @), six data bytes, 1B 28 4A (ESC ( J).
583 var text = Encoding.GetEncoding ("iso-2022-jp").GetString (Convert.FromBase64String ("GyRAOjRGI0stGyhK"));
584 Assert.AreEqual (expected, text, "#1");
// Code page 949 encode test: runs AssertEncode on the korean-utf8 /
// korean-949 fixture pair.
591 public void CP949_Encode ()
593 AssertEncode ("Test/texts/korean-utf8.txt", "Test/texts/korean-949.txt", 949);
// Code page 949 decode test: runs AssertDecode on the korean-utf8 /
// korean-949 fixture pair.
597 public void CP949_Decode ()
599 AssertDecode ("Test/texts/korean-utf8.txt", "Test/texts/korean-949.txt", 949);