5 // Atsushi Enomoto <atsushi@ximian.com>
7 // Copyright (C) 2005 Novell, Inc. http://www.novell.com
13 using NUnit.Framework;
15 namespace MonoTests.I18N.CJK
// Checks the encoder side of a code page against a pair of reference files.
//   utf8file - path of the text file read through a StreamReader; its content is the input string.
//   decfile  - path of the file whose raw bytes are the expected encoder output.
//   codepage - code page number handed to Encoding.GetEncoding.
// NOTE(review): this view of the method has gaps (braces, the declaration of 'actual' and the
// tails of some multi-line calls are not visible); the comments describe the visible lines only.
20 void AssertEncode (string utf8file, string decfile, int codepage)
22 string decoded = null;
23 byte [] encoded = null;
// Load the input text (the StreamReader constructor call continues on a line not shown here).
24 using (StreamReader sr = new StreamReader (utf8file,
26 decoded = sr.ReadToEnd ();
// Load the expected bytes; the buffer is sized to the full length of the file.
28 using (FileStream fs = File.OpenRead (decfile)) {
29 encoded = new byte [fs.Length];
// NOTE(review): the return value of Read is discarded, so a short read would go unnoticed.
30 fs.Read (encoded, 0, (int) fs.Length);
32 Encoding enc = Encoding.GetEncoding (codepage);
// The GetByteCount(string) check below is commented out in the source.
36 //Assert.AreEqual (encoded.Length,
37 // enc.GetByteCount (decoded),
38 // "GetByteCount(string)");
// GetBytes(string): the result is compared with the reference bytes.
39 actual = enc.GetBytes (decoded);
40 Assert.AreEqual (encoded, actual,
// char[] overloads: both the byte count and the bytes must match the reference.
44 Assert.AreEqual (encoded.Length,
45 enc.GetByteCount (decoded.ToCharArray (), 0, decoded.Length),
46 "GetByteCount(char[], 0, len)");
47 actual = enc.GetBytes (decoded.ToCharArray (), 0, decoded.Length);
48 Assert.AreEqual (encoded, actual,
49 "GetBytes(char[], 0, len)");
// Checks the decoder side of a code page against a pair of reference files.
//   utf8file - path of the text file read through a StreamReader; its content is the expected string.
//   decfile  - path of the file whose raw bytes are fed to the decoder.
//   codepage - code page number handed to Encoding.GetEncoding.
// NOTE(review): this view of the method has gaps (braces, the declaration of 'actual' and the
// tail of the StreamReader constructor call are not visible).
52 void AssertDecode (string utf8file, string decfile, int codepage)
54 string decoded = null;
55 byte [] encoded = null;
// Load the expected text.
56 using (StreamReader sr = new StreamReader (utf8file,
58 decoded = sr.ReadToEnd ();
// Load the encoded input; the buffer is sized to the full length of the file.
60 using (FileStream fs = File.OpenRead (decfile)) {
61 encoded = new byte [fs.Length];
// NOTE(review): the return value of Read is discarded, so a short read would go unnoticed.
62 fs.Read (encoded, 0, (int) fs.Length);
64 Encoding enc = Encoding.GetEncoding (codepage);
// The char count reported for the whole byte array must equal the expected string length.
67 Assert.AreEqual (decoded.Length,
68 enc.GetCharCount (encoded, 0, encoded.Length),
69 "GetCharCount(byte[], 0, len)");
// The decoded chars must equal the expected string, char by char.
70 actual = enc.GetChars (encoded, 0, encoded.Length);
71 Assert.AreEqual (decoded.ToCharArray (), actual,
72 "GetChars(byte[], 0, len)");
// Runs AssertEncode for code page 936 with chinese-utf8.txt as input and chinese-936.txt as expected bytes.
80 public void CP936_Encode ()
82 AssertEncode ("Test/texts/chinese-utf8.txt", "Test/texts/chinese-936.txt", 936);
// Runs AssertDecode for code page 936 with chinese-936.txt as input bytes and chinese-utf8.txt as expected text.
86 public void CP936_Decode ()
88 AssertDecode ("Test/texts/chinese-utf8.txt", "Test/texts/chinese-936.txt", 936);
// Runs AssertEncode for code page 950 with chinese2-utf8.txt as input and chinese2-950.txt as expected bytes.
94 public void CP950_Encode ()
96 AssertEncode ("Test/texts/chinese2-utf8.txt", "Test/texts/chinese2-950.txt", 950);
// Runs AssertDecode for code page 950 with chinese2-950.txt as input bytes and chinese2-utf8.txt as expected text.
100 public void CP950_Decode ()
102 AssertDecode ("Test/texts/chinese2-utf8.txt", "Test/texts/chinese2-950.txt", 950);
// Runs AssertEncode for code page 54936 with chinese-utf8.txt as input and chinese-54936.txt as expected bytes.
108 public void CP54936_Encode ()
110 AssertEncode ("Test/texts/chinese-utf8.txt", "Test/texts/chinese-54936.txt", 54936);
// Runs AssertDecode for code page 54936 with chinese-54936.txt as input bytes and chinese-utf8.txt as expected text.
114 public void CP54936_Decode ()
116 AssertDecode ("Test/texts/chinese-utf8.txt", "Test/texts/chinese-54936.txt", 54936);
// Runs AssertEncode for code page 932 with japanese-utf8.txt as input and japanese-932.txt as expected bytes.
126 public void CP932_Encode ()
128 AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-932.txt", 932);
// Runs AssertDecode for code page 932 with japanese-932.txt as input bytes and japanese-utf8.txt as expected text.
132 public void CP932_Decode ()
134 AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-932.txt", 932);
// Runs AssertEncode for code page 51932 with japanese-utf8.txt as input and japanese-51932.txt as expected bytes.
140 public void CP51932_Encode ()
142 AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-51932.txt", 51932);
// Runs AssertDecode for code page 51932 with japanese-51932.txt as input bytes and japanese-utf8.txt as expected text.
146 public void CP51932_Decode ()
148 AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-51932.txt", 51932);
// Runs AssertEncode for code page 50220; note that this pair uses the japanese2-* reference files.
154 public void CP50220_Encode ()
156 AssertEncode ("Test/texts/japanese2-utf8.txt", "Test/texts/japanese2-50220.txt", 50220);
// Runs AssertDecode for code page 50220; note that this pair uses the japanese2-* reference files.
160 public void CP50220_Decode ()
162 AssertDecode ("Test/texts/japanese2-utf8.txt", "Test/texts/japanese2-50220.txt", 50220);
// Runs AssertEncode for code page 50221 with japanese-utf8.txt as input and japanese-50221.txt as expected bytes.
166 public void CP50221_Encode ()
168 AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50221.txt", 50221);
// Runs AssertDecode for code page 50221 with japanese-50221.txt as input bytes and japanese-utf8.txt as expected text.
172 public void CP50221_Decode ()
174 AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50221.txt", 50221);
// Runs AssertEncode for code page 50222; tagged with the "NotDotNet" category.
179 [Category ("NotDotNet")] // MS is buggy here
181 public void CP50222_Encode ()
183 AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50222.txt", 50222);
// Runs AssertDecode for code page 50222; tagged with the "NotDotNet" category.
188 [Category ("NotDotNet")] // MS is buggy here
190 public void CP50222_Decode ()
192 AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50222.txt", 50222);
// Encodes every single char with code page 51932 (see GetBytesAllSingleChars); tagged "NotDotNet".
197 [Category ("NotDotNet")] // MS bug
199 public void Bug77723 ()
201 GetBytesAllSingleChars (51932);
// Encodes every single char with code page 932 (see GetBytesAllSingleChars).
205 public void Bug77724 ()
207 GetBytesAllSingleChars (932);
// Encodes every single char with code page 54936 (see GetBytesAllSingleChars).
211 public void Bug77307 ()
213 GetBytesAllSingleChars (54936);
// Calls GetBytes on a one-element char array for every value 0x0000..0xFFFF using the
// encoding for code page 'enc'. The returned bytes are discarded, so the visible code
// checks nothing beyond the calls completing without an exception.
216 void GetBytesAllSingleChars (int enc)
218 Encoding e = Encoding.GetEncoding (enc);
219 for (int i = 0; i < 0x10000; i++)
220 e.GetBytes (new char [] { (char)i });
// Walks every combination of two byte values (0x00..0xFF each) for the encoding of code page 'enc',
// reusing a single 2-byte buffer.
// NOTE(review): the statement that consumes 'bytes' inside the inner loop is not visible in this
// view; presumably it decodes the pair with 'e' — confirm against the full file.
223 void GetCharsAllBytePairs (int enc)
225 Encoding e = Encoding.GetEncoding (enc);
226 byte [] bytes = new byte [2];
// Outer loop fixes the first byte, inner loop the second.
227 for (int i0 = 0; i0 < 0x100; i0++) {
228 bytes [0] = (byte) i0;
229 for (int i1 = 0; i1 < 0x100; i1++) {
230 bytes [1] = (byte) i1;
// Runs GetCharsAllBytePairs for code page 51932.
237 public void Bug77222 ()
239 GetCharsAllBytePairs (51932);
// Runs GetCharsAllBytePairs for code page 936.
243 public void Bug77238 ()
245 GetCharsAllBytePairs (936);
// Runs GetCharsAllBytePairs for code page 54936.
249 public void Bug77306 ()
251 GetCharsAllBytePairs (54936);
// Runs GetCharsAllBytePairs for code page 949.
255 public void Bug77298 ()
257 GetCharsAllBytePairs (949);
// Runs GetCharsAllBytePairs for code page 950.
261 public void Bug77274 ()
263 GetCharsAllBytePairs (950);
// Exercises the flush argument of the Encoder obtained from the "gb18030" encoding, feeding it
// surrogate chars one at a time. Tagged with the "NotDotNet" category.
// NOTE(review): the declaration of 'bytes' and the tails of the array assertions are not visible here.
268 [Category ("NotDotNet")] // MS bug
270 public void Encoder54936Refresh ()
272 Encoding e = Encoding.GetEncoding ("gb18030");
273 Encoder d = e.GetEncoder ();
// U+D800 alone with flush=false: 0 bytes are expected; the next assertion lists four zero bytes.
276 bytes = new byte [4];
277 Assert.AreEqual (0, d.GetBytes (new char [] {'\uD800'}, 0, 1, bytes, 0, false), "#1");
278 Assert.AreEqual (new byte [] {00, 00, 00, 00},
// U+DC00 with flush=true: 4 bytes are expected; the next assertion lists 90 30 81 30.
281 bytes = new byte [4];
282 Assert.AreEqual (4, d.GetBytes (new char [] {'\uDC00'}, 0, 1, bytes, 0, true), "#3");
283 Assert.AreEqual (new byte [] {0x90, 0x30, 0x81, 0x30},
// U+D800 alone with flush=true: 1 byte is expected; the next assertion lists 0x3F followed by zeros.
286 bytes = new byte [4];
287 Assert.AreEqual (1, d.GetBytes (new char [] {'\uD800'}, 0, 1, bytes, 0, true), "#5");
288 Assert.AreEqual (new byte [] {0x3F, 00, 00, 00},
// Checks how the code page 932 Decoder handles the single byte 0x81 with and without flush.
// NOTE(review): the declaration of 'chars' is not visible in this view.
294 public void Decoder932Refresh ()
296 Encoding e = Encoding.GetEncoding (932);
297 Decoder d = e.GetDecoder ();
// 0x81 with flush=false: no char is expected and the output slot stays '\0'.
300 chars = new char [1];
301 Assert.AreEqual (0, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, false), "#1");
302 Assert.AreEqual (new char [] {'\0'}, chars, "#2");
// A second 0x81 with flush=true: one char, U+FF1D, is expected.
304 chars = new char [1];
305 Assert.AreEqual (1, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, true), "#3");
306 Assert.AreEqual (new char [] {'\uFF1D'}, chars, "#4");
// 0x81 again with flush=true, right after the flushing call: one char, U+30FB, is expected.
308 chars = new char [1];
309 Assert.AreEqual (1, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, true), "#5");
310 Assert.AreEqual (new char [] {'\u30FB'}, chars, "#6");
// Checks how the code page 51932 Decoder handles single bytes 0x81 and 0xA1 with and without flush.
// NOTE(review): the declaration of 'chars' is not visible in this view.
314 public void Decoder51932Refresh ()
316 Encoding e = Encoding.GetEncoding (51932);
317 Decoder d = e.GetDecoder ();
// 0x81 with flush=false: one char, U+30FB, is expected straight away.
321 chars = new char [1];
322 Assert.AreEqual (1, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, false), "#0.1");
323 Assert.AreEqual (new char [] {'\u30FB'}, chars, "#0.2");
// 0xA1 with flush=false: no char is expected and the output slot stays '\0'.
326 chars = new char [1];
327 Assert.AreEqual (0, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0, false), "#1");
328 Assert.AreEqual (new char [] {'\0'}, chars, "#2");
// A second 0xA1 with flush=true: one char, U+3000, is expected.
331 chars = new char [1];
332 Assert.AreEqual (1, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0, true), "#3");
333 Assert.AreEqual (new char [] {'\u3000'}, chars, "#4");
335 // incomplete but refreshed: a lone 0xA1 with flush=true is expected to yield U+30FB
336 chars = new char [1];
337 Assert.AreEqual (1, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0, true), "#5");
338 Assert.AreEqual (new char [] {'\u30FB'}, chars, "#6");
// Checks how the code page 936 Decoder handles the bytes 0xB0 and 0xA1 fed one per call.
// NOTE(review): the declaration of 'chars' is not visible in this view.
342 public void Decoder936Refresh ()
344 Encoding e = Encoding.GetEncoding (936);
345 Decoder d = e.GetDecoder ();
// 0xB0 with flush=false: no char is expected and the output slot stays '\0'.
349 chars = new char [1];
350 Assert.AreEqual (0, d.GetChars (new byte [] {0xB0}, 0, 1, chars, 0, false), "#1");
351 Assert.AreEqual (new char [] {'\0'}, chars, "#2");
// 0xA1 in the following call (flush=false): one char, U+554A, is expected.
354 chars = new char [1];
355 Assert.AreEqual (1, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0, false), "#3");
356 Assert.AreEqual (new char [] {'\u554A'}, chars, "#4");
358 // incomplete but refreshed: a lone 0xB0 with flush=true is expected to yield '?'
359 chars = new char [1];
360 Assert.AreEqual (1, d.GetChars (new byte [] {0xB0}, 0, 1, chars, 0, true), "#5");
361 Assert.AreEqual (new char [] {'?'}, chars, "#6");
// Checks how the code page 949 Decoder handles the bytes 0x81 and 0x41 fed one per call.
// NOTE(review): the declaration of 'chars' is not visible in this view.
365 public void Decoder949Refresh ()
367 Encoding e = Encoding.GetEncoding (949);
368 Decoder d = e.GetDecoder ();
// 0x81 with flush=false: no char is expected and the output slot stays '\0'.
372 chars = new char [1];
373 Assert.AreEqual (0, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, false), "#1");
374 Assert.AreEqual (new char [] {'\0'}, chars, "#2");
// 0x41 in the following call (flush=false): one char, U+AC02, is expected.
377 chars = new char [1];
378 Assert.AreEqual (1, d.GetChars (new byte [] {0x41}, 0, 1, chars, 0, false), "#3");
379 Assert.AreEqual (new char [] {'\uAC02'}, chars, "#4");
381 // incomplete but refreshed: a lone 0x81 with flush=true is expected to yield '?'
382 chars = new char [1];
383 Assert.AreEqual (1, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, true), "#5");
384 Assert.AreEqual (new char [] {'?'}, chars, "#6");
// Checks how the code page 950 Decoder handles the bytes 0xF9 and 0x40 fed one per call.
// NOTE(review): the declaration of 'chars' is not visible in this view.
388 public void Decoder950Refresh ()
390 Encoding e = Encoding.GetEncoding (950);
391 Decoder d = e.GetDecoder ();
// 0xF9 with flush=false: no char is expected and the output slot stays '\0'.
395 chars = new char [1];
396 Assert.AreEqual (0, d.GetChars (new byte [] {0xF9}, 0, 1, chars, 0, false), "#1");
397 Assert.AreEqual (new char [] {'\0'}, chars, "#2");
// 0x40 in the following call (flush=false): one char, U+7E98, is expected.
400 chars = new char [1];
401 Assert.AreEqual (1, d.GetChars (new byte [] {0x40}, 0, 1, chars, 0, false), "#3");
402 Assert.AreEqual (new char [] {'\u7E98'}, chars, "#4");
404 // incomplete but refreshed: a lone 0xF9 with flush=true is expected to yield '?'
405 chars = new char [1];
406 Assert.AreEqual (1, d.GetChars (new byte [] {0xF9}, 0, 1, chars, 0, true), "#5");
407 Assert.AreEqual (new char [] {'?'}, chars, "#6");
// Feeds the code page 51932 Decoder the byte 0xA1 three times through the five-argument
// GetChars overload, i.e. the one called here without a flush argument.
// NOTE(review): the declaration of 'chars' is not visible in this view.
413 public void Decoder51932NoRefresh ()
415 Encoding e = Encoding.GetEncoding (51932);
416 Decoder d = e.GetDecoder ();
// First 0xA1: no char is expected and the output slot stays '\0'.
420 chars = new char [1];
421 Assert.AreEqual (0, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0), "#1");
422 Assert.AreEqual (new char [] {'\0'}, chars, "#2");
// Second 0xA1: one char, U+3000, is expected.
425 chars = new char [1];
426 Assert.AreEqual (1, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0), "#3");
427 Assert.AreEqual (new char [] {'\u3000'}, chars, "#4");
429 // incomplete and not flushed: a lone 0xA1 is expected to produce no char
430 chars = new char [1];
431 Assert.AreEqual (0, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0), "#5");
432 Assert.AreEqual (new char [] {'\0'}, chars, "#6");
// Feeds the code page 936 Decoder the bytes 0xB0, 0xA1, 0xB0 one per call through the
// five-argument GetChars overload, i.e. the one called here without a flush argument.
// NOTE(review): the declaration of 'chars' is not visible in this view.
436 public void Decoder936NoRefresh ()
438 Encoding e = Encoding.GetEncoding (936);
439 Decoder d = e.GetDecoder ();
// 0xB0: no char is expected and the output slot stays '\0'.
443 chars = new char [1];
444 Assert.AreEqual (0, d.GetChars (new byte [] {0xB0}, 0, 1, chars, 0), "#1");
445 Assert.AreEqual (new char [] {'\0'}, chars, "#2");
// 0xA1 in the following call: one char, U+554A, is expected.
448 chars = new char [1];
449 Assert.AreEqual (1, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0), "#3");
450 Assert.AreEqual (new char [] {'\u554A'}, chars, "#4");
452 // incomplete and not flushed: a lone 0xB0 is expected to produce no char
453 chars = new char [1];
454 Assert.AreEqual (0, d.GetChars (new byte [] {0xB0}, 0, 1, chars, 0), "#5");
455 Assert.AreEqual (new char [] {'\0'}, chars, "#6");
// Feeds the code page 949 Decoder the bytes 0x81, 0x41, 0x81 one per call through the
// five-argument GetChars overload, i.e. the one called here without a flush argument.
// NOTE(review): the declaration of 'chars' is not visible in this view.
459 public void Decoder949NoRefresh ()
461 Encoding e = Encoding.GetEncoding (949);
462 Decoder d = e.GetDecoder ();
// 0x81: no char is expected and the output slot stays '\0'.
466 chars = new char [1];
467 Assert.AreEqual (0, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0), "#1");
468 Assert.AreEqual (new char [] {'\0'}, chars, "#2");
// 0x41 in the following call: one char, U+AC02, is expected.
471 chars = new char [1];
472 Assert.AreEqual (1, d.GetChars (new byte [] {0x41}, 0, 1, chars, 0), "#3");
473 Assert.AreEqual (new char [] {'\uAC02'}, chars, "#4");
475 // incomplete and not flushed: a lone 0x81 is expected to produce no char
476 chars = new char [1];
477 Assert.AreEqual (0, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0), "#5");
478 Assert.AreEqual (new char [] {'\0'}, chars, "#6");
// Feeds the code page 950 Decoder the bytes 0xF9, 0x40, 0xF9 one per call through the
// five-argument GetChars overload, i.e. the one called here without a flush argument.
// NOTE(review): the declaration of 'chars' is not visible in this view.
482 public void Decoder950NoRefresh ()
484 Encoding e = Encoding.GetEncoding (950);
485 Decoder d = e.GetDecoder ();
// 0xF9: no char is expected and the output slot stays '\0'.
489 chars = new char [1];
490 Assert.AreEqual (0, d.GetChars (new byte [] {0xF9}, 0, 1, chars, 0), "#1");
491 Assert.AreEqual (new char [] {'\0'}, chars, "#2");
// 0x40 in the following call: one char, U+7E98, is expected.
494 chars = new char [1];
495 Assert.AreEqual (1, d.GetChars (new byte [] {0x40}, 0, 1, chars, 0), "#3");
496 Assert.AreEqual (new char [] {'\u7E98'}, chars, "#4");
498 // incomplete and not flushed: a lone 0xF9 is expected to produce no char
499 chars = new char [1];
500 Assert.AreEqual (0, d.GetChars (new byte [] {0xF9}, 0, 1, chars, 0), "#5");
501 Assert.AreEqual (new char [] {'\0'}, chars, "#6");
// Runs AssertEncode for code page 949 with korean-utf8.txt as input and korean-949.txt as expected bytes.
508 public void CP949_Encode ()
510 AssertEncode ("Test/texts/korean-utf8.txt", "Test/texts/korean-949.txt", 949);
// Runs AssertDecode for code page 949 with korean-949.txt as input bytes and korean-utf8.txt as expected text.
514 public void CP949_Decode ()
516 AssertDecode ("Test/texts/korean-utf8.txt", "Test/texts/korean-949.txt", 949);