Merge pull request #565 from rneatherway/master
[mono.git] / mcs / class / I18N / CJK / Test / I18N.CJK.Test.cs
1 //
2 // I18N.CJK.Test.cs
3 //
4 // Author:
5 //      Atsushi Enomoto  <atsushi@ximian.com>
6 //
7 // Copyright (C) 2005 Novell, Inc.  http://www.novell.com
8 //
9
10 using System;
11 using System.IO;
12 using System.Text;
13 using NUnit.Framework;
14
15 namespace MonoTests.I18N.CJK
16 {
17         [TestFixture]
18         public class TestCJK
19         {
20                 private global::I18N.Common.Manager Manager = global::I18N.Common.Manager.PrimaryManager;
21
22                 void AssertEncode (string utf8file, string decfile, int codepage)
23                 {
24                         string decoded = null;
25                         byte [] encoded = null;
26                         using (StreamReader sr = new StreamReader (utf8file, 
27                                 Encoding.UTF8)) {
28                                 decoded = sr.ReadToEnd ();
29                         }
30                         using (FileStream fs = File.OpenRead (decfile)) {
31                                 encoded = new byte [fs.Length];
32                                 fs.Read (encoded, 0, (int) fs.Length);
33                         }
34                         Encoding enc = Manager.GetEncoding (codepage);
35                         byte [] actual;
36
37                         // simple string case
38                         //Assert.AreEqual (encoded.Length,
39                         //      enc.GetByteCount (decoded),
40                         //      "GetByteCount(string)");
41                         actual = enc.GetBytes (decoded);
42                         Assert.AreEqual (encoded, actual,
43                                 "GetBytes(string)");
44
45                         // simple char[] case
46                         Assert.AreEqual (encoded.Length,
47                                 enc.GetByteCount (decoded.ToCharArray (), 0, decoded.Length),
48                                 "GetByteCount(char[], 0, len)");
49                         actual = enc.GetBytes (decoded.ToCharArray (), 0, decoded.Length);
50                         Assert.AreEqual (encoded, actual,
51                                 "GetBytes(char[], 0, len)");
52                 }
53
54                 void AssertDecode (string utf8file, string decfile, int codepage)
55                 {
56                         string decoded = null;
57                         byte [] encoded = null;
58                         using (StreamReader sr = new StreamReader (utf8file,
59                                 Encoding.UTF8)) {
60                                 decoded = sr.ReadToEnd ();
61                         }
62                         using (FileStream fs = File.OpenRead (decfile)) {
63                                 encoded = new byte [fs.Length];
64                                 fs.Read (encoded, 0, (int) fs.Length);
65                         }
66                         Encoding enc = Manager.GetEncoding (codepage);
67                         char [] actual;
68
69                         Assert.AreEqual (decoded.Length,
70                                 enc.GetCharCount (encoded, 0, encoded.Length),
71                                 "GetCharCount(byte[], 0, len)");
72                         actual = enc.GetChars (encoded, 0, encoded.Length);
73                         Assert.AreEqual (decoded.ToCharArray (), actual,
74                                 "GetChars(byte[], 0, len)");
75                 }
76
77                 #region Chinese
78
79                 // GB2312
80
81                 [Test]
82                 public void CP936_Encode ()
83                 {
84                         AssertEncode ("Test/texts/chinese-utf8.txt", "Test/texts/chinese-936.txt", 936);
85                 }
86
87                 [Test]
88                 public void CP936_Encode3 ()
89                 {
90                         AssertEncode("Test/texts/chinese3-utf8.txt", "Test/texts/chinese3-936.txt", 936);
91                 }
92
93                 [Test]
94                 public void CP936_Decode ()
95                 {
96                         AssertDecode ("Test/texts/chinese-utf8.txt", "Test/texts/chinese-936.txt", 936);
97                 }
98
99                 [Test]
100                 public void Bug_1531()
101                 {
102                         string str = @"wqk=";
103                         byte[] utf8 = Convert.FromBase64String(str);
104                         char[] data = Encoding.UTF8.GetChars(utf8);
105
106                         var encoding = Manager.GetEncoding("GB2312");
107                         var result = encoding.GetBytes(data);
108
109                         Assert.AreEqual(new byte[] { 63 }, result);
110                 }
111
112                 // BIG5
113
114                 [Test]
115                 public void CP950_Encode ()
116                 {
117                         AssertEncode ("Test/texts/chinese2-utf8.txt", "Test/texts/chinese2-950.txt", 950);
118                 }
119
120                 [Test]
121                 public void CP950_Encode4 ()
122                 {
123                         AssertEncode("Test/texts/chinese4-utf8.txt", "Test/texts/chinese4-950.txt", 950);
124                 }
125
126                 [Test]
127                 public void CP950_Decode ()
128                 {
129                         AssertDecode ("Test/texts/chinese2-utf8.txt", "Test/texts/chinese2-950.txt", 950);
130                 }
131
132                 // GB18030
133
134                 [Test]
135                 public void CP54936_Encode ()
136                 {
137                         AssertEncode ("Test/texts/chinese-utf8.txt", "Test/texts/chinese-54936.txt", 54936);
138                 }
139
140                 [Test]
141                 public void CP54936_Decode ()
142                 {
143                         AssertDecode ("Test/texts/chinese-utf8.txt", "Test/texts/chinese-54936.txt", 54936);
144                 }
145
146                 #endregion
147
148                 #region Japanese
149
150                 // Shift_JIS
151
152                 [Test]
153                 public void CP932_Encode ()
154                 {
155                         AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-932.txt", 932);
156                 }
157
158                 [Test]
159                 public void CP932_Decode ()
160                 {
161                         AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-932.txt", 932);
162                 }
163
164                 // EUC-JP
165
166                 [Test]
167                 public void CP51932_Encode ()
168                 {
169                         AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-51932.txt", 51932);
170                 }
171
172                 [Test]
173                 public void CP51932_Decode ()
174                 {
175                         AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-51932.txt", 51932);
176                 }
177
178                 // ISO-2022-JP
179
180                 [Test]
181                 public void CP50220_Encode ()
182                 {
183                         AssertEncode ("Test/texts/japanese2-utf8.txt", "Test/texts/japanese2-50220.txt", 50220);
184                 }
185
186                 [Test]
187                 public void CP50220_Encode_3 ()
188                 {
189                         AssertEncode("Test/texts/japanese3-utf8.txt", "Test/texts/japanese3-50220.txt", 50220);
190                 }
191
192                 [Test]
193                 public void CP50220_Decode ()
194                 {
195                         AssertDecode ("Test/texts/japanese2-utf8.txt", "Test/texts/japanese2-50220.txt", 50220);
196                 }
197
198                 [Test]
199                 public void CP50221_Encode ()
200                 {
201                         AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50221.txt", 50221);
202                 }
203
204                 [Test]
205                 public void CP50221_Encode_3()
206                 {
207                         AssertEncode("Test/texts/japanese3-utf8.txt", "Test/texts/japanese3-50221.txt", 50221);
208                 }
209
210                 [Test]
211                 public void CP50221_Decode ()
212                 {
213                         AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50221.txt", 50221);
214                 }
215
216                 [Test]
217 #if !NET_2_0
218                 [Category ("NotDotNet")] // MS is buggy here
219 #endif
220                 public void CP50222_Encode ()
221                 {
222                         AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50222.txt", 50222);
223                 }
224
225                 [Test]
226 #if !NET_2_0
227                 [Category ("NotDotNet")] // MS is buggy here
228 #endif
229                 public void CP50222_Decode ()
230                 {
231                         AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50222.txt", 50222);
232                 }
233
234                 [Test]
235                 public void CP50220BrokenESC ()
236                 {
237                         Assert.AreEqual ("\u001B$0", Manager.GetEncoding (50220).GetString (new byte [] {0x1B, 0x24, 0x30}), "#1");
238                 }
239
240                 [Test]
241                 public void CP50220BrokenESC2 ()
242                 {
243                         // it does not really invoke fallback ...
244                         Assert.AreEqual ("\u001B$0", Encoding.GetEncoding (50220, new EncoderReplacementFallback (), new DecoderReplacementFallback ("")).GetString (new byte [] {0x1B, 0x24, 0x30}), "#1");
245                 }
246
247                 [Test]
248                 public void CP50220BrokenESC3 ()
249                 {
250                         // neither ...
251                         Assert.AreEqual ("\u001B$0", Encoding.GetEncoding (50220, new EncoderExceptionFallback (), new DecoderExceptionFallback ()).GetString (new byte [] {0x1B, 0x24, 0x30}), "#2");
252                 }
253
254                 [Test]
255 #if !NET_2_0
256                 [Category ("NotDotNet")] // MS bug
257 #endif
258                 public void Bug77723 ()
259                 {
260                         GetBytesAllSingleChars (51932);
261                 }
262
263                 [Test]
264                 public void Bug77724 ()
265                 {
266                         GetBytesAllSingleChars (932);
267                 }
268
269                 [Test]
270                 public void Bug77307 ()
271                 {
272                         GetBytesAllSingleChars (54936);
273                 }
274
275                 void GetBytesAllSingleChars (int enc)
276                 {
277                         Encoding e = Manager.GetEncoding (enc);
278                         for (int i = 0; i < 0x10000; i++)
279                                 e.GetBytes (new char [] { (char)i });
280                 }
281
282                 void GetCharsAllBytePairs (int enc)
283                 {
284                         Encoding e = Manager.GetEncoding (enc);
285                         byte [] bytes = new byte [2];
286                         for (int i0 = 0; i0 < 0x100; i0++) {
287                                 bytes [0] = (byte) i0;
288                                 for (int i1 = 0; i1 < 0x100; i1++) {
289                                         bytes [1] = (byte) i1;
290                                         e.GetChars (bytes);
291                                 }
292                         }
293                 }
294
295                 [Test]
296                 public void Bug77222 ()
297                 {
298                         GetCharsAllBytePairs (51932);
299                 }
300
301                 [Test]
302                 public void Bug77238 ()
303                 {
304                         GetCharsAllBytePairs (936);
305                 }
306
307                 [Test]
308                 public void Bug77306 ()
309                 {
310                         GetCharsAllBytePairs (54936);
311                 }
312
313                 [Test]
314                 public void Bug77298 ()
315                 {
316                         GetCharsAllBytePairs (949);
317                 }
318
319                 [Test]
320                 public void Bug77274 ()
321                 {
322                         GetCharsAllBytePairs (950);
323                 }
324
325                 [Test]
326 #if !NET_2_0
327                 [Category ("NotDotNet")] // MS bug
328 #endif
329                 public void Encoder54936Refresh ()
330                 {
331                         Encoding e = Manager.GetEncoding ("gb18030");
332                         Encoder d = e.GetEncoder ();
333                         byte [] bytes;
334
335                         bytes = new byte [4];
336                         Assert.AreEqual (0, d.GetBytes (new char [] {'\uD800'}, 0, 1, bytes, 0, false), "#1");
337                         Assert.AreEqual (new byte [] {00, 00, 00, 00},
338                                 bytes, "#2");
339
340                         bytes = new byte [4];
341                         Assert.AreEqual (4, d.GetBytes (new char [] {'\uDC00'}, 0, 1, bytes, 0, true), "#3");
342                         Assert.AreEqual (new byte [] {0x90, 0x30, 0x81, 0x30},
343                                 bytes, "#4");
344
345                         bytes = new byte [4];
346                         Assert.AreEqual (1, d.GetBytes (new char [] {'\uD800'}, 0, 1, bytes, 0, true), "#5");
347                         Assert.AreEqual (new byte [] {0x3F, 00, 00, 00},
348                                 bytes, "#6");
349                 }
350
351                 [Test]
352                 public void Bug491799 ()
353                 {
354                         Assert.AreEqual (new byte [] {0xEE, 0xFC},
355                                            Manager.GetEncoding (932).GetBytes ("\uFF02"));
356                 }
357
358 #if NET_2_0
359                 [Test]
360                 public void Decoder932Refresh ()
361                 {
362                         Encoding e = Manager.GetEncoding (932);
363                         Decoder d = e.GetDecoder ();
364                         char [] chars;
365
366                         chars = new char [1];
367                         Assert.AreEqual (0, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, false), "#1");
368                         Assert.AreEqual (new char [] {'\0'}, chars, "#2");
369
370                         chars = new char [1];
371                         Assert.AreEqual (1, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, true), "#3");
372                         Assert.AreEqual (new char [] {'\uFF1D'}, chars, "#4");
373
374                         chars = new char [1];
375                         Assert.AreEqual (1, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, true), "#5");
376                         Assert.AreEqual (new char [] {'\u30FB'}, chars, "#6");
377                 }
378
379                 [Test]
380                 public void Decoder51932Refresh ()
381                 {
382                         Encoding e = Manager.GetEncoding (51932);
383                         Decoder d = e.GetDecoder ();
384                         char [] chars;
385
386                         // invalid one
387                         chars = new char [1];
388                         Assert.AreEqual (1, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, false), "#0.1");
389                         Assert.AreEqual (new char [] {'\u30FB'}, chars, "#0.2");
390
391                         // incomplete
392                         chars = new char [1];
393                         Assert.AreEqual (0, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0, false), "#1");
394                         Assert.AreEqual (new char [] {'\0'}, chars, "#2");
395
396                         // became complete
397                         chars = new char [1];
398                         Assert.AreEqual (1, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0, true), "#3");
399                         Assert.AreEqual (new char [] {'\u3000'}, chars, "#4");
400
401                         // incomplete but refreshed
402                         chars = new char [1];
403                         Assert.AreEqual (1, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0, true), "#5");
404                         Assert.AreEqual (new char [] {'\u30FB'}, chars, "#6");
405                 }
406
407                 [Test]
408                 public void Decoder936Refresh ()
409                 {
410                         Encoding e = Manager.GetEncoding (936);
411                         Decoder d = e.GetDecoder ();
412                         char [] chars;
413
414                         // incomplete
415                         chars = new char [1];
416                         Assert.AreEqual (0, d.GetChars (new byte [] {0xB0}, 0, 1, chars, 0, false), "#1");
417                         Assert.AreEqual (new char [] {'\0'}, chars, "#2");
418
419                         // became complete
420                         chars = new char [1];
421                         Assert.AreEqual (1, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0, false), "#3");
422                         Assert.AreEqual (new char [] {'\u554A'}, chars, "#4");
423
424                         // incomplete but refreshed
425                         chars = new char [1];
426                         Assert.AreEqual (1, d.GetChars (new byte [] {0xB0}, 0, 1, chars, 0, true), "#5");
427                         Assert.AreEqual (new char [] {'?'}, chars, "#6");
428                 }
429
430                 [Test]
431                 public void Decoder949Refresh ()
432                 {
433                         Encoding e = Manager.GetEncoding (949);
434                         Decoder d = e.GetDecoder ();
435                         char [] chars;
436
437                         // incomplete
438                         chars = new char [1];
439                         Assert.AreEqual (0, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, false), "#1");
440                         Assert.AreEqual (new char [] {'\0'}, chars, "#2");
441
442                         // became complete
443                         chars = new char [1];
444                         Assert.AreEqual (1, d.GetChars (new byte [] {0x41}, 0, 1, chars, 0, false), "#3");
445                         Assert.AreEqual (new char [] {'\uAC02'}, chars, "#4");
446
447                         // incomplete but refreshed
448                         chars = new char [1];
449                         Assert.AreEqual (1, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, true), "#5");
450                         Assert.AreEqual (new char [] {'?'}, chars, "#6");
451                 }
452
453                 [Test]
454                 public void Decoder950Refresh ()
455                 {
456                         Encoding e = Manager.GetEncoding (950);
457                         Decoder d = e.GetDecoder ();
458                         char [] chars;
459
460                         // incomplete
461                         chars = new char [1];
462                         Assert.AreEqual (0, d.GetChars (new byte [] {0xF9}, 0, 1, chars, 0, false), "#1");
463                         Assert.AreEqual (new char [] {'\0'}, chars, "#2");
464
465                         // became complete
466                         chars = new char [1];
467                         Assert.AreEqual (1, d.GetChars (new byte [] {0x40}, 0, 1, chars, 0, false), "#3");
468                         Assert.AreEqual (new char [] {'\u7E98'}, chars, "#4");
469
470                         // incomplete but refreshed
471                         chars = new char [1];
472                         Assert.AreEqual (1, d.GetChars (new byte [] {0xF9}, 0, 1, chars, 0, true), "#5");
473                         Assert.AreEqual (new char [] {'?'}, chars, "#6");
474                 }
475 #endif
476
477
478                 [Test]
479                 public void Decoder51932NoRefresh ()
480                 {
481                         Encoding e = Manager.GetEncoding (51932);
482                         Decoder d = e.GetDecoder ();
483                         char [] chars;
484
485                         // incomplete
486                         chars = new char [1];
487                         Assert.AreEqual (0, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0), "#1");
488                         Assert.AreEqual (new char [] {'\0'}, chars, "#2");
489
490                         // became complete
491                         chars = new char [1];
492                         Assert.AreEqual (1, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0), "#3");
493                         Assert.AreEqual (new char [] {'\u3000'}, chars, "#4");
494
495                         // incomplete but refreshed
496                         chars = new char [1];
497                         Assert.AreEqual (0, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0), "#5");
498                         Assert.AreEqual (new char [] {'\0'}, chars, "#6");
499                 }
500
501                 [Test]
502                 public void Decoder936NoRefresh ()
503                 {
504                         Encoding e = Manager.GetEncoding (936);
505                         Decoder d = e.GetDecoder ();
506                         char [] chars;
507
508                         // incomplete
509                         chars = new char [1];
510                         Assert.AreEqual (0, d.GetChars (new byte [] {0xB0}, 0, 1, chars, 0), "#1");
511                         Assert.AreEqual (new char [] {'\0'}, chars, "#2");
512
513                         // became complete
514                         chars = new char [1];
515                         Assert.AreEqual (1, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0), "#3");
516                         Assert.AreEqual (new char [] {'\u554A'}, chars, "#4");
517
518                         // incomplete but refreshed
519                         chars = new char [1];
520                         Assert.AreEqual (0, d.GetChars (new byte [] {0xB0}, 0, 1, chars, 0), "#5");
521                         Assert.AreEqual (new char [] {'\0'}, chars, "#6");
522                 }
523
524                 [Test]
525                 public void Decoder949NoRefresh ()
526                 {
527                         Encoding e = Manager.GetEncoding (949);
528                         Decoder d = e.GetDecoder ();
529                         char [] chars;
530
531                         // incomplete
532                         chars = new char [1];
533                         Assert.AreEqual (0, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0), "#1");
534                         Assert.AreEqual (new char [] {'\0'}, chars, "#2");
535
536                         // became complete
537                         chars = new char [1];
538                         Assert.AreEqual (1, d.GetChars (new byte [] {0x41}, 0, 1, chars, 0), "#3");
539                         Assert.AreEqual (new char [] {'\uAC02'}, chars, "#4");
540
541                         // incomplete but refreshed
542                         chars = new char [1];
543                         Assert.AreEqual (0, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0), "#5");
544                         Assert.AreEqual (new char [] {'\0'}, chars, "#6");
545                 }
546
547                 [Test]
548                 public void Decoder950NoRefresh ()
549                 {
550                         Encoding e = Manager.GetEncoding (950);
551                         Decoder d = e.GetDecoder ();
552                         char [] chars;
553
554                         // incomplete
555                         chars = new char [1];
556                         Assert.AreEqual (0, d.GetChars (new byte [] {0xF9}, 0, 1, chars, 0), "#1");
557                         Assert.AreEqual (new char [] {'\0'}, chars, "#2");
558
559                         // became complete
560                         chars = new char [1];
561                         Assert.AreEqual (1, d.GetChars (new byte [] {0x40}, 0, 1, chars, 0), "#3");
562                         Assert.AreEqual (new char [] {'\u7E98'}, chars, "#4");
563
564                         // incomplete but refreshed
565                         chars = new char [1];
566                         Assert.AreEqual (0, d.GetChars (new byte [] {0xF9}, 0, 1, chars, 0), "#5");
567                         Assert.AreEqual (new char [] {'\0'}, chars, "#6");
568                 }
569
570                 [Test]
571                 public void HandleObsoletedESCJ () // bug #398273
572                 {
573                         byte [] b = new byte [] {0x64, 0x6f, 0x6e, 0x1b, 0x24, 0x42, 0x21, 0x47, 0x1b, 0x28, 0x4a, 0x74};
574                         string s = Manager.GetEncoding ("ISO-2022-JP").GetString (b);
575                         Assert.AreEqual ("don\u2019t", s);
576
577                 }
578                 
579                 [Test]
580                 public void Bug14591 ()
581                 {
582                         var expected = "\u4f50\u85e4\u8c4a";
583                         var text = Encoding.GetEncoding ("iso-2022-jp").GetString (Convert.FromBase64String ("GyRAOjRGI0stGyhK"));
584                         Assert.AreEqual (expected, text, "#1");
585                 }
586                 #endregion
587
588                 #region Korean
589
590                 [Test]
591                 public void CP949_Encode ()
592                 {
593                         AssertEncode ("Test/texts/korean-utf8.txt", "Test/texts/korean-949.txt", 949);
594                 }
595
596                 [Test]
597                 public void CP949_Decode ()
598                 {
599                         AssertDecode ("Test/texts/korean-utf8.txt", "Test/texts/korean-949.txt", 949);
600                 }
601
602                 #endregion
603         }
604 }