New test.
[mono.git] / mcs / class / I18N / CJK / Test / I18N.CJK.Test.cs
1 //
2 // I18N.CJK.Test.cs
3 //
4 // Author:
5 //      Atsushi Enomoto  <atsushi@ximian.com>
6 //
7 // Copyright (C) 2005 Novell, Inc.  http://www.novell.com
8 //
9
10 using System;
11 using System.IO;
12 using System.Text;
13 using NUnit.Framework;
14
namespace MonoTests.I18N.CJK
{
	// Tests for the CJK code pages obtained through Encoding.GetEncoding ():
	//  - round trips of sample texts against pre-encoded reference files,
	//  - "must not throw" sweeps over every single char / every byte pair,
	//  - checks of how stateful Encoder/Decoder instances handle an
	//    incomplete sequence with and without a flush.
	//
	// The reference files are addressed by relative path ("Test/texts/..."),
	// so they are resolved against the current directory of the test run.
	[TestFixture]
	public class TestCJK
	{
		#region Helpers

		// Returns the whole content of 'path' decoded as UTF-8.
		static string ReadUtf8Text (string path)
		{
			using (StreamReader sr = new StreamReader (path, Encoding.UTF8)) {
				return sr.ReadToEnd ();
			}
		}

		// Returns the whole content of 'path' as raw bytes.
		// Stream.Read () is allowed to return fewer bytes than requested,
		// so keep reading until the buffer is full instead of trusting a
		// single call. Throws EndOfStreamException if the stream ends
		// before the length it reported has been read.
		static byte [] ReadFileBytes (string path)
		{
			using (FileStream fs = File.OpenRead (path)) {
				byte [] buffer = new byte [fs.Length];
				int offset = 0;
				while (offset < buffer.Length) {
					int read = fs.Read (buffer, offset, buffer.Length - offset);
					if (read <= 0)
						throw new EndOfStreamException ("Unexpected end of file: " + path);
					offset += read;
				}
				return buffer;
			}
		}

		// Encodes the UTF-8 text in 'utf8file' with 'codepage' and checks
		// that the result is byte-for-byte equal to the content of
		// 'decfile' (the reference file already encoded in that code page).
		void AssertEncode (string utf8file, string decfile, int codepage)
		{
			string decoded = ReadUtf8Text (utf8file);
			byte [] encoded = ReadFileBytes (decfile);
			Encoding enc = Encoding.GetEncoding (codepage);
			byte [] actual;

			// simple string case
			// NOTE(review): this GetByteCount(string) check was already
			// disabled; the reason is not recorded here - confirm
			// before re-enabling.
			//Assert.AreEqual (encoded.Length,
			//	enc.GetByteCount (decoded),
			//	"GetByteCount(string)");
			actual = enc.GetBytes (decoded);
			Assert.AreEqual (encoded, actual,
				"GetBytes(string)");

			// simple char[] case
			char [] chars = decoded.ToCharArray ();
			Assert.AreEqual (encoded.Length,
				enc.GetByteCount (chars, 0, chars.Length),
				"GetByteCount(char[], 0, len)");
			actual = enc.GetBytes (chars, 0, chars.Length);
			Assert.AreEqual (encoded, actual,
				"GetBytes(char[], 0, len)");
		}

		// Decodes the bytes in 'decfile' with 'codepage' and checks that
		// the result equals the UTF-8 text stored in 'utf8file'.
		void AssertDecode (string utf8file, string decfile, int codepage)
		{
			string decoded = ReadUtf8Text (utf8file);
			byte [] encoded = ReadFileBytes (decfile);
			Encoding enc = Encoding.GetEncoding (codepage);
			char [] actual;

			Assert.AreEqual (decoded.Length,
				enc.GetCharCount (encoded, 0, encoded.Length),
				"GetCharCount(byte[], 0, len)");
			actual = enc.GetChars (encoded, 0, encoded.Length);
			Assert.AreEqual (decoded.ToCharArray (), actual,
				"GetChars(byte[], 0, len)");
		}

		// Encodes every UTF-16 code unit (0x0000-0xFFFF) on its own.
		// There is no assertion: the test only fails if a call throws.
		void GetBytesAllSingleChars (int enc)
		{
			Encoding e = Encoding.GetEncoding (enc);
			// One reusable buffer instead of 65536 throw-away arrays.
			char [] single = new char [1];
			for (int i = 0; i < 0x10000; i++) {
				single [0] = (char) i;
				e.GetBytes (single);
			}
		}

		// Decodes every possible two-byte sequence (0x0000-0xFFFF).
		// There is no assertion: the test only fails if a call throws.
		void GetCharsAllBytePairs (int enc)
		{
			Encoding e = Encoding.GetEncoding (enc);
			byte [] bytes = new byte [2];
			for (int i0 = 0; i0 < 0x100; i0++) {
				bytes [0] = (byte) i0;
				for (int i1 = 0; i1 < 0x100; i1++) {
					bytes [1] = (byte) i1;
					e.GetChars (bytes);
				}
			}
		}

#if NET_2_0
		// Feeds a double-byte sequence to a stateful decoder one byte at a
		// time, using the GetChars () overload that takes a flush flag:
		//  #1/#2  'lead' alone, flush=false : nothing is emitted
		//  #3/#4  'trail', flush=false      : 'expected' is emitted
		//  #5/#6  'lead' alone, flush=true  : a single '?' is emitted
		void AssertDecoderRefresh (int codepage, byte lead, byte trail, char expected)
		{
			Decoder d = Encoding.GetEncoding (codepage).GetDecoder ();
			char [] chars;

			// incomplete
			chars = new char [1];
			Assert.AreEqual (0, d.GetChars (new byte [] {lead}, 0, 1, chars, 0, false), "#1");
			Assert.AreEqual (new char [] {'\0'}, chars, "#2");

			// became complete
			chars = new char [1];
			Assert.AreEqual (1, d.GetChars (new byte [] {trail}, 0, 1, chars, 0, false), "#3");
			Assert.AreEqual (new char [] {expected}, chars, "#4");

			// incomplete but flushed
			chars = new char [1];
			Assert.AreEqual (1, d.GetChars (new byte [] {lead}, 0, 1, chars, 0, true), "#5");
			Assert.AreEqual (new char [] {'?'}, chars, "#6");
		}
#endif

		// Same byte-at-a-time scenario as above, but through the
		// five-argument GetChars () overload, which has no flush flag:
		//  #1/#2  'lead' alone  : nothing is emitted
		//  #3/#4  'trail'       : 'expected' is emitted
		//  #5/#6  'lead' alone  : again nothing is emitted
		void AssertDecoderNoRefresh (int codepage, byte lead, byte trail, char expected)
		{
			Decoder d = Encoding.GetEncoding (codepage).GetDecoder ();
			char [] chars;

			// incomplete
			chars = new char [1];
			Assert.AreEqual (0, d.GetChars (new byte [] {lead}, 0, 1, chars, 0), "#1");
			Assert.AreEqual (new char [] {'\0'}, chars, "#2");

			// became complete
			chars = new char [1];
			Assert.AreEqual (1, d.GetChars (new byte [] {trail}, 0, 1, chars, 0), "#3");
			Assert.AreEqual (new char [] {expected}, chars, "#4");

			// incomplete again; no flush is requested, so no output is expected
			chars = new char [1];
			Assert.AreEqual (0, d.GetChars (new byte [] {lead}, 0, 1, chars, 0), "#5");
			Assert.AreEqual (new char [] {'\0'}, chars, "#6");
		}

		#endregion

		#region Chinese

		// GB2312

		[Test]
		public void CP936_Encode ()
		{
			AssertEncode ("Test/texts/chinese-utf8.txt", "Test/texts/chinese-936.txt", 936);
		}

		[Test]
		public void CP936_Decode ()
		{
			AssertDecode ("Test/texts/chinese-utf8.txt", "Test/texts/chinese-936.txt", 936);
		}

		// BIG5

		[Test]
		public void CP950_Encode ()
		{
			AssertEncode ("Test/texts/chinese2-utf8.txt", "Test/texts/chinese2-950.txt", 950);
		}

		[Test]
		public void CP950_Decode ()
		{
			AssertDecode ("Test/texts/chinese2-utf8.txt", "Test/texts/chinese2-950.txt", 950);
		}

		// GB18030

		[Test]
		public void CP54936_Encode ()
		{
			AssertEncode ("Test/texts/chinese-utf8.txt", "Test/texts/chinese-54936.txt", 54936);
		}

		[Test]
		public void CP54936_Decode ()
		{
			AssertDecode ("Test/texts/chinese-utf8.txt", "Test/texts/chinese-54936.txt", 54936);
		}

		#endregion

		#region Japanese

		// Shift_JIS

		[Test]
		public void CP932_Encode ()
		{
			AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-932.txt", 932);
		}

		[Test]
		public void CP932_Decode ()
		{
			AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-932.txt", 932);
		}

		// EUC-JP

		[Test]
		public void CP51932_Encode ()
		{
			AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-51932.txt", 51932);
		}

		[Test]
		public void CP51932_Decode ()
		{
			AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-51932.txt", 51932);
		}

		// ISO-2022-JP

		[Test]
		public void CP50220_Encode ()
		{
			AssertEncode ("Test/texts/japanese2-utf8.txt", "Test/texts/japanese2-50220.txt", 50220);
		}

		[Test]
		public void CP50220_Decode ()
		{
			AssertDecode ("Test/texts/japanese2-utf8.txt", "Test/texts/japanese2-50220.txt", 50220);
		}

		[Test]
		public void CP50221_Encode ()
		{
			AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50221.txt", 50221);
		}

		[Test]
		public void CP50221_Decode ()
		{
			AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50221.txt", 50221);
		}

		[Test]
#if !NET_2_0
		[Category ("NotDotNet")] // MS is buggy here
#endif
		public void CP50222_Encode ()
		{
			AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50222.txt", 50222);
		}

		[Test]
#if !NET_2_0
		[Category ("NotDotNet")] // MS is buggy here
#endif
		public void CP50222_Decode ()
		{
			AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50222.txt", 50222);
		}

		#endregion

		#region Regressions

		// Each test below only requires that the sweep completes without
		// an exception; the method name carries the bug number.

		[Test]
#if !NET_2_0
		[Category ("NotDotNet")] // MS bug
#endif
		public void Bug77723 ()
		{
			GetBytesAllSingleChars (51932);
		}

		[Test]
		public void Bug77724 ()
		{
			GetBytesAllSingleChars (932);
		}

		[Test]
		public void Bug77307 ()
		{
			GetBytesAllSingleChars (54936);
		}

		[Test]
		public void Bug77222 ()
		{
			GetCharsAllBytePairs (51932);
		}

		[Test]
		public void Bug77238 ()
		{
			GetCharsAllBytePairs (936);
		}

		[Test]
		public void Bug77306 ()
		{
			GetCharsAllBytePairs (54936);
		}

		[Test]
		public void Bug77298 ()
		{
			GetCharsAllBytePairs (949);
		}

		[Test]
		public void Bug77274 ()
		{
			GetCharsAllBytePairs (950);
		}

		#endregion

		#region Encoder and decoder state

		[Test]
#if !NET_2_0
		[Category ("NotDotNet")] // MS bug
#endif
		public void Encoder54936Refresh ()
		{
			Encoding e = Encoding.GetEncoding ("gb18030");
			Encoder encoder = e.GetEncoder ();
			byte [] bytes;

			// lone high surrogate, flush=false: nothing is written yet
			bytes = new byte [4];
			Assert.AreEqual (0, encoder.GetBytes (new char [] {'\uD800'}, 0, 1, bytes, 0, false), "#1");
			Assert.AreEqual (new byte [] {0x00, 0x00, 0x00, 0x00},
				bytes, "#2");

			// the low surrogate completes the pair (U+10000): four bytes
			bytes = new byte [4];
			Assert.AreEqual (4, encoder.GetBytes (new char [] {'\uDC00'}, 0, 1, bytes, 0, true), "#3");
			Assert.AreEqual (new byte [] {0x90, 0x30, 0x81, 0x30},
				bytes, "#4");

			// lone high surrogate, flush=true: a single 0x3F ('?') is written
			bytes = new byte [4];
			Assert.AreEqual (1, encoder.GetBytes (new char [] {'\uD800'}, 0, 1, bytes, 0, true), "#5");
			Assert.AreEqual (new byte [] {0x3F, 0x00, 0x00, 0x00},
				bytes, "#6");
		}

#if NET_2_0
		[Test]
		public void Decoder932Refresh ()
		{
			Encoding e = Encoding.GetEncoding (932);
			Decoder d = e.GetDecoder ();
			char [] chars;

			// incomplete
			chars = new char [1];
			Assert.AreEqual (0, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, false), "#1");
			Assert.AreEqual (new char [] {'\0'}, chars, "#2");

			// became complete (and the decoder is flushed)
			chars = new char [1];
			Assert.AreEqual (1, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, true), "#3");
			Assert.AreEqual (new char [] {'\uFF1D'}, chars, "#4");

			// incomplete but flushed
			chars = new char [1];
			Assert.AreEqual (1, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, true), "#5");
			Assert.AreEqual (new char [] {'\u30FB'}, chars, "#6");
		}

		[Test]
		public void Decoder51932Refresh ()
		{
			Encoding e = Encoding.GetEncoding (51932);
			Decoder d = e.GetDecoder ();
			char [] chars;

			// invalid one
			chars = new char [1];
			Assert.AreEqual (1, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, false), "#0.1");
			Assert.AreEqual (new char [] {'\u30FB'}, chars, "#0.2");

			// incomplete
			chars = new char [1];
			Assert.AreEqual (0, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0, false), "#1");
			Assert.AreEqual (new char [] {'\0'}, chars, "#2");

			// became complete (and the decoder is flushed)
			chars = new char [1];
			Assert.AreEqual (1, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0, true), "#3");
			Assert.AreEqual (new char [] {'\u3000'}, chars, "#4");

			// incomplete but flushed
			chars = new char [1];
			Assert.AreEqual (1, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0, true), "#5");
			Assert.AreEqual (new char [] {'\u30FB'}, chars, "#6");
		}

		[Test]
		public void Decoder936Refresh ()
		{
			AssertDecoderRefresh (936, 0xB0, 0xA1, '\u554A');
		}

		[Test]
		public void Decoder949Refresh ()
		{
			AssertDecoderRefresh (949, 0x81, 0x41, '\uAC02');
		}

		[Test]
		public void Decoder950Refresh ()
		{
			AssertDecoderRefresh (950, 0xF9, 0x40, '\u7E98');
		}
#endif

		[Test]
		public void Decoder51932NoRefresh ()
		{
			AssertDecoderNoRefresh (51932, 0xA1, 0xA1, '\u3000');
		}

		[Test]
		public void Decoder936NoRefresh ()
		{
			AssertDecoderNoRefresh (936, 0xB0, 0xA1, '\u554A');
		}

		[Test]
		public void Decoder949NoRefresh ()
		{
			AssertDecoderNoRefresh (949, 0x81, 0x41, '\uAC02');
		}

		[Test]
		public void Decoder950NoRefresh ()
		{
			AssertDecoderNoRefresh (950, 0xF9, 0x40, '\u7E98');
		}

		#endregion

		#region Korean

		[Test]
		public void CP949_Encode ()
		{
			AssertEncode ("Test/texts/korean-utf8.txt", "Test/texts/korean-949.txt", 949);
		}

		[Test]
		public void CP949_Decode ()
		{
			AssertDecode ("Test/texts/korean-utf8.txt", "Test/texts/korean-949.txt", 949);
		}

		#endregion
	}
}