2006-01-20 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / I18N / CJK / Test / I18N.CJK.Test.cs
1 //
2 // I18N.CJK.Test.cs
3 //
4 // Author:
5 //      Atsushi Enomoto  <atsushi@ximian.com>
6 //
7 // Copyright (C) 2005 Novell, Inc.  http://www.novell.com
8 //
9
10 using System;
11 using System.IO;
12 using System.Text;
13 using NUnit.Framework;
14
namespace MonoTests.I18N.CJK
{
        // Round-trip and regression tests for the CJK (Chinese, Japanese,
        // Korean) code pages.
        //
        // The *_Encode / *_Decode tests compare a UTF-8 reference text with
        // the same text stored in the code page under test. The reference
        // files are addressed with relative paths ("Test/texts/..."), so the
        // tests depend on the current working directory.
        [TestFixture]
        public class TestCJK
        {
                // Returns the whole content of a UTF-8 encoded text file.
                static string ReadUtf8Text (string path)
                {
                        using (StreamReader reader = new StreamReader (path, Encoding.UTF8)) {
                                return reader.ReadToEnd ();
                        }
                }

                // Returns the raw bytes of a file.
                //
                // Stream.Read() is allowed to return fewer bytes than requested,
                // so a single call is not enough: keep reading until the buffer
                // is full. A premature end of stream raises EndOfStreamException
                // instead of silently leaving zero bytes at the end of the buffer.
                static byte [] ReadAllBytes (string path)
                {
                        using (FileStream fs = File.OpenRead (path)) {
                                byte [] buffer = new byte [fs.Length];
                                int filled = 0;
                                while (filled < buffer.Length) {
                                        int read = fs.Read (buffer, filled, buffer.Length - filled);
                                        if (read <= 0) {
                                                throw new EndOfStreamException (
                                                        "Unexpected end of file: " + path);
                                        }
                                        filled += read;
                                }
                                return buffer;
                        }
                }

                // Asserts that encoding the text of utf8file with the given
                // code page yields exactly the bytes stored in decfile.
                //
                //   utf8file - path of the reference text, stored as UTF-8.
                //   decfile  - path of the same text stored in the code page.
                //   codepage - code page number passed to Encoding.GetEncoding().
                void AssertEncode (string utf8file, string decfile, int codepage)
                {
                        string decoded = ReadUtf8Text (utf8file);
                        byte [] encoded = ReadAllBytes (decfile);
                        Encoding enc = Encoding.GetEncoding (codepage);
                        byte [] actual;

                        // simple string case
                        // NOTE(review): the GetByteCount(string) check below was
                        // already disabled in the original; the reason is not
                        // recorded here.
                        //Assert.AreEqual (encoded.Length,
                        //      enc.GetByteCount (decoded),
                        //      "GetByteCount(string)");
                        actual = enc.GetBytes (decoded);
                        Assert.AreEqual (encoded, actual,
                                "GetBytes(string)");

                        // simple char[] case
                        char [] chars = decoded.ToCharArray ();
                        Assert.AreEqual (encoded.Length,
                                enc.GetByteCount (chars, 0, decoded.Length),
                                "GetByteCount(char[], 0, len)");
                        actual = enc.GetBytes (chars, 0, decoded.Length);
                        Assert.AreEqual (encoded, actual,
                                "GetBytes(char[], 0, len)");
                }

                // Asserts that decoding the bytes of decfile with the given
                // code page yields exactly the text stored in utf8file.
                //
                //   utf8file - path of the reference text, stored as UTF-8.
                //   decfile  - path of the same text stored in the code page.
                //   codepage - code page number passed to Encoding.GetEncoding().
                void AssertDecode (string utf8file, string decfile, int codepage)
                {
                        string decoded = ReadUtf8Text (utf8file);
                        byte [] encoded = ReadAllBytes (decfile);
                        Encoding enc = Encoding.GetEncoding (codepage);
                        char [] actual;

                        Assert.AreEqual (decoded.Length,
                                enc.GetCharCount (encoded, 0, encoded.Length),
                                "GetCharCount(byte[], 0, len)");
                        actual = enc.GetChars (encoded, 0, encoded.Length);
                        Assert.AreEqual (decoded.ToCharArray (), actual,
                                "GetChars(byte[], 0, len)");
                }

                #region Chinese

                // GB2312

                [Test]
                public void CP936_Encode ()
                {
                        AssertEncode ("Test/texts/chinese-utf8.txt", "Test/texts/chinese-936.txt", 936);
                }

                [Test]
                public void CP936_Decode ()
                {
                        AssertDecode ("Test/texts/chinese-utf8.txt", "Test/texts/chinese-936.txt", 936);
                }

                // BIG5

                [Test]
                public void CP950_Encode ()
                {
                        AssertEncode ("Test/texts/chinese2-utf8.txt", "Test/texts/chinese2-950.txt", 950);
                }

                [Test]
                public void CP950_Decode ()
                {
                        AssertDecode ("Test/texts/chinese2-utf8.txt", "Test/texts/chinese2-950.txt", 950);
                }

                // GB18030

                [Test]
                public void CP54936_Encode ()
                {
                        AssertEncode ("Test/texts/chinese-utf8.txt", "Test/texts/chinese-54936.txt", 54936);
                }

                [Test]
                public void CP54936_Decode ()
                {
                        AssertDecode ("Test/texts/chinese-utf8.txt", "Test/texts/chinese-54936.txt", 54936);
                }

                #endregion

                #region Japanese

                // Shift_JIS

                [Test]
                public void CP932_Encode ()
                {
                        AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-932.txt", 932);
                }

                [Test]
                public void CP932_Decode ()
                {
                        AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-932.txt", 932);
                }

                // EUC-JP

                [Test]
                public void CP51932_Encode ()
                {
                        AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-51932.txt", 51932);
                }

                [Test]
                public void CP51932_Decode ()
                {
                        AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-51932.txt", 51932);
                }

                // ISO-2022-JP

                [Test]
                public void CP50220_Encode ()
                {
                        AssertEncode ("Test/texts/japanese2-utf8.txt", "Test/texts/japanese2-50220.txt", 50220);
                }

                [Test]
                public void CP50220_Decode ()
                {
                        AssertDecode ("Test/texts/japanese2-utf8.txt", "Test/texts/japanese2-50220.txt", 50220);
                }

                [Test]
                public void CP50221_Encode ()
                {
                        AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50221.txt", 50221);
                }

                [Test]
                public void CP50221_Decode ()
                {
                        AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50221.txt", 50221);
                }

                [Test]
#if !NET_2_0
                [Category ("NotDotNet")] // MS is buggy here
#endif
                public void CP50222_Encode ()
                {
                        AssertEncode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50222.txt", 50222);
                }

                [Test]
#if !NET_2_0
                [Category ("NotDotNet")] // MS is buggy here
#endif
                public void CP50222_Decode ()
                {
                        AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-50222.txt", 50222);
                }

                // The BugNNNNN tests below have no assertions: they pass as
                // long as the exhaustive conversion does not throw.

                [Test]
#if !NET_2_0
                [Category ("NotDotNet")] // MS bug
#endif
                public void Bug77723 ()
                {
                        GetBytesAllSingleChars (51932);
                }

                [Test]
                public void Bug77724 ()
                {
                        GetBytesAllSingleChars (932);
                }

                [Test]
                public void Bug77307 ()
                {
                        GetBytesAllSingleChars (54936);
                }

                // Encodes every UTF-16 code unit (0x0000-0xFFFF, lone
                // surrogates included) as a one-element char array with the
                // code page 'enc'. The results are discarded.
                void GetBytesAllSingleChars (int enc)
                {
                        Encoding e = Encoding.GetEncoding (enc);
                        char [] chars = new char [1];
                        for (int i = 0; i < 0x10000; i++) {
                                chars [0] = (char) i;
                                e.GetBytes (chars);
                        }
                }

                // Decodes every possible two-byte sequence (0x00 0x00 through
                // 0xFF 0xFF) with the code page 'enc'. The results are
                // discarded.
                void GetCharsAllBytePairs (int enc)
                {
                        Encoding e = Encoding.GetEncoding (enc);
                        byte [] bytes = new byte [2];
                        for (int i0 = 0; i0 < 0x100; i0++) {
                                bytes [0] = (byte) i0;
                                for (int i1 = 0; i1 < 0x100; i1++) {
                                        bytes [1] = (byte) i1;
                                        e.GetChars (bytes);
                                }
                        }
                }

                [Test]
                public void Bug77222 ()
                {
                        GetCharsAllBytePairs (51932);
                }

                [Test]
                public void Bug77238 ()
                {
                        GetCharsAllBytePairs (936);
                }

                [Test]
                public void Bug77306 ()
                {
                        GetCharsAllBytePairs (54936);
                }

                [Test]
                public void Bug77298 ()
                {
                        GetCharsAllBytePairs (949);
                }

                [Test]
                public void Bug77274 ()
                {
                        GetCharsAllBytePairs (950);
                }

                // Checks how the GB18030 encoder carries a pending high
                // surrogate between calls and what the flush flag does to it.
                [Test]
#if !NET_2_0
                [Category ("NotDotNet")] // MS bug
#endif
                public void Encoder54936Refresh ()
                {
                        Encoding e = Encoding.GetEncoding ("gb18030");
                        Encoder encoder = e.GetEncoder ();
                        byte [] bytes;

                        // high surrogate only, no flush: nothing is written yet
                        bytes = new byte [4];
                        Assert.AreEqual (0, encoder.GetBytes (new char [] {'\uD800'}, 0, 1, bytes, 0, false), "#1");
                        Assert.AreEqual (new byte [] {00, 00, 00, 00},
                                bytes, "#2");

                        // low surrogate completes the pair: four bytes are written
                        bytes = new byte [4];
                        Assert.AreEqual (4, encoder.GetBytes (new char [] {'\uDC00'}, 0, 1, bytes, 0, true), "#3");
                        Assert.AreEqual (new byte [] {0x90, 0x30, 0x81, 0x30},
                                bytes, "#4");

                        // lone high surrogate with flush: a single 0x3F ('?') is written
                        bytes = new byte [4];
                        Assert.AreEqual (1, encoder.GetBytes (new char [] {'\uD800'}, 0, 1, bytes, 0, true), "#5");
                        Assert.AreEqual (new byte [] {0x3F, 00, 00, 00},
                                bytes, "#6");
                }

#if NET_2_0
                // Checks how the Shift_JIS decoder carries a pending lead byte
                // between calls and what the flush flag does to it.
                [Test]
                public void Decoder932Refresh ()
                {
                        Encoding e = Encoding.GetEncoding (932);
                        Decoder d = e.GetDecoder ();
                        char [] chars;

                        // incomplete: lead byte only, no flush
                        chars = new char [1];
                        Assert.AreEqual (0, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, false), "#1");
                        Assert.AreEqual (new char [] {'\0'}, chars, "#2");

                        // became complete: 0x81 0x81 is expected to decode to U+FF1D
                        chars = new char [1];
                        Assert.AreEqual (1, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, true), "#3");
                        Assert.AreEqual (new char [] {'\uFF1D'}, chars, "#4");

                        // incomplete but flushed: U+30FB is expected for the lone lead byte
                        chars = new char [1];
                        Assert.AreEqual (1, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, true), "#5");
                        Assert.AreEqual (new char [] {'\u30FB'}, chars, "#6");
                }

                // Checks how the EUC-JP decoder carries a pending lead byte
                // between calls and what the flush flag does to it.
                [Test]
                public void Decoder51932Refresh ()
                {
                        Encoding e = Encoding.GetEncoding (51932);
                        Decoder d = e.GetDecoder ();
                        char [] chars;

                        // invalid one: U+30FB is expected immediately, even without flush
                        chars = new char [1];
                        Assert.AreEqual (1, d.GetChars (new byte [] {0x81}, 0, 1, chars, 0, false), "#0.1");
                        Assert.AreEqual (new char [] {'\u30FB'}, chars, "#0.2");

                        // incomplete
                        chars = new char [1];
                        Assert.AreEqual (0, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0, false), "#1");
                        Assert.AreEqual (new char [] {'\0'}, chars, "#2");

                        // became complete: 0xA1 0xA1 is expected to decode to U+3000
                        chars = new char [1];
                        Assert.AreEqual (1, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0, true), "#3");
                        Assert.AreEqual (new char [] {'\u3000'}, chars, "#4");

                        // incomplete but flushed: U+30FB is expected for the lone lead byte
                        chars = new char [1];
                        Assert.AreEqual (1, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0, true), "#5");
                        Assert.AreEqual (new char [] {'\u30FB'}, chars, "#6");
                }
#endif

                // Same scenario as the EUC-JP flush test, but through the
                // GetChars() overload that has no flush parameter.
                [Test]
                public void Decoder51932NoRefresh ()
                {
                        Encoding e = Encoding.GetEncoding (51932);
                        Decoder d = e.GetDecoder ();
                        char [] chars;

                        // incomplete
                        chars = new char [1];
                        Assert.AreEqual (0, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0), "#1");
                        Assert.AreEqual (new char [] {'\0'}, chars, "#2");

                        // became complete: 0xA1 0xA1 is expected to decode to U+3000
                        chars = new char [1];
                        Assert.AreEqual (1, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0), "#3");
                        Assert.AreEqual (new char [] {'\u3000'}, chars, "#4");

                        // incomplete again: no flush is requested, so nothing is
                        // expected to be written for the lone lead byte
                        chars = new char [1];
                        Assert.AreEqual (0, d.GetChars (new byte [] {0xA1}, 0, 1, chars, 0), "#5");
                        Assert.AreEqual (new char [] {'\0'}, chars, "#6");
                }
                #endregion

                #region Korean

                [Test]
                public void CP949_Encode ()
                {
                        AssertEncode ("Test/texts/korean-utf8.txt", "Test/texts/korean-949.txt", 949);
                }

                [Test]
                public void CP949_Decode ()
                {
                        AssertDecode ("Test/texts/korean-utf8.txt", "Test/texts/korean-949.txt", 949);
                }

                #endregion
        }
}