New test.
[mono.git] / mcs / class / I18N / CJK / CP950.cs
1 //
2 // I18N.CJK.CP950
3 //
4 // Author:
5 //   Alan Tam Siu Lung (Tam@SiuLung.com)
6 //   Atsushi Enomoto  <atsushi@ximian.com>
7 //
8
9 using System;
10 using System.Text;
11 using I18N.Common;
12
13 namespace I18N.CJK
14 {
15         [Serializable]
16         internal class CP950 : DbcsEncoding
17         {
18                 // Magic number used by Windows for the Big5 code page.
19                 private const int BIG5_CODE_PAGE = 950;
20                 
21                 // Constructor.
22                 public CP950() : base(BIG5_CODE_PAGE) {
23                 }
24
25                 internal override DbcsConvert GetConvert ()
26                 {
27                         return DbcsConvert.Big5;
28                 }
29
30                 // Get the bytes that result from encoding a character buffer.
31                 public unsafe override int GetByteCountImpl (char* chars, int count)
32                 {
33                         DbcsConvert convert = GetConvert ();
34                         int index = 0;
35                         int length = 0;
36 #if NET_2_0
37                         EncoderFallbackBuffer buffer = null;
38 #endif
39
40                         while (count-- > 0) {
41                                 char c = chars[index++];
42                                 if (c <= 0x80 || c == 0xFF) { // ASCII
43                                         length++;
44                                         continue;
45                                 }
46                                 byte b1 = convert.u2n[((int)c) * 2 + 1];
47                                 byte b2 = convert.u2n[((int)c) * 2];
48                                 if (b1 == 0 && b2 == 0) {
49 #if NET_2_0
50                                         // FIXME: handle fallback for GetByteCountImpl().
51                                         length++;
52 #else
53                                         length++;
54 #endif
55                                 }
56                                 else
57                                         length += 2;
58                         }
59                         return length;
60                 }
61
62                 // Get the bytes that result from encoding a character buffer.
63                 public unsafe override int GetBytesImpl (char* chars, int charCount,
64                                              byte* bytes, int byteCount)
65                 {
66                         DbcsConvert convert = GetConvert ();
67                         int charIndex = 0;
68                         int byteIndex = 0;
69 #if NET_2_0
70                         EncoderFallbackBuffer buffer = null;
71 #endif
72
73                         int origIndex = byteIndex;
74                         while (charCount-- > 0) {
75                                 char c = chars[charIndex++];
76                                 if (c <= 0x80 || c == 0xFF) { // ASCII
77                                         bytes[byteIndex++] = (byte)c;
78                                         continue;
79                                 }
80                                 byte b1 = convert.u2n[((int)c) * 2 + 1];
81                                 byte b2 = convert.u2n[((int)c) * 2];
82                                 if (b1 == 0 && b2 == 0) {
83 #if NET_2_0
84                                         HandleFallback (ref buffer, chars,
85                                                 ref charIndex, ref charCount,
86                                                 bytes, ref byteIndex, ref byteCount);
87 #else
88                                         bytes[byteIndex++] = (byte)'?';
89 #endif
90                                 } else {
91                                         bytes[byteIndex++] = b1;
92                                         bytes[byteIndex++] = b2;
93                                 }
94                         }
95                         return byteIndex - origIndex;
96                 }
97                 
98                 // Get the characters that result from decoding a byte buffer.
99                 public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
100                                              char[] chars, int charIndex)
101                 {
102                         /*
103                         DbcsConvert convert = GetConvert ();
104                         // A1 40 - FA FF
105                         base.GetChars(bytes, byteIndex, byteCount, chars, charIndex);
106                         int origIndex = charIndex;
107                         int lastByte = 0;
108                         while (byteCount-- > 0) {
109                                 int b = bytes[byteIndex++];
110                                 if (lastByte == 0) {
111                                         if (b <= 0x80 || b == 0xFF) { // ASCII
112                                                 chars[charIndex++] = (char)b;
113                                         } else if (b < 0xA1 || b >= 0xFA) {
114                                                 // incorrect first byte.
115                                                 chars[charIndex++] = '?';
116                                                 byteCount--; // cut one more byte.
117                                         } else {
118                                                 lastByte = b;
119                                         }
120                                         continue;
121                                 }
122                                 int ord = ((lastByte - 0xA1) * 191 + b - 0x40) * 2;
123                                 char c1 = ord < 0 || ord > convert.n2u.Length ?
124                                         '\0' :
125                                         (char)(convert.n2u[ord] + convert.n2u[ord + 1] * 256);
126                                 if (c1 == 0)
127                                         chars[charIndex++] = '?';
128                                 else
129                                         chars[charIndex++] = c1;
130                                 lastByte = 0;
131                         }
132                         if (lastByte != 0)
133                                 chars[charIndex++] = '?';
134
135                         return charIndex - origIndex;
136                         */
137
138                         return GetDecoder ().GetChars (bytes, byteIndex, byteCount, chars, charIndex);
139                 }
140                 
141                 // Get a decoder that handles a rolling Big5 state.
142                 public override Decoder GetDecoder()
143                 {
144                         return new CP950Decoder(GetConvert ());
145                 }
146                 
147                 // Get the mail body name for this encoding.
148                 public override String BodyName
149                 {
150                         get { return "big5"; }
151                 }
152                 
153                 // Get the human-readable name for this encoding.
154                 public override String EncodingName
155                 {
156                         get { return "Chinese Traditional (Big5)"; }
157                 }
158                 
159                 // Get the mail agent header name for this encoding.
160                 public override String HeaderName
161                 {
162                         get { return "big5"; }
163                 }
164                 
165                 // Get the IANA-preferred Web name for this encoding.
166                 public override String WebName
167                 {
168                         get { return "big5"; }
169                 }
170                 
171                 /*
172                 // Get the Windows code page represented by this object.
173                 public override int WindowsCodePage
174                 {
175                         get { return BIG5_PAGE; }
176                 }
177                 */
178                 
179                 // Decoder that handles a rolling Big5 state.
180                 private sealed class CP950Decoder : DbcsDecoder
181                 {
182                         // Constructor.
183                         public CP950Decoder(DbcsConvert convert) : base(convert) {}
184                         int last_byte_count, last_byte_conv;
185
186                         public override int GetCharCount (byte[] bytes, int index, int count)
187                         {
188                                 return GetCharCount (bytes, index, count, false);
189                         }
190
191 #if NET_2_0
192                         public override
193 #endif
194                         int GetCharCount (byte[] bytes, int index, int count, bool refresh)
195                         {
196                                 CheckRange (bytes, index, count);
197
198                                 int lastByte = last_byte_count;
199                                 last_byte_count = 0;
200                                 int length = 0;
201                                 while (count-- > 0) {
202                                         int b = bytes[index++];
203                                         if (lastByte == 0) {
204                                                 if (b <= 0x80 || b == 0xFF) { // ASCII
205                                                         length++;
206                                                 } else if (b < 0xA1 || b >= 0xFA) {
207                                                         // incorrect first byte.
208                                                         length++;
209                                                         count--; // cut one more byte.
210                                                 } else {
211                                                         lastByte = b;
212                                                 }
213                                                 continue;
214                                         }
215                                         int ord = ((lastByte - 0xA1) * 191 + b - 0x40) * 2;
216                                         char c1 = ord < 0 || ord > convert.n2u.Length ?
217                                                 '\0' :
218                                                 (char)(convert.n2u[ord] + convert.n2u[ord + 1] * 256);
219                                         if (c1 == 0)
220                                                 // FIXME: fallback
221                                                 length++;
222                                         else
223                                                 length++;
224                                         lastByte = 0;
225                                 }
226
227                                 if (lastByte != 0) {
228                                         if (refresh)
229                                                 // FIXME: fallback
230                                                 length++;
231                                         else
232                                                 last_byte_count = lastByte;
233                                 }
234                                 return length;
235                         }
236
237                         public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
238                                                      char[] chars, int charIndex)
239                         {
240                                 return GetChars (bytes, byteIndex, byteCount, chars, charIndex, false);
241                         }
242
243 #if NET_2_0
244                         public override
245 #endif
246                         int GetChars(byte[] bytes, int byteIndex, int byteCount,
247                                                      char[] chars, int charIndex, bool refresh)
248                         {
249                                 CheckRange (bytes, byteIndex, byteCount, chars, charIndex);
250
251                                 int origIndex = charIndex;
252                                 int lastByte = last_byte_conv;
253                                 last_byte_conv = 0;
254                                 while (byteCount-- > 0) {
255                                         int b = bytes[byteIndex++];
256                                         if (lastByte == 0) {
257                                                 if (b <= 0x80 || b == 0xFF) { // ASCII
258                                                         chars[charIndex++] = (char)b;
259                                                 } else if (b < 0xA1 || b >= 0xFA) {
260                                                         // incorrect first byte.
261                                                         chars[charIndex++] = '?';
262                                                         byteCount--; // cut one more byte.
263                                                 } else {
264                                                         lastByte = b;
265                                                 }
266                                                 continue;
267                                         }
268                                         int ord = ((lastByte - 0xA1) * 191 + b - 0x40) * 2;
269                                         char c1 = ord < 0 || ord > convert.n2u.Length ?
270                                                 '\0' :
271                                                 (char)(convert.n2u[ord] + convert.n2u[ord + 1] * 256);
272                                         if (c1 == 0)
273                                                 chars[charIndex++] = '?';
274                                         else
275                                                 chars[charIndex++] = c1;
276                                         lastByte = 0;
277                                 }
278
279                                 if (lastByte != 0) {
280                                         if (refresh)
281                                                 chars [charIndex++] = '?';
282                                         else
283                                                 last_byte_conv = lastByte;
284                                 }
285                                 return charIndex - origIndex;
286                         }
287                 }
288         }
289         
290         [Serializable]
291         internal class ENCbig5 : CP950
292         {
293                 public ENCbig5() {}
294         }
295 }