2006-01-23 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / I18N / CJK / CP950.cs
1 //
2 // I18N.CJK.CP950
3 //
4 // Author:
5 //   Alan Tam Siu Lung (Tam@SiuLung.com)
6 //
7
8 using System;
9 using System.Text;
10 using I18N.Common;
11
12 namespace I18N.CJK
13 {
14         [Serializable]
15         internal class CP950 : DbcsEncoding
16         {
17                 // Magic number used by Windows for the Big5 code page.
18                 private const int BIG5_CODE_PAGE = 950;
19                 
20                 // Constructor.
21                 public CP950() : base(BIG5_CODE_PAGE) {
22                 }
23
24                 internal override DbcsConvert GetConvert ()
25                 {
26                         return Big5Convert.Convert;
27                 }
28
29                 // Get the bytes that result from encoding a character buffer.
30                 public unsafe override int GetBytesImpl (char* chars, int charCount,
31                                              byte* bytes, int byteCount)
32                 {
33                         DbcsConvert convert = GetConvert ();
34                         int charIndex = 0;
35                         int byteIndex = 0;
36 #if NET_2_0
37                         EncoderFallbackBuffer buffer = null;
38 #endif
39
40                         int origIndex = byteIndex;
41                         while (charCount-- > 0) {
42                                 char c = chars[charIndex++];
43                                 if (c <= 0x80 || c == 0xFF) { // ASCII
44                                         bytes[byteIndex++] = (byte)c;
45                                         continue;
46                                 }
47                                 byte b1 = convert.u2n[((int)c) * 2 + 1];
48                                 byte b2 = convert.u2n[((int)c) * 2];
49                                 if (b1 == 0 && b2 == 0) {
50 #if NET_2_0
51                                         HandleFallback (ref buffer, chars,
52                                                 ref charIndex, ref charCount,
53                                                 bytes, ref byteIndex, ref byteCount);
54 #else
55                                         bytes[byteIndex++] = (byte)'?';
56 #endif
57                                 } else {
58                                         bytes[byteIndex++] = b1;
59                                         bytes[byteIndex++] = b2;
60                                 }
61                         }
62                         return byteIndex - origIndex;
63                 }
64                 
65                 // Get the characters that result from decoding a byte buffer.
66                 public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
67                                              char[] chars, int charIndex)
68                 {
69                         /*
70                         DbcsConvert convert = GetConvert ();
71                         // A1 40 - FA FF
72                         base.GetChars(bytes, byteIndex, byteCount, chars, charIndex);
73                         int origIndex = charIndex;
74                         int lastByte = 0;
75                         while (byteCount-- > 0) {
76                                 int b = bytes[byteIndex++];
77                                 if (lastByte == 0) {
78                                         if (b <= 0x80 || b == 0xFF) { // ASCII
79                                                 chars[charIndex++] = (char)b;
80                                         } else if (b < 0xA1 || b >= 0xFA) {
81                                                 // incorrect first byte.
82                                                 chars[charIndex++] = '?';
83                                                 byteCount--; // cut one more byte.
84                                         } else {
85                                                 lastByte = b;
86                                         }
87                                         continue;
88                                 }
89                                 int ord = ((lastByte - 0xA1) * 191 + b - 0x40) * 2;
90                                 char c1 = ord < 0 || ord > convert.n2u.Length ?
91                                         '\0' :
92                                         (char)(convert.n2u[ord] + convert.n2u[ord + 1] * 256);
93                                 if (c1 == 0)
94                                         chars[charIndex++] = '?';
95                                 else
96                                         chars[charIndex++] = c1;
97                                 lastByte = 0;
98                         }
99                         if (lastByte != 0)
100                                 chars[charIndex++] = '?';
101
102                         return charIndex - origIndex;
103                         */
104
105                         return GetDecoder ().GetChars (bytes, byteIndex, byteCount, chars, charIndex);
106                 }
107                 
108                 // Get a decoder that handles a rolling Big5 state.
109                 public override Decoder GetDecoder()
110                 {
111                         return new CP950Decoder(GetConvert ());
112                 }
113                 
114                 // Get the mail body name for this encoding.
115                 public override String BodyName
116                 {
117                         get { return "big5"; }
118                 }
119                 
120                 // Get the human-readable name for this encoding.
121                 public override String EncodingName
122                 {
123                         get { return "Chinese Traditional (Big5)"; }
124                 }
125                 
126                 // Get the mail agent header name for this encoding.
127                 public override String HeaderName
128                 {
129                         get { return "big5"; }
130                 }
131                 
132                 // Get the IANA-preferred Web name for this encoding.
133                 public override String WebName
134                 {
135                         get { return "big5"; }
136                 }
137                 
138                 /*
139                 // Get the Windows code page represented by this object.
140                 public override int WindowsCodePage
141                 {
142                         get { return BIG5_PAGE; }
143                 }
144                 */
145                 
146                 // Decoder that handles a rolling Big5 state.
147                 private sealed class CP950Decoder : DbcsDecoder
148                 {
149                         // Constructor.
150                         public CP950Decoder(DbcsConvert convert) : base(convert) {}
151                         int last_byte_count, last_byte_conv;
152
153                         public override int GetCharCount (byte[] bytes, int index, int count)
154                         {
155                                 return GetCharCount (bytes, index, count, false);
156                         }
157
158 #if NET_2_0
159                         public override
160 #endif
161                         int GetCharCount (byte[] bytes, int index, int count, bool refresh)
162                         {
163                                 CheckRange (bytes, index, count);
164
165                                 int lastByte = last_byte_count;
166                                 last_byte_count = 0;
167                                 int length = 0;
168                                 while (count-- > 0) {
169                                         int b = bytes[index++];
170                                         if (lastByte == 0) {
171                                                 if (b <= 0x80 || b == 0xFF) { // ASCII
172                                                         length++;
173                                                 } else if (b < 0xA1 || b >= 0xFA) {
174                                                         // incorrect first byte.
175                                                         length++;
176                                                         count--; // cut one more byte.
177                                                 } else {
178                                                         lastByte = b;
179                                                 }
180                                                 continue;
181                                         }
182                                         int ord = ((lastByte - 0xA1) * 191 + b - 0x40) * 2;
183                                         char c1 = ord < 0 || ord > convert.n2u.Length ?
184                                                 '\0' :
185                                                 (char)(convert.n2u[ord] + convert.n2u[ord + 1] * 256);
186                                         if (c1 == 0)
187                                                 // FIXME: fallback
188                                                 length++;
189                                         else
190                                                 length++;
191                                         lastByte = 0;
192                                 }
193
194                                 if (lastByte != 0) {
195                                         if (refresh)
196                                                 // FIXME: fallback
197                                                 length++;
198                                         else
199                                                 last_byte_count = lastByte;
200                                 }
201                                 return length;
202                         }
203
204                         public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
205                                                      char[] chars, int charIndex)
206                         {
207                                 return GetChars (bytes, byteIndex, byteCount, chars, charIndex, false);
208                         }
209
210 #if NET_2_0
211                         public override
212 #endif
213                         int GetChars(byte[] bytes, int byteIndex, int byteCount,
214                                                      char[] chars, int charIndex, bool refresh)
215                         {
216                                 CheckRange (bytes, byteIndex, byteCount, chars, charIndex);
217
218                                 int origIndex = charIndex;
219                                 int lastByte = last_byte_conv;
220                                 last_byte_conv = 0;
221                                 while (byteCount-- > 0) {
222                                         int b = bytes[byteIndex++];
223                                         if (lastByte == 0) {
224                                                 if (b <= 0x80 || b == 0xFF) { // ASCII
225                                                         chars[charIndex++] = (char)b;
226                                                 } else if (b < 0xA1 || b >= 0xFA) {
227                                                         // incorrect first byte.
228                                                         chars[charIndex++] = '?';
229                                                         byteCount--; // cut one more byte.
230                                                 } else {
231                                                         lastByte = b;
232                                                 }
233                                                 continue;
234                                         }
235                                         int ord = ((lastByte - 0xA1) * 191 + b - 0x40) * 2;
236                                         char c1 = ord < 0 || ord > convert.n2u.Length ?
237                                                 '\0' :
238                                                 (char)(convert.n2u[ord] + convert.n2u[ord + 1] * 256);
239                                         if (c1 == 0)
240                                                 chars[charIndex++] = '?';
241                                         else
242                                                 chars[charIndex++] = c1;
243                                         lastByte = 0;
244                                 }
245
246                                 if (lastByte != 0) {
247                                         if (refresh)
248                                                 chars [charIndex++] = '?';
249                                         else
250                                                 last_byte_conv = lastByte;
251                                 }
252                                 return charIndex - origIndex;
253                         }
254                 }
255         }
256         
257         [Serializable]
258         internal class ENCbig5 : CP950
259         {
260                 public ENCbig5() {}
261         }
262 }