5 // Hye-Shik Chang (perky@FreeBSD.org)
6 // Atsushi Enomoto <atsushi@ximian.com>
// Korean encoding for code page 949 (UHC). All conversion logic lives in
// KoreanEncoding; this class only selects the code page, turns the UHC
// flag on, and supplies the names reported for the encoding.
internal class CP949 : KoreanEncoding
{
    // Magic number used by Windows for the UHC code page.
    private const int UHC_CODE_PAGE = 949;

    // Constructor: code page 949 with the UHC flag set to true.
    public CP949 () : base (UHC_CODE_PAGE, true)
    {
    }

    // Get the mail body name for this encoding.
    public override String BodyName
    {
        get { return "ks_c_5601-1987"; }
    }

    // Get the human-readable name for this encoding.
    public override String EncodingName
    {
        get { return "Korean (UHC)"; }
    }

    // Get the mail agent header name for this encoding.
    public override String HeaderName
    {
        get { return "ks_c_5601-1987"; }
    }

    // Get the IANA-preferred Web name for this encoding.
    public override String WebName
    {
        get { return "ks_c_5601-1987"; }
    }

    // Get the Windows code page represented by this object.
    public override int WindowsCodePage
    {
        // UHC_CODE_PAGE is the only code-page constant this class declares.
        get { return UHC_CODE_PAGE; }
    }
}
// Korean encoding for code page 51949 (euc-kr). All conversion logic lives
// in KoreanEncoding; this class only selects the code page, leaves the UHC
// flag off, and supplies the names reported for the encoding.
internal class CP51949 : KoreanEncoding
{
    // Magic number used by Windows for the euc-kr code page.
    private const int EUCKR_CODE_PAGE = 51949;

    // Value reported by WindowsCodePage. 949 is the same number that
    // KoreanEncoding passes as the second argument of its base constructor
    // (presumably the Windows code page - confirm in DbcsEncoding).
    private const int UHC_PAGE = 949;

    // Constructor: code page 51949 with the UHC flag set to false.
    public CP51949 () : base (EUCKR_CODE_PAGE, false)
    {
    }

    // Get the mail body name for this encoding.
    public override String BodyName
    {
        get { return "euc-kr"; }
    }

    // Get the human-readable name for this encoding.
    public override String EncodingName
    {
        get { return "Korean (EUC)"; }
    }

    // Get the mail agent header name for this encoding.
    public override String HeaderName
    {
        get { return "euc-kr"; }
    }

    // Get the IANA-preferred Web name for this encoding.
    public override String WebName
    {
        get { return "euc-kr"; }
    }

    // Get the Windows code page represented by this object.
    public override int WindowsCodePage
    {
        get { return UHC_PAGE; }
    }
}
// Common base of the Korean encodings in this file. CP949 constructs it
// with useUHC = true and CP51949 with useUHC = false.
105 internal class KoreanEncoding : DbcsEncoding
// Constructor.
//   codepage - code page number, forwarded to the DbcsEncoding constructor.
//   useUHC   - stored on the instance; GetDecoder () hands it to
//              KoreanDecoder, whose UHC Level 1/2 branches are gated on it.
// The literal 949 is passed as the second base-constructor argument for
// every code page (presumably the Windows code page - confirm in DbcsEncoding).
108 public KoreanEncoding (int codepage, bool useUHC)
109 : base (codepage, 949) {
110 this.useUHC = useUHC;
// Supply the conversion tables for this encoding: always DbcsConvert.KS.
internal override DbcsConvert GetConvert ()
{
    DbcsConvert ksTables = DbcsConvert.KS;
    return ksTables;
}
120 // Get the bytes that result from encoding a character buffer.
// Walks `count` characters starting at `chars` and looks each one up in the
// u2n table returned by GetConvert ().
//   chars - pointer to the characters to examine.
//   count - number of characters to examine; consumed by the loop.
// NOTE(review): judging by the name this returns a byte count, but the
// accumulator and return statement are not among the lines shown - confirm.
121 public unsafe override int GetByteCountImpl (char* chars, int count)
125 DbcsConvert convert = GetConvert ();
128 while (count-- > 0) {
129 char c = chars[index++];
// Values up to 0x80, and 0xFF, take the single-byte branch labelled ASCII.
130 if (c <= 0x80 || c == 0xFF) { // ASCII
// u2n holds two bytes per character: entry 2*c and entry 2*c + 1.
134 byte b1 = convert.u2n[((int)c) * 2];
135 byte b2 = convert.u2n[((int)c) * 2 + 1];
// Both bytes zero is treated as "no mapping for this character".
136 if (b1 == 0 && b2 == 0) {
138 // FIXME: handle fallback for GetByteCountImpl().
150 // Get the bytes that result from encoding a character buffer.
//   chars     - pointer to the characters to encode.
//   charCount - number of characters to encode; consumed by the loop.
//   bytes     - pointer to the output buffer.
//   byteCount - output capacity; in the lines shown it is only passed by
//               reference to HandleFallback.
// Returns the number of bytes written (byteIndex - origIndex).
// NOTE(review): no check of byteIndex against byteCount is visible before
// the writes below - confirm the caller guarantees enough room.
151 public unsafe override int GetBytesImpl (char* chars, int charCount,
152 byte* bytes, int byteCount)
156 DbcsConvert convert = GetConvert ();
// Fallback buffer starts out null and is passed by reference to HandleFallback.
158 EncoderFallbackBuffer buffer = null;
// Remember the starting output position so the return value is a length.
162 int origIndex = byteIndex;
163 while (charCount-- > 0) {
164 char c = chars[charIndex++];
// Values up to 0x80, and 0xFF, are copied through as a single byte.
165 if (c <= 0x80 || c == 0xFF) { // ASCII
166 bytes[byteIndex++] = (byte)c;
// u2n holds two bytes per character: entry 2*c and entry 2*c + 1.
169 byte b1 = convert.u2n[((int)c) * 2];
170 byte b2 = convert.u2n[((int)c) * 2 + 1];
// Both bytes zero is treated as "no mapping for this character".
171 if (b1 == 0 && b2 == 0) {
// NOTE(review): the HandleFallback call and the literal '?' write appear
// back to back here. Confirm they are mutually exclusive (e.g. selected by
// conditional compilation); if both run, an unmappable character produces
// the fallback output followed by an extra '?'.
173 HandleFallback (ref buffer, chars, ref charIndex, ref charCount,
174 bytes, ref byteIndex, ref byteCount);
176 bytes[byteIndex++] = (byte)'?';
// Mapped character: emit both table bytes, b1 first.
179 bytes[byteIndex++] = b1;
180 bytes[byteIndex++] = b2;
183 return byteIndex - origIndex;
// Count the characters that decoding the given byte range would produce.
// The work is delegated to a decoder obtained from GetDecoder ().
public override int GetCharCount (byte[] bytes, int index, int count)
{
    Decoder decoder = GetDecoder ();
    return decoder.GetCharCount (bytes, index, count);
}
// Decode a byte range into `chars`, starting at charIndex, and return the
// decoder's result. The work is delegated to a decoder obtained from
// GetDecoder ().
public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                             char[] chars, int charIndex)
{
    Decoder decoder = GetDecoder ();
    return decoder.GetChars (bytes, byteIndex, byteCount, chars, charIndex);
}
// Create a new KoreanDecoder bound to this encoding's conversion tables and
// its UHC setting. Every call returns a new instance.
public override Decoder GetDecoder()
{
    DbcsConvert tables = GetConvert ();
    Decoder decoder = new KoreanDecoder (tables, useUHC);
    return decoder;
}
205 // Decoder that handles a rolling UHC state.
206 private sealed class KoreanDecoder : DbcsDecoder
// Constructor.
//   convert - conversion tables; the decoding methods read convert.n2u.
//   useUHC  - when true, the decoding methods take their UHC Level 1 and
//             Level 2 branches; when false only the KS X 1001 branch applies.
209 public KoreanDecoder (DbcsConvert convert, bool useUHC)
212 this.useUHC = useUHC;
// State carried between calls: GetCharCount loads and stores
// last_byte_count, GetChars loads and stores last_byte_conv. Each is used as
// the lead byte when computing a table index.
215 int last_byte_count, last_byte_conv;
// Public entry point for counting characters in a byte range. Forwards to
// the four-argument overload with refresh set to false.
public override int GetCharCount (byte[] bytes, int index, int count)
{
    const bool refresh = false;
    return GetCharCount (bytes, index, count, refresh);
}
// Worker behind the public GetCharCount overload.
//   bytes, index, count - input range; validated by CheckRange.
//   refresh             - not referenced in the lines shown here; the public
//                         overload passes false.
// The lead byte is loaded from last_byte_count on entry and stored back on
// exit.
// NOTE(review): the counter and the return statement are not among the
// lines shown - confirm what is returned.
225 int GetCharCount (byte [] bytes, int index, int count, bool refresh)
227 CheckRange (bytes, index, count);
229 int lastByte = last_byte_count;
232 while (count-- > 0) {
233 int b = bytes[index++];
// Values up to 0x80, and 0xFF, take the single-byte branch labelled ASCII.
235 if (b <= 0x80 || b == 0xFF) { // ASCII
// UHC Level 1: lead byte below 0xA1. 8836 (= 94 * 94) is the first table
// index after the KS X 1001 plane; each lead byte owns 178 slots, laid out
// as 26 (0x41-0x5A) + 26 (0x61-0x7A) + 126 (0x81-0xFE).
245 if (useUHC && lastByte < 0xa1) { // UHC Level 1
246 int ord = 8836 + (lastByte - 0x81) * 178;
248 if (b >= 0x41 && b <= 0x5A)
250 else if (b >= 0x61 && b <= 0x7A)
251 ord += b - 0x61 + 26;
252 else if (b >= 0x81 && b <= 0xFE)
253 ord += b - 0x81 + 52;
// n2u stores each character as two bytes, low byte first.
// NOTE(review): this guard admits ord * 2 == n2u.Length, yet the reads
// below touch ord*2 and ord*2 + 1; `ord * 2 + 1 < convert.n2u.Length`
// looks like the intended bound - confirm.
257 if (ord >= 0 && ord * 2 <= convert.n2u.Length)
258 c1 = (char)(convert.n2u[ord*2] +
259 convert.n2u[ord*2 + 1] * 256);
// UHC Level 2: lead byte 0xA1-0xC6 with a trail byte below 0xA1.
// 14532 = 8836 + 32 * 178 follows the Level 1 area; each lead byte owns 84
// slots, laid out as 26 (0x41-0x5A) + 26 (0x61-0x7A) + 32 (0x81-0xA0).
262 } else if (useUHC && lastByte <= 0xC6 && b < 0xA1) { // UHC Level 2
263 int ord = 14532 + (lastByte - 0xA1) * 84;
265 if (b >= 0x41 && b <= 0x5A)
267 else if (b >= 0x61 && b <= 0x7A)
268 ord += b - 0x61 + 26;
269 else if (b >= 0x81 && b <= 0xA0)
270 ord += b - 0x81 + 52;
// NOTE(review): same bounds guard as the Level 1 branch - confirm.
274 if (ord >= 0 && ord * 2 <= convert.n2u.Length)
275 c1 = (char)(convert.n2u[ord*2] +
276 convert.n2u[ord*2 + 1] * 256);
// KS X 1001: 94 x 94 grid with both bytes offset from 0xA1. Here ord is
// already a byte offset (index * 2), and an out-of-range offset yields '\0'.
279 } else if (b >= 0xA1 && b <= 0xFE) { // KS X 1001
280 int ord = ((lastByte - 0xA1) * 94 + b - 0xA1) * 2;
282 c1 = ord < 0 || ord >= convert.n2u.Length ?
283 '\0' : (char)(convert.n2u[ord] +
284 convert.n2u[ord + 1] * 256);
// Persist the lead-byte state for the next call.
303 last_byte_count = lastByte;
// Public entry point for decoding a byte range into `chars`. Forwards to
// the six-argument overload with refresh set to false.
public override int GetChars(byte[] bytes, int byteIndex,
                             int byteCount, char[] chars, int charIndex)
{
    const bool refresh = false;
    return GetChars (bytes, byteIndex, byteCount, chars, charIndex, refresh);
}
// Worker behind the public GetChars overload.
//   bytes, byteIndex, byteCount - input range.
//   chars, charIndex            - output buffer and starting position.
//   refresh                     - not referenced in the lines shown here;
//                                 the public overload passes false.
// Arguments are validated by CheckRange. The lead byte is loaded from
// last_byte_conv on entry and stored back on exit.
// Returns the number of characters written (charIndex - origIndex).
317 int GetChars(byte[] bytes, int byteIndex,
318 int byteCount, char[] chars, int charIndex, bool refresh)
320 CheckRange (bytes, byteIndex, byteCount, chars, charIndex);
// Remember the starting output position so the return value is a length.
321 int origIndex = charIndex;
322 int lastByte = last_byte_conv;
324 while (byteCount-- > 0) {
325 int b = bytes[byteIndex++];
// Values up to 0x80, and 0xFF, are copied through as a single character.
327 if (b <= 0x80 || b == 0xFF) { // ASCII
328 chars[charIndex++] = (char)b;
// UHC Level 1: lead byte below 0xA1. 8836 (= 94 * 94) is the first table
// index after the KS X 1001 plane; each lead byte owns 178 slots, laid out
// as 26 (0x41-0x5A) + 26 (0x61-0x7A) + 126 (0x81-0xFE).
337 if (useUHC && lastByte < 0xa1) { // UHC Level 1
338 int ord = 8836 + (lastByte - 0x81) * 178;
340 if (b >= 0x41 && b <= 0x5A)
342 else if (b >= 0x61 && b <= 0x7A)
343 ord += b - 0x61 + 26;
344 else if (b >= 0x81 && b <= 0xFE)
345 ord += b - 0x81 + 52;
// n2u stores each character as two bytes, low byte first.
// NOTE(review): this guard admits ord * 2 == n2u.Length, yet the reads
// below touch ord*2 and ord*2 + 1; `ord * 2 + 1 < convert.n2u.Length`
// looks like the intended bound - confirm.
349 if (ord >= 0 && ord * 2 <= convert.n2u.Length)
350 c1 = (char)(convert.n2u[ord*2] +
351 convert.n2u[ord*2 + 1] * 256);
// UHC Level 2: lead byte 0xA1-0xC6 with a trail byte below 0xA1.
// 14532 = 8836 + 32 * 178 follows the Level 1 area; each lead byte owns 84
// slots, laid out as 26 (0x41-0x5A) + 26 (0x61-0x7A) + 32 (0x81-0xA0).
354 } else if (useUHC && lastByte <= 0xC6 && b < 0xA1) { // UHC Level 2
355 int ord = 14532 + (lastByte - 0xA1) * 84;
357 if (b >= 0x41 && b <= 0x5A)
359 else if (b >= 0x61 && b <= 0x7A)
360 ord += b - 0x61 + 26;
361 else if (b >= 0x81 && b <= 0xA0)
362 ord += b - 0x81 + 52;
// NOTE(review): same bounds guard as the Level 1 branch - confirm.
366 if (ord >= 0 && ord * 2 <= convert.n2u.Length)
367 c1 = (char)(convert.n2u[ord*2] +
368 convert.n2u[ord*2 + 1] * 256);
// KS X 1001: 94 x 94 grid with both bytes offset from 0xA1. Here ord is
// already a byte offset (index * 2), and an out-of-range offset yields '\0'.
371 } else if (b >= 0xA1 && b <= 0xFE) { // KS X 1001
372 int ord = ((lastByte - 0xA1) * 94 + b - 0xA1) * 2;
374 c1 = ord < 0 || ord >= convert.n2u.Length ?
375 '\0' : (char)(convert.n2u[ord] +
376 convert.n2u[ord + 1] * 256);
// Output step: either '?' or the decoded c1 is written. Presumably '?'
// stands in when no mapping was found; the selecting conditions are not
// among the lines shown - confirm.
381 chars[charIndex++] = '?';
383 chars[charIndex++] = c1;
389 chars[charIndex++] = '?';
// Persist the lead-byte state for the next call.
393 last_byte_conv = lastByte;
395 return charIndex - origIndex;
// Subclass of CP949 under the name ENCuhc (presumably so the encoding can be
// located by its "uhc" name - confirm against the encoding loader).
401 internal class ENCuhc : CP949
// Subclass of CP51949 under the name ENCeuc_kr (presumably so the encoding
// can be located by its "euc-kr" name - confirm against the encoding loader).
407 internal class ENCeuc_kr: CP51949
// Parameterless constructor; the CP51949 constructor supplies the code page.
409 public ENCeuc_kr() {}