5 // Hye-Shik Chang (perky@FreeBSD.org)
6 // Atsushi Enomoto <atsushi@ximian.com>
// Korean encoding for the UHC code page: a KoreanEncoding constructed with
// code page 949 and useUHC = true.
16 internal class CP949 : KoreanEncoding
18 // Magic number used by Windows for the UHC code page.
19 private const int UHC_CODE_PAGE = 949;
// Constructor: hands the UHC code page number and useUHC = true to KoreanEncoding.
22 public CP949 () : base (UHC_CODE_PAGE, true)
// BodyName, HeaderName and WebName below all report the same string.
26 // Get the mail body name for this encoding.
27 public override String BodyName
29 get { return "ks_c_5601-1987"; }
32 // Get the human-readable name for this encoding.
33 public override String EncodingName
35 get { return "Korean (UHC)"; }
38 // Get the mail agent header name for this encoding.
39 public override String HeaderName
41 get { return "ks_c_5601-1987"; }
44 // Get the IANA-preferred Web name for this encoding.
45 public override String WebName
47 get { return "ks_c_5601-1987"; }
// NOTE(review): UHC_PAGE is not declared in any visible line of this class (the
// constant declared above is UHC_CODE_PAGE). The embedded line numbers skip a
// line directly before and after this property, so it may be commented out or
// conditionally compiled - confirm. If it is live code, it presumably should
// return UHC_CODE_PAGE.
51 // Get the Windows code page represented by this object.
52 public override int WindowsCodePage
54 get { return UHC_PAGE; }
// Korean encoding for the euc-kr code page: a KoreanEncoding constructed with
// code page 51949 and useUHC = false.
60 internal class CP51949 : KoreanEncoding
62 // Magic number used by Windows for the euc-kr code page.
63 private const int EUCKR_CODE_PAGE = 51949;
// Constructor: hands the euc-kr code page number and useUHC = false to KoreanEncoding.
66 public CP51949 () : base (EUCKR_CODE_PAGE, false)
// BodyName, HeaderName and WebName below all report "euc-kr".
70 // Get the mail body name for this encoding.
71 public override String BodyName
73 get { return "euc-kr"; }
76 // Get the human-readable name for this encoding.
77 public override String EncodingName
79 get { return "Korean (EUC)"; }
82 // Get the mail agent header name for this encoding.
83 public override String HeaderName
85 get { return "euc-kr"; }
88 // Get the IANA-preferred Web name for this encoding.
89 public override String WebName
91 get { return "euc-kr"; }
// NOTE(review): UHC_PAGE is not declared in any visible line of this class; the
// only constant declared here is EUCKR_CODE_PAGE. The embedded line numbers skip
// a line directly before and after this property, so it may be commented out or
// conditionally compiled - confirm, and decide which value it should return
// before enabling it.
95 // Get the Windows code page represented by this object.
96 public override int WindowsCodePage
98 get { return UHC_PAGE; }
// Shared implementation for the Korean code pages; subclasses pick the code page
// number and whether the UHC extension ranges are decoded.
105 internal class KoreanEncoding : DbcsEncoding
// codepage: code page number forwarded to DbcsEncoding.
// useUHC:   stored on the instance and later handed to KoreanDecoder by GetDecoder ().
// NOTE(review): the second base argument is always 949; its meaning is defined by
// DbcsEncoding, which is not visible here - presumably the Windows code page, confirm.
108 public KoreanEncoding (int codepage, bool useUHC)
109 : base (codepage, 949) {
110 this.useUHC = useUHC;
// Returns the conversion tables used by this encoding (DbcsConvert.KS). The encoder
// reads its u2n table and the decoder reads its n2u table.
113 internal override DbcsConvert GetConvert ()
115 return DbcsConvert.KS;
120 // Get the bytes that result from encoding a character buffer.
// Walks `count` UTF-16 code units starting at `chars` and classifies each one the
// same way GetBytesImpl does. The statements that accumulate and return the length
// are not visible in this excerpt.
121 public unsafe override int GetByteCountImpl (char* chars, int count)
125 DbcsConvert convert = GetConvert ();
127 EncoderFallbackBuffer buffer = null;
131 while (count-- > 0) {
132 char c = chars[index++];
// Values up to and including 0x80, plus 0xFF, take the single-byte path.
133 if (c <= 0x80 || c == 0xFF) { // ASCII
// u2n stores two bytes per code unit, at offsets c*2 and c*2 + 1.
137 byte b1 = convert.u2n[((int)c) * 2];
138 byte b2 = convert.u2n[((int)c) * 2 + 1];
// A 0/0 pair is treated as "no mapping for this character".
139 if (b1 == 0 && b2 == 0) {
141 // FIXME: handle fallback for GetByteCountImpl().
153 // Get the bytes that result from encoding a character buffer.
// Encodes `charCount` UTF-16 code units from `chars` into `bytes` and returns the
// number of bytes written (byteIndex minus its starting value).
// NOTE(review): no visible line checks byteIndex against byteCount before writing;
// confirm that the caller or an omitted line guarantees enough room.
154 public unsafe override int GetBytesImpl (char* chars, int charCount,
155 byte* bytes, int byteCount)
159 DbcsConvert convert = GetConvert ();
161 EncoderFallbackBuffer buffer = null;
// Remember where writing started so the byte total can be computed at the end.
165 int origIndex = byteIndex;
166 while (charCount-- > 0) {
167 char c = chars[charIndex++];
// Values up to and including 0x80, plus 0xFF, are copied through as one byte.
168 if (c <= 0x80 || c == 0xFF) { // ASCII
169 bytes[byteIndex++] = (byte)c;
// u2n stores two bytes per code unit, at offsets c*2 and c*2 + 1.
172 byte b1 = convert.u2n[((int)c) * 2];
173 byte b2 = convert.u2n[((int)c) * 2 + 1];
// A 0/0 pair is treated as "no mapping for this character".
174 if (b1 == 0 && b2 == 0) {
// NOTE(review): both the HandleFallback call and the '?' write appear here, with
// lines omitted between them. They look like alternative branches (e.g. selected
// by conditional compilation) rather than sequential statements - confirm.
176 HandleFallback (ref buffer, chars, ref charIndex, ref charCount,
177 bytes, ref byteIndex, ref byteCount);
179 bytes[byteIndex++] = (byte)'?';
// Mapped character: emit the two table bytes in order.
182 bytes[byteIndex++] = b1;
183 bytes[byteIndex++] = b2;
186 return byteIndex - origIndex;
189 // Get the characters that result from decoding a byte buffer.
// Delegates to a decoder freshly obtained from GetDecoder (), so no state is
// carried over from earlier calls on this encoding object.
190 public override int GetCharCount (byte[] bytes, int index, int count)
192 return GetDecoder ().GetCharCount (bytes, index, count);
195 // Get the characters that result from decoding a byte buffer.
// Delegates to a decoder freshly obtained from GetDecoder () and returns whatever
// that decoder's GetChars returns.
196 public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
197 char[] chars, int charIndex)
199 return GetDecoder ().GetChars (bytes, byteIndex, byteCount, chars, charIndex);
202 // Get a decoder that handles a rolling UHC state.
// Every call creates a new KoreanDecoder bound to this encoding's conversion
// tables and its useUHC setting.
203 public override Decoder GetDecoder()
205 return new KoreanDecoder (GetConvert (), useUHC);
208 // Decoder that handles a rolling UHC state.
209 private sealed class KoreanDecoder : DbcsDecoder
// Constructor: useUHC enables the "UHC Level 1" and "UHC Level 2" branches in the
// decoding loops. How `convert` reaches the base class is not visible in this excerpt.
212 public KoreanDecoder (DbcsConvert convert, bool useUHC)
215 this.useUHC = useUHC;
// State carried between calls: last_byte_count is loaded and stored by the
// GetCharCount worker, last_byte_conv by the GetChars worker. Inside the loops the
// value is used as the first byte of a two-byte sequence.
218 int last_byte_count, last_byte_conv;
// Public entry point: forwards to the four-argument worker with refresh = false.
220 public override int GetCharCount (byte[] bytes, int index, int count)
222 return GetCharCount (bytes, index, count, false);
// Worker behind the public GetCharCount overload. Validates the range, resumes from
// the first byte saved in last_byte_count, classifies each input byte with the same
// range tests as the GetChars worker, and saves the first-byte state again on exit.
// The counting and return statements, and the use of `refresh`, are not visible in
// this excerpt.
228 int GetCharCount (byte [] bytes, int index, int count, bool refresh)
230 CheckRange (bytes, index, count);
232 int lastByte = last_byte_count;
235 while (count-- > 0) {
236 int b = bytes[index++];
// Values up to and including 0x80, plus 0xFF, take the single-byte path.
238 if (b <= 0x80 || b == 0xFF) { // ASCII
// UHC Level 1: first byte below 0xA1. Each first byte owns 178 table slots
// (26 for 0x41-0x5A, 26 for 0x61-0x7A, 126 for 0x81-0xFE); the region starts at
// slot 8836 = 94 * 94, directly after the KS X 1001 block.
248 if (useUHC && lastByte < 0xa1) { // UHC Level 1
249 int ord = 8836 + (lastByte - 0x81) * 178;
251 if (b >= 0x41 && b <= 0x5A)
253 else if (b >= 0x61 && b <= 0x7A)
254 ord += b - 0x61 + 26;
255 else if (b >= 0x81 && b <= 0xFE)
256 ord += b - 0x81 + 52;
// n2u holds two bytes per slot (low byte first); both ord*2 and ord*2 + 1 must be
// valid indexes, so the upper bound is strict.
260 if (ord >= 0 && ord * 2 + 1 < convert.n2u.Length)
261 c1 = (char)(convert.n2u[ord*2] +
262 convert.n2u[ord*2 + 1] * 256);
// UHC Level 2: first byte 0xA1-0xC6 with a second byte below 0xA1. Each first byte
// owns 84 slots (26 + 26 + 32 for 0x81-0xA0); the region starts at slot
// 14532 = 8836 + 32 * 178, directly after Level 1.
265 } else if (useUHC && lastByte <= 0xC6 && b < 0xA1) { // UHC Level 2
266 int ord = 14532 + (lastByte - 0xA1) * 84;
268 if (b >= 0x41 && b <= 0x5A)
270 else if (b >= 0x61 && b <= 0x7A)
271 ord += b - 0x61 + 26;
272 else if (b >= 0x81 && b <= 0xA0)
273 ord += b - 0x81 + 52;
// Same two-bytes-per-slot bound as in the Level 1 branch.
277 if (ord >= 0 && ord * 2 + 1 < convert.n2u.Length)
278 c1 = (char)(convert.n2u[ord*2] +
279 convert.n2u[ord*2 + 1] * 256);
// KS X 1001: 94 x 94 grid addressed by (first - 0xA1, second - 0xA1). Here ord is
// already the byte offset into n2u; out-of-table offsets yield '\0'.
282 } else if (b >= 0xA1 && b <= 0xFE) { // KS X 1001
283 int ord = ((lastByte - 0xA1) * 94 + b - 0xA1) * 2;
285 c1 = ord < 0 || ord >= convert.n2u.Length ?
286 '\0' : (char)(convert.n2u[ord] +
287 convert.n2u[ord + 1] * 256);
// Persist the first-byte state for the next call.
306 last_byte_count = lastByte;
// Public entry point: forwards to the six-argument worker with refresh = false.
311 public override int GetChars(byte[] bytes, int byteIndex,
312 int byteCount, char[] chars, int charIndex)
314 return GetChars (bytes, byteIndex, byteCount, chars, charIndex, false);
// Worker behind the public GetChars overload. Decodes `byteCount` bytes from
// `bytes` starting at `byteIndex` into `chars` starting at `charIndex`, resuming
// from the first byte saved in last_byte_conv, and returns the number of chars
// written. The use of `refresh` is not visible in this excerpt.
320 int GetChars(byte[] bytes, int byteIndex,
321 int byteCount, char[] chars, int charIndex, bool refresh)
323 CheckRange (bytes, byteIndex, byteCount, chars, charIndex);
// Remember where writing started so the char total can be computed at the end.
324 int origIndex = charIndex;
325 int lastByte = last_byte_conv;
327 while (byteCount-- > 0) {
328 int b = bytes[byteIndex++];
// Values up to and including 0x80, plus 0xFF, are copied through as one char.
330 if (b <= 0x80 || b == 0xFF) { // ASCII
331 chars[charIndex++] = (char)b;
// UHC Level 1: first byte below 0xA1. Each first byte owns 178 table slots
// (26 for 0x41-0x5A, 26 for 0x61-0x7A, 126 for 0x81-0xFE); the region starts at
// slot 8836 = 94 * 94, directly after the KS X 1001 block.
340 if (useUHC && lastByte < 0xa1) { // UHC Level 1
341 int ord = 8836 + (lastByte - 0x81) * 178;
343 if (b >= 0x41 && b <= 0x5A)
345 else if (b >= 0x61 && b <= 0x7A)
346 ord += b - 0x61 + 26;
347 else if (b >= 0x81 && b <= 0xFE)
348 ord += b - 0x81 + 52;
// n2u holds two bytes per slot (low byte first); both ord*2 and ord*2 + 1 must be
// valid indexes, so the upper bound is strict.
352 if (ord >= 0 && ord * 2 + 1 < convert.n2u.Length)
353 c1 = (char)(convert.n2u[ord*2] +
354 convert.n2u[ord*2 + 1] * 256);
// UHC Level 2: first byte 0xA1-0xC6 with a second byte below 0xA1. Each first byte
// owns 84 slots (26 + 26 + 32 for 0x81-0xA0); the region starts at slot
// 14532 = 8836 + 32 * 178, directly after Level 1.
357 } else if (useUHC && lastByte <= 0xC6 && b < 0xA1) { // UHC Level 2
358 int ord = 14532 + (lastByte - 0xA1) * 84;
360 if (b >= 0x41 && b <= 0x5A)
362 else if (b >= 0x61 && b <= 0x7A)
363 ord += b - 0x61 + 26;
364 else if (b >= 0x81 && b <= 0xA0)
365 ord += b - 0x81 + 52;
// Same two-bytes-per-slot bound as in the Level 1 branch.
369 if (ord >= 0 && ord * 2 + 1 < convert.n2u.Length)
370 c1 = (char)(convert.n2u[ord*2] +
371 convert.n2u[ord*2 + 1] * 256);
// KS X 1001: 94 x 94 grid addressed by (first - 0xA1, second - 0xA1). Here ord is
// already the byte offset into n2u; out-of-table offsets yield '\0'.
374 } else if (b >= 0xA1 && b <= 0xFE) { // KS X 1001
375 int ord = ((lastByte - 0xA1) * 94 + b - 0xA1) * 2;
377 c1 = ord < 0 || ord >= convert.n2u.Length ?
378 '\0' : (char)(convert.n2u[ord] +
379 convert.n2u[ord + 1] * 256);
// '?' is the replacement written when no character could be produced; the
// conditions choosing between '?' and c1 are on lines not visible in this excerpt.
384 chars[charIndex++] = '?';
386 chars[charIndex++] = c1;
392 chars[charIndex++] = '?';
// Persist the first-byte state for the next call.
396 last_byte_conv = lastByte;
398 return charIndex - origIndex;
// Subclass of CP949; no members are visible in this excerpt.
// NOTE(review): presumably exists so the encoding can be located by the name "uhc" - confirm.
404 internal class ENCuhc : CP949
// Subclass of CP51949 that adds only a parameterless constructor.
// NOTE(review): presumably exists so the encoding can be located by the name "euc_kr" - confirm.
410 internal class ENCeuc_kr: CP51949
412 public ENCeuc_kr() {}