7 // find out what the difference is between 50220, 50221 and 50222.
// Encoding class derived from ISO2022JPEncoding whose EncodingName is
// "Japanese (JIS)".
9 public class CP50220 : ISO2022JPEncoding
// NOTE(review): the getter body is not shown in this listing; presumably it
// returns 50220 to match the class name - confirm.
16 public override int CodePage {
// Display name reported for this encoding.
20 public override string EncodingName {
21 get { return "Japanese (JIS)"; }
// Encoding class derived from ISO2022JPEncoding whose EncodingName is
// "Japanese (JIS-Allow 1 byte Kana)".
25 public class CP50221 : ISO2022JPEncoding
// NOTE(review): the getter body is not shown in this listing; presumably it
// returns 50221 to match the class name - confirm.
32 public override int CodePage {
// Display name reported for this encoding.
36 public override string EncodingName {
37 get { return "Japanese (JIS-Allow 1 byte Kana)"; }
// Encoding class derived from ISO2022JPEncoding whose EncodingName is
// "Japanese (JIS-Allow 1 byte Kana - SO/SI)".
41 public class CP50222 : ISO2022JPEncoding
// NOTE(review): the getter body is not shown in this listing; presumably it
// returns 50222 to match the class name - confirm.
48 public override int CodePage {
// Display name reported for this encoding.
52 public override string EncodingName {
53 get { return "Japanese (JIS-Allow 1 byte Kana - SO/SI)"; }
// Encoding implementation that reports "iso-2022-jp" as its body and header
// name. Every conversion method below constructs a fresh ISO2022JPEncoder or
// ISO2022JPDecoder and forwards the two constructor flags to it.
57 public class ISO2022JPEncoding : Encoding
59 static JISConvert convert = JISConvert.Convert;
// Stores the two option flags. In the lines visible here they are only
// passed on to the encoder and decoder constructors.
61 public ISO2022JPEncoding (bool allow1ByteKana, bool allowShiftIO)
63 this.allow_1byte_kana = allow1ByteKana;
64 this.allow_shift_io = allowShiftIO;
67 readonly bool allow_1byte_kana, allow_shift_io;
69 public override string BodyName {
70 get { return "iso-2022-jp"; }
73 public override string HeaderName {
74 get { return "iso-2022-jp"; }
77 public override string WebName {
78 get { return "csISO2022JP"; }
// Returns charCount / 2 * 5 + 4 (integer division).
// NOTE(review): this looks too small to be an upper bound. The encoder in
// this file writes a 3-byte escape on a mode switch, two bytes for a wide
// character, and GetBytes below passes flush = true, which asks for a
// switch back to ASCII. A single wide character would then appear to need
// 3 + 2 + 3 = 8 bytes, while this formula yields 4 for charCount == 1 -
// confirm against the encoder's full source.
81 public override int GetMaxByteCount (int charCount)
83 // ESC w ESC s ESC w ... (even number) ESC s
84 return charCount / 2 * 5 + 4;
// The body of this method is not visible in this listing.
87 public override int GetMaxCharCount (int byteCount)
// Counts bytes using a new encoder instance with flush = true.
93 public override int GetByteCount (char [] chars, int charIndex, int charCount)
95 return new ISO2022JPEncoder (allow_1byte_kana, allow_shift_io).GetByteCount (chars, charIndex, charCount, true);
// Encodes using a new encoder instance with flush = true.
98 public override int GetBytes (char [] chars, int charIndex, int charCount, byte [] bytes, int byteIndex)
100 return new ISO2022JPEncoder (allow_1byte_kana, allow_shift_io).GetBytes (chars, charIndex, charCount, bytes, byteIndex, true);
// Counts characters using a new decoder instance.
103 public override int GetCharCount (byte [] bytes, int index, int count)
105 return new ISO2022JPDecoder (allow_1byte_kana, allow_shift_io).GetCharCount (bytes, index, count);
// Decodes using a new decoder instance.
108 public override int GetChars (byte [] bytes, int byteIndex, int byteCount, char [] chars, int charIndex)
110 return new ISO2022JPDecoder (allow_1byte_kana, allow_shift_io).GetChars (bytes, byteIndex, byteCount, chars, charIndex);
// Mode tracked by the encoder and decoder in this file. The member list is
// not visible in this listing; the members referenced elsewhere in the file
// are ASCII, JISX0208 and JISX0201.
114 internal enum ISO2022JPMode {
// Encoder that converts chars to bytes, inserting a 3-byte escape sequence
// (see SwitchMode) when the output mode changes. Several lines of the
// original are missing from this listing (declarations of ch, value and
// wide, the byte-count arithmetic, return statements), so the notes below
// describe only what the visible lines show.
120 internal class ISO2022JPEncoder : Encoder
122 static JISConvert convert = JISConvert.Convert;
// The two flags are stored by the constructor; no visible line in this
// class reads them afterwards.
124 readonly bool allow_1byte_kana, allow_shift_io;
// Current output mode; starts as ASCII.
126 ISO2022JPMode m = ISO2022JPMode.ASCII;
128 public ISO2022JPEncoder (bool allow1ByteKana, bool allowShiftIO)
130 this.allow_1byte_kana = allow1ByteKana;
131 this.allow_shift_io = allowShiftIO;
// Walks chars [charIndex, charIndex + charCount) and classifies each char by
// range, updating m as it goes. The statements that accumulate and return
// the count are not visible here.
// NOTE(review): the visible lines assign m inside this counting method; if
// m is the field declared above, calling GetByteCount changes the mode seen
// by a later GetBytes on the same instance - confirm.
134 public override int GetByteCount (char [] chars, int charIndex, int charCount, bool flush)
136 int end = charIndex + charCount;
140 for (int i = charIndex; i < end; i++) {
142 if (ch >= 0x2010 && ch <= 0x9FA5)
144 if (m != ISO2022JPMode.JISX0208)
146 m = ISO2022JPMode.JISX0208;
147 // This range contains the bulk of the CJK set.
// Two consecutive table entries are combined, low byte first.
148 value = (ch - 0x2010) * 2;
149 value = ((int)(convert.cjkToJis[value])) |
150 (((int)(convert.cjkToJis[value + 1])) << 8);
151 } else if (ch >= 0xFF01 && ch <= 0xFF60) {
152 if (m != ISO2022JPMode.JISX0208)
154 m = ISO2022JPMode.JISX0208;
156 // This range contains extra characters,
157 value = (ch - 0xFF01) * 2;
158 value = ((int)(convert.extraToJis[value])) |
159 (((int)(convert.extraToJis[value + 1])) << 8);
160 } else if(ch >= 0xFF60 && ch <= 0xFFA0) {
// 0xFF60 itself is already matched by the previous branch, so this branch
// is only reached for 0xFF61..0xFFA0.
161 if (m != ISO2022JPMode.JISX0201)
163 m = ISO2022JPMode.JISX0201;
164 value = ch - 0xFF60 + 0xA0;
165 } else if (ch < 128) {
166 if (m != ISO2022JPMode.ASCII)
168 m = ISO2022JPMode.ASCII;
171 // skip non-convertible character
179 // must end in ASCII mode
180 if (flush && m != ISO2022JPMode.ASCII) {
182 m = ISO2022JPMode.ASCII;
// Writes a 3-byte escape sequence into bytes at byteIndex and advances
// byteIndex. Bytes written for each value of next:
//   JISX0208          -> 0x1B 0x24 0x42
//   JISX0201          -> 0x1B 0x28 0x49
//   anything else     -> 0x1B 0x28 0x42
// Lines before the length check are not visible; presumably the method
// returns early when cur == next - confirm.
187 // returns false if it failed to add required ESC.
188 private bool SwitchMode (byte [] bytes, ref int byteIndex,
189 ISO2022JPMode cur, ISO2022JPMode next)
// NOTE(review): this condition is true when 3 or fewer bytes remain, yet
// only 3 bytes are written below; the check may be off by one - confirm.
193 if (bytes.Length <= byteIndex + 3)
195 bytes [byteIndex++] = 0x1B;
196 bytes [byteIndex++] = (byte) (next == ISO2022JPMode.JISX0208 ? 0x24 : 0x28);
197 bytes [byteIndex++] = (byte) (next == ISO2022JPMode.JISX0201 ? 0x49 : 0x42);
// Encodes chars [charIndex, charIndex + charCount) into bytes starting at
// byteIndex and returns the number of bytes written (byteIndex - start).
// The range tests mirror those in GetByteCount, with SwitchMode called in
// place of the plain mode comparison.
201 public override int GetBytes (char [] chars, int charIndex, int charCount, byte [] bytes, int byteIndex, bool flush)
204 int start = byteIndex;
206 int end = charIndex + charCount;
// NOTE(review): the declaration and updates of wide are not visible here,
// so the intent of the "+ (wide ? 1 : 0)" adjustment cannot be checked.
209 for (int i = charIndex; i < end &&
210 byteIndex < bytes.Length + (wide ? 1 : 0); i++) {
212 if (ch >= 0x2010 && ch <= 0x9FA5)
214 if (!SwitchMode (bytes, ref byteIndex, m, ISO2022JPMode.JISX0208))
216 m = ISO2022JPMode.JISX0208;
217 // This range contains the bulk of the CJK set.
218 value = (ch - 0x2010) * 2;
219 value = ((int)(convert.cjkToJis[value])) |
220 (((int)(convert.cjkToJis[value + 1])) << 8);
221 } else if (ch >= 0xFF01 && ch <= 0xFF60) {
222 if (!SwitchMode (bytes, ref byteIndex, m, ISO2022JPMode.JISX0208))
224 m = ISO2022JPMode.JISX0208;
226 // This range contains extra characters,
227 value = (ch - 0xFF01) * 2;
228 value = ((int)(convert.extraToJis[value])) |
229 (((int)(convert.extraToJis[value + 1])) << 8);
230 } else if(ch >= 0xFF60 && ch <= 0xFFA0) {
// As in GetByteCount, 0xFF60 is taken by the previous branch.
231 if (!SwitchMode (bytes, ref byteIndex, m, ISO2022JPMode.JISX0201))
233 m = ISO2022JPMode.JISX0201;
234 value = ch - 0xFF60 + 0xA0;
235 } else if (ch < 128) {
236 if (!SwitchMode (bytes, ref byteIndex, m, ISO2022JPMode.ASCII))
238 m = ISO2022JPMode.ASCII;
241 // skip non-convertible character
244 //Console.WriteLine ("{0:X04} : {1:x02} {2:x02}", v, (int) v / 94 + 33, v % 94 + 33);
// Two-byte output: quotient and remainder of value by 94, each offset by 33.
// The condition selecting between this and the one-byte form is not visible.
247 bytes [byteIndex++] = (byte) (value / 94 + 33);
248 bytes [byteIndex++] = (byte) (value % 94 + 33);
// One-byte output.
251 bytes [byteIndex++] = (byte) value;
254 // must end in ASCII mode
// NOTE(review): the result of this SwitchMode call is discarded, and m is
// set to ASCII on the next line regardless of whether the escape was written.
255 SwitchMode (bytes, ref byteIndex, m, ISO2022JPMode.ASCII);
256 m = ISO2022JPMode.ASCII;
258 return byteIndex - start;
// Decoder that converts bytes to chars, recognising 3-byte escape sequences
// beginning with 0x1B to change mode. Several lines of the original are
// missing from this listing (index advances, the declaration of wide, some
// branch headers), so the notes below describe only the visible lines.
262 internal class ISO2022JPDecoder : Decoder
264 static JISConvert convert = JISConvert.Convert;
// The two flags are stored by the constructor; no visible line in this
// class reads them afterwards.
266 readonly bool allow_1byte_kana, allow_shift_io;
268 public ISO2022JPDecoder (bool allow1ByteKana, bool allowShiftIO)
270 this.allow_1byte_kana = allow1ByteKana;
271 this.allow_shift_io = allowShiftIO;
// Scans bytes [index, index + count). Throws ArgumentException when an
// escape sequence contains an unexpected byte. The counting statements are
// not visible in this listing.
275 public override int GetCharCount (byte [] bytes, int index, int count)
279 int end = index + count;
280 for (int i = index; i < end; i++) {
281 if (bytes [i] != 0x1B) {
286 break; // incomplete escape sequence
288 if (bytes [i] != 0x24 &&
290 throw new ArgumentException ("Unexpected ISO-2022-JP escape sequence.");
// NOTE(review): only 0x42 is accepted as the final escape byte here, while
// GetChars below also accepts 0x49 (and SwitchMode in the encoder writes
// 0x49 for JISX0201). As shown, this method would throw on input that
// GetChars decodes - confirm.
292 if (bytes [i] != 0x42)
293 throw new ArgumentException ("Unexpected ISO-2022-JP escape sequence.");
// Looks up a char in convert.jisx0208ToUnicode: returns '?' when value is
// at or beyond the table length, otherwise combines the entries at value
// and value + 1, low byte first. Any adjustment of value made on the lines
// before the return is not visible here.
300 private char ToChar (int value)
303 return value >= convert.jisx0208ToUnicode.Length ? '?' :
304 (char) (((int) (convert.jisx0208ToUnicode [value])) |
305 (((int) (convert.jisx0208ToUnicode [value + 1])) << 8));
// Decodes bytes [byteIndex, byteIndex + byteCount) into chars starting at
// charIndex and returns the number of chars written (charIndex - start).
// Stops early when chars is full. Throws ArgumentException on an
// unexpected escape byte.
308 public override int GetChars (byte [] bytes, int byteIndex, int byteCount, char [] chars, int charIndex)
// The mode is a local that starts as ASCII on every call, so it is not
// carried over from one GetChars call to the next.
310 ISO2022JPMode m = ISO2022JPMode.ASCII;
311 int start = charIndex;
312 int end = byteIndex + byteCount;
313 for (int i = byteIndex; i < end && charIndex < chars.Length; i++) {
314 if (bytes [i] != 0x1B) {
315 if (m == ISO2022JPMode.JISX0208) {
317 break; // incomplete head of wide char
319 // am so lazy, so reusing jis2sjis and
// s1 and s2 are derived from the byte pair bytes [i], bytes [i + 1];
// v starts as (s1 - 0x81) * 0xBC. Lines that fold s2 into v and advance i
// are not visible in this listing.
320 int s1 = ((bytes [i] - 1) >> 1) + ((bytes [i] <= 0x5e) ? 0x71 : 0xb1);
321 int s2 = bytes [i + 1] + (((bytes [i] & 1) != 0) ? 0x20 : 0x7e);
322 int v = (s1 - 0x81) * 0xBC;
325 chars [charIndex++] = ToChar (v);
328 else if (m == ISO2022JPMode.JISX0201)
// Adds 0xFF40 to the byte, so 0x21 maps to U+FF61.
// NOTE(review): the encoder in this file computes ch - 0xFF60 + 0xA0 for
// the same characters (U+FF61 -> 0xA1); 0xA1 + 0xFF40 is U+FFE1, so the two
// sides may not round-trip - confirm.
329 chars [charIndex++] = (char) (bytes [i] + 0xFF40);
330 // LAMESPEC: actually this should not
331 // be allowed when 1byte-kana is not
332 // allowed, but MS.NET seems to allow
334 else if (bytes [i] > 0xA0 && bytes [i] < 0xE0) // half-width Katakana
// Bytes 0xA1..0xDF map to U+FF61..U+FF9F.
335 chars [charIndex++] = (char) (bytes [i] - 0xA0 + 0xFF60);
// Any other byte is copied through as a char with the same value.
337 chars [charIndex++] = (char) bytes [i];
341 break; // incomplete escape sequence
// Second escape byte: 0x24 or 0x28; anything else throws. The statements
// executed for each case are not visible here.
344 if (bytes [i] == 0x24)
346 else if (bytes [i] == 0x28)
349 throw new ArgumentException ("Unexpected ISO-2022-JP escape sequence.");
// Third escape byte: 0x42 selects JISX0208 or ASCII depending on wide;
// 0x49 selects JISX0201; anything else throws.
351 if (bytes [i] == 0x42)
352 m = wide ? ISO2022JPMode.JISX0208 : ISO2022JPMode.ASCII;
353 else if (bytes [i] == 0x49)
354 m = ISO2022JPMode.JISX0201;
356 throw new ArgumentException (String.Format ("Unexpected ISO-2022-JP escape sequence. Ended with 0x{0:X04}", bytes [i]));
360 return charIndex - start;
// Class derived from CP50220 that adds only a parameterless constructor
// forwarding to the base constructor.
364 public class ENCiso_2022_jp : CP50220
366 public ENCiso_2022_jp () : base() {}
368 }; // class ENCiso_2022_jp